diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml
index 056520122e0..33c76815dae 100644
--- a/src/docbkx/book.xml
+++ b/src/docbkx/book.xml
@@ -83,16 +83,25 @@
+The above may not work if you are running your HBase from its build directory;
diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 1cd7f62ac5b..bc745f6b1c0 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -2103,6 +2103,7 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
list.add(e.getValue().getRegionInfo());
}
}
+ Collections.sort(list);
return list;
}
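
The hunk above sorts the region list before returning it, presumably so callers get a deterministic ordering. Below is a minimal sketch of the same pattern under stated assumptions: RegionKey is a hypothetical stand-in for HRegionInfo, since Collections.sort(list) only compiles when the element type implements Comparable (as HRegionInfo does, with its own natural ordering), and java.util.Collections has to be available in HRegionServer.java's imports.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class SortedListingSketch {
      // Hypothetical stand-in for HRegionInfo; the real class defines its own natural ordering.
      static class RegionKey implements Comparable<RegionKey> {
        final String table;
        final String startKey;

        RegionKey(String table, String startKey) {
          this.table = table;
          this.startKey = startKey;
        }

        @Override
        public int compareTo(RegionKey other) {
          int c = table.compareTo(other.table);
          return c != 0 ? c : startKey.compareTo(other.startKey);
        }

        @Override
        public String toString() {
          return table + "," + startKey;
        }
      }

      public static void main(String[] args) {
        List<RegionKey> list = new ArrayList<RegionKey>();
        list.add(new RegionKey("t1", "row-m"));
        list.add(new RegionKey("t1", "row-a"));
        list.add(new RegionKey("t0", "row-z"));
        Collections.sort(list);            // natural (Comparable) ordering
        System.out.println(list);          // [t0,row-z, t1,row-a, t1,row-m]
      }
    }
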
diff --git a/src/main/java/org/apache/hadoop/hbase/replication/regionserver/Replication.java b/src/main/java/org/apache/hadoop/hbase/replication/regionserver/Replication.java
index 00beab5c818..104d425aa21 100644
--- a/src/main/java/org/apache/hadoop/hbase/replication/regionserver/Replication.java
+++ b/src/main/java/org/apache/hadoop/hbase/replication/regionserver/Replication.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.zookeeper.KeeperException;
/**
- * Gateway to Replication. Used by {@link HRegionServer}.
+ * Gateway to Replication. Used by {@link org.apache.hadoop.hbase.regionserver.HRegionServer}.
*/
public class Replication implements WALObserver {
private final boolean replication;
@@ -159,4 +159,4 @@ public class Replication implements WALObserver {
public void logCloseRequested() {
// not interested
}
-}
\ No newline at end of file
+}
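
The javadoc change above spells out the full package for the {@link} target. An unqualified link such as {@link HRegionServer} only resolves when the class is imported or lives in the same package, so fully qualifying it keeps the javadoc tool from reporting a broken reference; related reasoning applies to the KeyOnlyFilter and TableMapReduceUtil link edits elsewhere in this patch, which drop the {@link} altogether. A small sketch with made-up classes, just to show the two forms:

    package org.example.docs;

    /** Hypothetical helper, present only so the links below resolve. */
    class Widget {}

    /**
     * Gateway to Widget. The short form {@link Widget} resolves here because Widget is in
     * this package; from any other package, javadoc would warn unless the class were
     * imported or written out as {@link org.example.docs.Widget}, which is the form the
     * hunk above adopts for org.apache.hadoop.hbase.regionserver.HRegionServer.
     */
    public class WidgetGateway {}
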
diff --git a/src/main/java/org/apache/hadoop/hbase/security/User.java b/src/main/java/org/apache/hadoop/hbase/security/User.java
index ef902eff629..4b5e9e80cda 100644
--- a/src/main/java/org/apache/hadoop/hbase/security/User.java
+++ b/src/main/java/org/apache/hadoop/hbase/security/User.java
@@ -74,7 +74,7 @@ public abstract class User {
/**
* Returns the shortened version of the user name -- the portion that maps
* to an operating system user name.
- * @return
+ * @return Short name
*/
public abstract String getShortName();
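
The @return text now states what getShortName() yields: the portion of the user name that maps to an operating system account. Purely as an illustration (this is not HBase code), a Kerberos-style principal such as alice/admin@EXAMPLE.COM has the short name alice; a hypothetical helper that performs that trimming:

    public class ShortNameSketch {
      // Illustrative only: trims a Kerberos-style principal down to the part that maps
      // to an operating-system user name, the kind of value getShortName() documents.
      static String shortName(String principal) {
        for (int i = 0; i < principal.length(); i++) {
          char c = principal.charAt(i);
          if (c == '/' || c == '@') {
            return principal.substring(0, i);
          }
        }
        return principal;
      }

      public static void main(String[] args) {
        System.out.println(shortName("alice/admin@EXAMPLE.COM")); // alice
        System.out.println(shortName("bob@EXAMPLE.COM"));         // bob
        System.out.println(shortName("carol"));                   // carol
      }
    }
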
diff --git a/src/main/javadoc/overview.html b/src/main/javadoc/overview.html
index 2cb4b3e568b..e79d715f1ce 100644
--- a/src/main/javadoc/overview.html
+++ b/src/main/javadoc/overview.html
@@ -26,352 +26,30 @@
 First review the requirements
-section of the HBase Book. A careful reading will save you grief down the road.
-What follows presumes you have obtained a copy of HBase, see Releases, and are installing
-for the first time. If upgrading your HBase instance, see Upgrading.
-Three modes are described: standalone, pseudo-distributed (where all servers are run on
-a single host), and fully-distributed. If new to HBase, start by following the standalone instructions.
-Begin by reading Requirements.
-If you are running a standalone operation, there should be nothing further to configure; proceed to
-Running and Confirming Your Installation. If you are running a distributed operation, continue reading.
-Distributed modes require an instance of the Hadoop Distributed File System (DFS).
-See the Hadoop requirements and instructions for how to set up a DFS.
-A pseudo-distributed mode is simply a distributed mode run on a single host.
-Use this configuration for testing and prototyping on HBase. Do not use this configuration
-for production nor for evaluating HBase performance.
+ See the Getting Started
+section of the HBase Book.
-Note: Let HBase create the directory. If you don't, you'll get a warning saying HBase
-needs a migration run because the directory is missing files expected by HBase (it'll
-create them if you let it).
-Also Note: Above we bind to localhost. This means that a remote client cannot
-connect. Amend accordingly, if you want to connect from a remote location.
-For running a fully-distributed operation on more than one host, the following
-configurations must be made in addition to those described in the
-pseudo-distributed operation section above.
-A distributed HBase depends on a running ZooKeeper cluster. All participating nodes and clients
-need to be able to get to the running ZooKeeper cluster.
-HBase by default manages a ZooKeeper cluster for you, or you can manage it on your own and point HBase to it.
 See the FAQ that is up on the wiki, HBase Wiki FAQ,
 as well as the Troubleshooting page and the Frequently Seen Errors page.
 See Andrew's answer here, up on the user list: Remote Java client connection into EC2 instance.
 sync.
Currently only the branch-0.20-append
branch has this attribute. No official releases have been made from this branch as of this writing
so you will have to build your own Hadoop from the tip of this branch
@@ -297,6 +308,7 @@ Usually you'll want to use the latest version available except the problematic u
+${HBASE_HOME} to be the location of the root of your HBase installation, e.g.
+/user/local/hbase. Edit ${HBASE_HOME}/conf/hbase-env.sh. In this file you can
+set the heapsize for HBase, etc. At a minimum, set JAVA_HOME to point at the root of
+your Java installation.
- * This method is used by {@link KeyOnlyFilter} and is an advanced feature of
+ * This method is used by KeyOnlyFilter and is an advanced feature of
  * KeyValue, proceed with caution.
  */
 public void convertToKeyOnly() {
diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
index eb907ddc2d6..affb94070ba 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
@@ -62,7 +62,7 @@ is set to the HBase CLASSPATH
via backticking the command
etc., dependencies on the passed
HADOOP_CLASSPATH and adds the found jars to the mapreduce
job configuration. See the source at
-{@link TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)}
+TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
for how this is done.
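
The package-info text above describes how TableMapReduceUtil#addDependencyJars pushes the jars backing a job's classes into the mapreduce job configuration (and from there onto the distributed cache). A hedged sketch of a driver that calls it; the Job constructor shown is the old-style Hadoop 0.20 API, and the rest of the job wiring (mapper, input, output) is omitted, so treat this as an outline rather than the canonical usage:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.mapreduce.Job;

    public class DependencyJarsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();  // would normally carry HBase/ZooKeeper settings
        Job job = new Job(conf, "example-hbase-job");
        job.setJarByClass(DependencyJarsSketch.class);

        // Locates the jars that contain the job's configured classes and records them in
        // the job configuration so they ship with the job, as described above.
        TableMapReduceUtil.addDependencyJars(job);

        // ... configure mapper/reducer, input and output here, then job.waitForCompletion(true)
      }
    }
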
 Table of Contents
-Getting Started
-Whatever your mode, define ${HBASE_HOME} to be the location of the root of your HBase installation,
-e.g. /user/local/hbase. Edit ${HBASE_HOME}/conf/hbase-env.sh. In this file you can
-set the heapsize for HBase, etc. At a minimum, set JAVA_HOME to point at the root of
-your Java installation.
-Standalone mode
-Distributed Operation: Pseudo- and Fully-distributed modes
-Pseudo-distributed mode
-Once you have confirmed your DFS setup, configuring HBase for use on one host requires modification of
-${HBASE_HOME}/conf/hbase-site.xml, which needs to be pointed at the running Hadoop DFS instance.
-Use hbase-site.xml to override the properties defined in
-${HBASE_HOME}/conf/hbase-default.xml (hbase-default.xml itself
-should never be modified) and for HDFS client configurations.
-At a minimum, the hbase.rootdir, which points HBase at the Hadoop filesystem to use,
-and the dfs.replication, an hdfs client-side configuration stipulating how many replicas to keep up,
-should be redefined in hbase-site.xml. For example, adding the properties below to your
-hbase-site.xml says that HBase should use the /hbase directory in the HDFS whose namenode
-is at port 9000 on your local machine, and that it should run with one replica only
-(recommended for pseudo-distributed mode):
-<configuration>
- ...
- <property>
- <name>hbase.rootdir</name>
- <value>hdfs://localhost:9000/hbase</value>
- <description>The directory shared by region servers.
- </description>
- </property>
- <property>
- <name>dfs.replication</name>
- <value>1</value>
- <description>The replication count for HLog & HFile storage. Should not be greater than HDFS datanode count.
- </description>
- </property>
- ...
-</configuration>
-
-Fully-Distributed Operation
-In hbase-site.xml, set hbase.cluster.distributed to true.
-<configuration>
- ...
- <property>
- <name>hbase.cluster.distributed</name>
- <value>true</value>
- <description>The mode the cluster will be in. Possible values are
- false: standalone and pseudo-distributed setups with managed Zookeeper
- true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)
- </description>
- </property>
- ...
-</configuration>
-
-In fully-distributed mode, you probably want to change your hbase.rootdir
-from localhost to the name of the node running the HDFS NameNode, and you should set
-the dfs.replication to be the number of datanodes you have in your cluster or 3, whichever is the smaller.
-In addition to hbase-site.xml changes, a fully-distributed mode requires that you
-modify ${HBASE_HOME}/conf/regionservers.
-The regionservers file lists all hosts running HRegionServers, one host per line
-(this file in HBase is like the Hadoop slaves file at ${HADOOP_HOME}/conf/slaves).
-To toggle HBase management of ZooKeeper, use the HBASE_MANAGES_ZK variable in ${HBASE_HOME}/conf/hbase-env.sh.
-This variable, which defaults to true, tells HBase whether to
-start/stop the ZooKeeper quorum servers alongside the rest of the servers.
-When HBase manages the ZooKeeper cluster, you can specify ZooKeeper configuration
-using its canonical zoo.cfg file (see below), or
-just specify ZooKeeper options directly in the ${HBASE_HOME}/conf/hbase-site.xml
-(if new to ZooKeeper, go the path of specifying your configuration in HBase's hbase-site.xml).
-Every ZooKeeper configuration option has a corresponding property in the HBase hbase-site.xml
-XML configuration file named hbase.zookeeper.property.OPTION.
-For example, the clientPort setting in ZooKeeper can be changed by
-setting the hbase.zookeeper.property.clientPort property.
-For the full list of available properties, see ZooKeeper's zoo.cfg.
-For the default values used by HBase, see ${HBASE_HOME}/conf/hbase-default.xml.
-At minimum, you should set the list of servers that you want ZooKeeper to run on
-using the hbase.zookeeper.quorum property.
-This property defaults to localhost which is not suitable for a
-fully distributed HBase (it binds to the local machine only and remote clients
-will not be able to connect).
-It is recommended to run a ZooKeeper quorum of 3, 5 or 7 machines, and give each
-ZooKeeper server around 1GB of RAM, and if possible, its own dedicated disk.
-For very heavily loaded clusters, run ZooKeeper servers on separate machines from the
-Region Servers (DataNodes and TaskTrackers).
-To point HBase at an existing ZooKeeper cluster, add a suitably configured zoo.cfg to the CLASSPATH.
-HBase will see this file and use it to figure out where ZooKeeper is.
-Additionally set HBASE_MANAGES_ZK in ${HBASE_HOME}/conf/hbase-env.sh
-to false so that HBase doesn't mess with your ZooKeeper setup:
-
-
-
- ...
- # Tell HBase whether it should manage it's own instance of Zookeeper or not.
- export HBASE_MANAGES_ZK=false
-
-
-
-
-
-As an example, to have HBase manage a ZooKeeper quorum on nodes
-rs{1,2,3,4,5}.example.com, bound to port 2222 (the default is 2181), use:
- ${HBASE_HOME}/conf/hbase-env.sh:
-
- ...
- # Tell HBase whether it should manage it's own instance of Zookeeper or not.
- export HBASE_MANAGES_ZK=true
-
- ${HBASE_HOME}/conf/hbase-site.xml:
-
- <configuration>
- ...
- <property>
- <name>hbase.zookeeper.property.clientPort</name>
- <value>2222</value>
- <description>Property from ZooKeeper's config zoo.cfg.
- The port at which the clients will connect.
- </description>
- </property>
- ...
- <property>
- <name>hbase.zookeeper.quorum</name>
- <value>rs1.example.com,rs2.example.com,rs3.example.com,rs4.example.com,rs5.example.com</value>
- <description>Comma separated list of servers in the ZooKeeper Quorum.
- For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com".
- By default this is set to localhost for local and pseudo-distributed modes
- of operation. For a fully-distributed setup, this should be set to a full
- list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in hbase-env.sh
- this is the list of servers which we will start/stop ZooKeeper on.
- </description>
- </property>
- ...
- </configuration>
-
-
-
-
-When HBase manages ZooKeeper, it will start/stop the ZooKeeper servers as a part
-of the regular start/stop scripts. If you would like to run it yourself, you can do:
-${HBASE_HOME}/bin/hbase-daemons.sh {start,stop} zookeeper
-If you do let HBase manage ZooKeeper for you, make sure you configure
-where its data is stored. By default, it will be stored in /tmp which is
-sometimes cleaned in live systems. Do modify this configuration:
-
- <property>
- <name>hbase.zookeeper.property.dataDir</name>
- <value>${hbase.tmp.dir}/zookeeper</value>
- <description>Property from ZooKeeper's config zoo.cfg.
- The directory where the snapshot is stored.
- </description>
- </property>
-
-
-
-Note that you can use HBase in this manner to spin up a ZooKeeper cluster, unrelated to HBase.
-Just make sure to set HBASE_MANAGES_ZK to false if you want it to stay up so that
-when HBase shuts down it doesn't take ZooKeeper with it.
-For more information about setting up a ZooKeeper cluster on your own, see
-the ZooKeeper Getting Started Guide.
-HBase currently uses ZooKeeper version 3.3.1, so any cluster setup with a
-3.x.x version of ZooKeeper should work.
-Of note, if you have made HDFS client configuration on your Hadoop cluster, HBase will not
-see this configuration unless you do one of the following:
-add a pointer to your HADOOP_CONF_DIR to CLASSPATH in hbase-env.sh,
-add a copy of hdfs-site.xml (or hadoop-site.xml) to ${HBASE_HOME}/conf,
-or add the relevant properties to hbase-site.xml.
-An example of such an HDFS client configuration is dfs.replication. If, for example,
-you want to run with a replication factor of 5, HBase will create files with the default of 3 unless
-you do the above to make the configuration available to HBase.
-Running and Confirming Your Installation
-If you are running in standalone, non-distributed mode, HBase by default uses the local filesystem.
-If you are running a distributed cluster you will need to start the Hadoop DFS daemons and
-ZooKeeper Quorum before starting HBase and stop the daemons after HBase has shut down.
-Start and stop the Hadoop DFS daemons by running ${HADOOP_HOME}/bin/start-dfs.sh.
-You can ensure it started properly by testing the put and get of files into the Hadoop filesystem.
-HBase does not normally use the mapreduce daemons. These do not need to be started.
-Start up your ZooKeeper cluster. Start HBase with the following command:
-${HBASE_HOME}/bin/start-hbase.sh
-Once HBase has started, enter ${HBASE_HOME}/bin/hbase shell to obtain a
-shell against HBase from which you can execute commands.
-Type 'help' at the shell's prompt to get a list of commands.
-Test your running install by creating tables, inserting content, viewing content, and then dropping your tables.
-For example:
-hbase> # Type "help" to see shell help screen
-hbase> help
-hbase> # To create a table named "mylittletable" with a column family of "mylittlecolumnfamily", type
-hbase> create "mylittletable", "mylittlecolumnfamily"
-hbase> # To see the schema for you just created "mylittletable" table and its single "mylittlecolumnfamily", type
-hbase> describe "mylittletable"
-hbase> # To add a row whose id is "myrow", to the column "mylittlecolumnfamily:x" with a value of 'v', do
-hbase> put "mylittletable", "myrow", "mylittlecolumnfamily:x", "v"
-hbase> # To get the cell just added, do
-hbase> get "mylittletable", "myrow"
-hbase> # To scan you new table, do
-hbase> scan "mylittletable"
-
-
-
-
-To stop HBase, exit the HBase shell and enter: ${HBASE_HOME}/bin/stop-hbase.sh
-If you are running a distributed operation, be sure to wait until HBase has shut down completely
-before stopping the Hadoop daemons.
-The default location for logs is ${HBASE_HOME}/logs.
-HBase also puts up a UI listing vital attributes. By default it's deployed on the master host
-at port 60010 (HBase RegionServers listen on port 60020 by default and put up an informational
-http server at 60030).
-Upgrading
-After installing a new HBase on top of data written by a previous HBase version, before
-starting your cluster, run the ${HBASE_DIR}/bin/hbase migrate migration script.
-It will make any adjustments to the filesystem data under hbase.rootdir necessary to run
-the HBase version. It does not change your install unless you explicitly ask it to.
-Example API Usage
-For sample Java code, see org.apache.hadoop.hbase.client documentation.
-If your client is NOT Java, consider the Thrift or REST libraries.
-Windows
-If you are running HBase on Windows, you must install Cygwin
-to have a *nix-like environment for the shell scripts. The full details
-are explained in the Windows Installation guide.
Related Documentation
-
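
The shell walkthrough above (create, put, get, scan against "mylittletable") has a direct Java counterpart through the org.apache.hadoop.hbase.client package mentioned under Example API Usage. A hedged sketch against the client API of roughly this era; HBaseConfiguration.create(), the HTable constructor, and Put.add have since been deprecated or moved, so treat the exact calls as assumptions and check the client javadoc for your release:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;

    public class MyLittleTableSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();  // reads hbase-site.xml from the classpath
        HTable table = new HTable(conf, "mylittletable");   // table created beforehand, e.g. from the shell

        // Equivalent of: put "mylittletable", "myrow", "mylittlecolumnfamily:x", "v"
        Put put = new Put(Bytes.toBytes("myrow"));
        put.add(Bytes.toBytes("mylittlecolumnfamily"), Bytes.toBytes("x"), Bytes.toBytes("v"));
        table.put(put);

        // Equivalent of: get "mylittletable", "myrow"
        Result row = table.get(new Get(Bytes.toBytes("myrow")));
        System.out.println(Bytes.toString(
            row.getValue(Bytes.toBytes("mylittlecolumnfamily"), Bytes.toBytes("x"))));

        // Equivalent of: scan "mylittletable"
        ResultScanner scanner = table.getScanner(new Scan());
        try {
          for (Result r : scanner) {
            System.out.println(r);
          }
        } finally {
          scanner.close();
          table.close();
        }
      }
    }
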
diff --git a/src/site/fml/faq.fml b/src/site/fml/faq.fml
deleted file mode 100644
index 727d9f5aac9..00000000000
--- a/src/site/fml/faq.fml
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-
June 30th, HBase Contributor Workshop (Day after Hadoop Summit)
+May 10th, 2010: HBase graduates from Hadoop sub-project to Apache Top Level Project
Signup for HBase User Group Meeting, HUG10 hosted by Trend Micro, April 19th, 2010
HBase User Group Meeting, HUG9 hosted by Mozilla, March 10th, 2010