From 363d81c247072698e77dec6e2bb166c18d41be8b Mon Sep 17 00:00:00 2001
From: Michael Stack
Date: Tue, 16 Jun 2009 05:08:06 +0000
Subject: [PATCH] HBASE-1447 Take last version of the hbase-1249 design doc.
 and make documentation out of it

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@785081 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                        |   2 +
 .../hadoop/hbase/client/package-info.java          | 144 +++++++++---------
 .../hbase/mapreduce/TableInputFormatBase.java      |   4 +-
 .../hbase/regionserver/GetDeleteTracker.java       |   1 -
 .../MinorCompactingStoreScanner.java               |   2 +-
 src/java/overview.html                             |  16 +-
 .../hbase/regionserver/TestScanner.java            |   3 -
 7 files changed, 87 insertions(+), 85 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index be4364d20ce..ec6279b9f31 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -193,6 +193,8 @@ Release 0.20.0 - Unreleased
    HBASE-1529 familyMap not invalidated when a Result is (re)read as a
               Writable
    HBASE-1528 Ensure scanners work across memcache snapshot
+   HBASE-1447 Take last version of the hbase-1249 design doc. and make
+              documentation out of it
 
   IMPROVEMENTS
    HBASE-1089 Add count of regions on filesystem to master UI; add percentage
diff --git a/src/java/org/apache/hadoop/hbase/client/package-info.java b/src/java/org/apache/hadoop/hbase/client/package-info.java
index 0ad66d94ef5..5cc375e17d0 100644
--- a/src/java/org/apache/hadoop/hbase/client/package-info.java
+++ b/src/java/org/apache/hadoop/hbase/client/package-info.java
@@ -35,95 +35,93 @@ Provides HBase Client
-REPLACE!!!!!!!!
 import java.io.IOException;
+
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Scanner;
-import org.apache.hadoop.hbase.io.BatchUpdate;
-import org.apache.hadoop.hbase.io.Cell;
-import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.util.Bytes;
 
-public class MyClient {
 
-  public static void main(String args[]) throws IOException {
+// Class that has nothing but a main.
+// Does a Put, Get and a Scan against an hbase table.
+public class MyLittleHBaseClient {
+  public static void main(String[] args) throws IOException {
     // You need a configuration object to tell the client where to connect.
-    // But don't worry, the defaults are pulled from the local config file.
+    // When you create an HBaseConfiguration, it reads in whatever you've set
+    // into your hbase-site.xml and in hbase-default.xml, as long as these can
+    // be found on the CLASSPATH.
     HBaseConfiguration config = new HBaseConfiguration();
 
-    // This instantiates an HTable object that connects you to the "myTable"
-    // table. 
-    HTable table = new HTable(config, "myTable");
+    // This instantiates an HTable object that connects you to
+    // the "myLittleHBaseTable" table. 
+    HTable table = new HTable(config, "myLittleHBaseTable");
 
-    // To do any sort of update on a row, you use an instance of the BatchUpdate
-    // class. A BatchUpdate takes a row and optionally a timestamp which your
-    // updates will affect.  If no timestamp, the server applies current time
-    // to the edits.
-    BatchUpdate batchUpdate = new BatchUpdate("myRow");
+    // To add to a row, use Put.  A Put constructor takes the name of the row
+    // you want to insert into as a byte array.  In HBase, the Bytes class has
+    // utility for converting all kinds of Java types to byte arrays.  In the
+    // below, we are converting the String "myLittleRow" into a byte array to
+    // use as a row key for our update. Once you have a Put instance, you can
+    // adorn it by setting the names of columns you want to update on the row,
+    // the timestamp to use in your update, etc.  If no timestamp, the server
+    // applies current time to the edits.
+    Put p = new Put(Bytes.toBytes("myLittleRow"));
 
-    // The BatchUpdate#put method takes a byte [] (or String) that designates
-    // what cell you want to put a value into, and a byte array that is the
-    // value you want to store. Note that if you want to store Strings, you
-    // have to getBytes() from the String for HBase to store it since HBase is
-    // all about byte arrays. The same goes for primitives like ints and longs
-    // and user-defined classes - you must find a way to reduce it to bytes.
-    // The Bytes class from the hbase util package has utility for going from
-    // String to utf-8 bytes and back again and help for other base types.
-    batchUpdate.put("myColumnFamily:columnQualifier1", 
-      Bytes.toBytes("columnQualifier1 value!"));
+    // To set the value you'd like to update in the row 'myRow', specify the
+    // column family, column qualifier, and value of the table cell you'd like
+    // to update.  The column family must already exist in your table schema.
+    // The qualifier can be anything.  All must be specified as byte arrays as
+    // hbase is all about byte arrays.  Let's pretend the table
+    // 'myLittleHBaseTable' was created with a family 'myLittleFamily'.
+    p.add(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"),
+      Bytes.toBytes("Some Value"));
 
-    // Deletes are batch operations in HBase as well. 
-    batchUpdate.delete("myColumnFamily:cellIWantDeleted");
-
-    // Once you've done all the puts you want, you need to commit the results.
-    // The HTable#commit method takes the BatchUpdate instance you've been 
-    // building and pushes the batch of changes you made into HBase.
-    table.commit(batchUpdate);
+    // Once you've adorned your Put instance with all the updates you want to
+    // make, to commit it do the following (The HTable#put method takes the
+    // Put instance you've been building and pushes the changes you made into
+    // hbase)
+    table.put(p);
 
     // Now, to retrieve the data we just wrote. The values that come back are
-    // Cell instances. A Cell is a combination of the value as a byte array and
-    // the timestamp the value was stored with. If you happen to know that the 
-    // value contained is a string and want an actual string, then you must 
-    // convert it yourself.
-    Cell cell = table.get("myRow", "myColumnFamily:columnQualifier1");
-    // This could throw a NullPointerException if there was no value at the cell
-    // location.
-    String valueStr = Bytes.toString(cell.getValue());
-    
+    // Result instances. Generally, a Result is an object that packages up
+    // what hbase returned in the form you find most palatable.
+    Get g = new Get(Bytes.toBytes("myLittleRow"));
+    Result r = table.get(g);
+    byte [] value = r.getValue(Bytes.toBytes("myLittleFamily"),
+      Bytes.toBytes("someQualifier"));
+    // If we convert the value bytes, we should get back 'Some Value', the
+    // value we inserted at this location.
+    String valueStr = Bytes.toString(value);
+    System.out.println("GET: " + valueStr);
+
     // Sometimes, you won't know the row you're looking for. In this case, you
     // use a Scanner. This will give you a cursor-like interface to the contents
-    // of the table.
-    Scanner scanner = 
-      // we want to get back only "myColumnFamily:columnQualifier1" when we iterate
-      table.getScanner(new String[]{"myColumnFamily:columnQualifier1"});
-    
-    
-    // Scanners return RowResult instances. A RowResult is like the
-    // row key and the columns all wrapped up in a single Object. 
-    // RowResult#getRow gives you the row key. RowResult also implements 
-    // Map, so you can get to your column results easily. 
-    
-    // Now, for the actual iteration. One way is to use a while loop like so:
-    RowResult rowResult = scanner.next();
-    
-    while (rowResult != null) {
-      // print out the row we found and the columns we were looking for
-      System.out.println("Found row: " + Bytes.toString(rowResult.getRow()) +
-        " with value: " + rowResult.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
-      rowResult = scanner.next();
+    // of the table.  To set up a Scanner, do as you did above when making a
+    // Put and a Get: create a Scan.  Adorn it with column names, etc.
+    Scan s = new Scan();
+    s.addColumn(Bytes.toBytes("myLittleFamily"), Bytes.toBytes("someQualifier"));
+    ResultScanner scanner = table.getScanner(s);
+    try {
+      // Scanners return Result instances.
+      // Now, for the actual iteration. One way is to use a while loop like so:
+      for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
+        // print out the row we found and the columns we were looking for
+        System.out.println("Found row: " + rr);
+      }
+
+      // The other approach is to use a foreach loop. Scanners are iterable!
+      // for (Result rr : scanner) {
+      //   System.out.println("Found row: " + rr);
+      // }
+    } finally {
+      // Make sure you close your scanners when you are done!
+      // That's why we have it inside a try/finally clause.
+      scanner.close();
     }
-    
-    // The other approach is to use a foreach loop. Scanners are iterable!
-    for (RowResult result : scanner) {
-      // print out the row we found and the columns we were looking for
-      System.out.println("Found row: " + Bytes.toString(rowResult.getRow()) +
-        " with value: " + rowResult.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
-    }
-    
-    // Make sure you close your scanners when you are done!
-    // Its probably best to put the iteration into a try/finally with the below
-    // inside the finally clause.
-    scanner.close();
   }
 }
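
The example above no longer demonstrates a delete the way the old BatchUpdate
code did.  What follows is a minimal sketch of the same operation against the
new client API, reusing the table, row and imports from the example (Delete
lives in org.apache.hadoop.hbase.client alongside Get, Put and Scan):

    // Deleting is symmetrical with Put and Get: build a Delete for the row
    // and hand it to HTable#delete.  Left unadorned, this removes the whole
    // row; Delete also has methods to narrow the removal to a family or a
    // single column if that is all you want gone.
    Delete d = new Delete(Bytes.toBytes("myLittleRow"));
    table.delete(d);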
 
diff --git a/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java b/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
index b0eb92e47a7..dcd86028314 100644
--- a/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
+++ b/src/java/org/apache/hadoop/hbase/mapreduce/TableInputFormatBase.java
@@ -142,7 +142,7 @@ implements InputFormat {
   }
 
   /**
-   * @param inputColumns the columns to be placed in {@link RowResult}.
+   * @param inputColumns the columns to be placed in {@link Result}.
    */
   public void setInputColumns(final byte [][] inputColumns) {
     this.trrInputColumns = inputColumns;
@@ -304,7 +304,7 @@ implements InputFormat {
   }
 
   /**
-   * @param inputColumns to be passed in {@link RowResult} to the map task.
+   * @param inputColumns to be passed in {@link Result} to the map task.
    */
   protected void setInputColumns(byte [][] inputColumns) {
     this.inputColumns = inputColumns;
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java b/src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java
index 5f063dc023b..8a92541a072 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java
@@ -47,7 +47,6 @@ public class GetDeleteTracker implements DeleteTracker {
 
   /**
    * Constructor
-   * @param comparator
    */
   public GetDeleteTracker() {}
 
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java b/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
index 3a7b7471de6..1c5c2660a07 100644
--- a/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
+++ b/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
@@ -82,7 +82,7 @@ public class MinorCompactingStoreScanner implements KeyValueScanner, InternalSca
   /**
    * High performance merge scan.
    * @param writer
-   * @return
+   * @return True if more.
    * @throws IOException
    */
   public boolean next(HFile.Writer writer) throws IOException {
diff --git a/src/java/overview.html b/src/java/overview.html
index db968a10077..c871211b230 100644
--- a/src/java/overview.html
+++ b/src/java/overview.html
@@ -35,6 +35,14 @@
 ssh must be installed and sshd must be running to use Hadoop's
 scripts to manage remote Hadoop daemons.</li>
+<li>HBase depends on ZooKeeper as of release 0.20.0.
+Clients and Servers now must know where their ZooKeeper Quorum is located before
+they can do anything else.
+In basic standalone and pseudo-distributed modes, HBase manages a ZooKeeper instance
+for you but it is required that you run a ZooKeeper Quorum when running HBase
+fully distributed (more on this below). The ZooKeeper addition changes
+how some core HBase configuration is done.
+</li>
 <li>HBase currently is a file handle hog.  The usual default of 1024 on *nix
 systems is insufficient if you are loading any significant amount of data
 into regionservers.  See the
@@ -46,11 +54,6 @@
 <li>The clocks on cluster members should be in basic alignments. Some skew is
 tolerable but wild skew can generate odd behaviors. Run NTP on your cluster,
 or an equivalent.
 </li>
-<li>HBase depends on ZooKeeper as of release 0.20.0.
-In basic standalone and pseudo-distributed modes, HBase manages a ZooKeeper instance
-for you but it is required that you run a ZooKeeper Quorum when running HBase
-fully distributed (More on this below).
-</li>
 <li>This is a list of patches we recommend you apply to your running Hadoop
 cluster:
 <li>HADOOP-4681 "DFSClient block read failures cause open DFSInputStream to
 become unusable". This patch will help with the ever-popular, "No live nodes
 contain current block".</li>
@@ -86,6 +89,9 @@
 What follows presumes you have obtained a copy of HBase, see Releases, and
 are installing for the first time. If upgrading your HBase instance, see
 Upgrading.
+If you have used HBase in the past,
+please read carefully. Some core configuration has changed in 0.20.x HBase.
+
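
Since clients now need the quorum location, it has to reach them somehow.
Ordinarily you put it in the hbase-site.xml on the client CLASSPATH, but it
can also be set on the configuration in code.  A minimal sketch in the style
of the client example above, with placeholder host names (zk1, zk2, zk3)
standing in for a real quorum:

    // Point a client at a ZooKeeper Quorum explicitly.  hbase.zookeeper.quorum
    // is the property the 0.20 client reads; the hosts listed here are
    // placeholders for your own quorum members.
    HBaseConfiguration config = new HBaseConfiguration();
    config.set("hbase.zookeeper.quorum", "zk1,zk2,zk3");
    HTable table = new HTable(config, "myLittleHBaseTable");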

 Three modes are described: standalone, pseudo-distributed (where all servers
 are run on a single host), and distributed. If new to hbase, start by
 following the standalone instructions.

diff --git a/src/test/org/apache/hadoop/hbase/regionserver/TestScanner.java b/src/test/org/apache/hadoop/hbase/regionserver/TestScanner.java
index d2404226961..23200442ced 100644
--- a/src/test/org/apache/hadoop/hbase/regionserver/TestScanner.java
+++ b/src/test/org/apache/hadoop/hbase/regionserver/TestScanner.java
@@ -38,9 +38,6 @@ import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.filter.StopRowFilter;
-import org.apache.hadoop.hbase.filter.WhileMatchRowFilter;
-import org.apache.hadoop.hbase.io.BatchUpdate;
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;