HBASE-3735 Book.xml - adding section on Schema Design on versions

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1089074 13f79535-47bb-0310-9956-ffa450edef68
2011-04-05 15:14:00 +00:00 · 2011-04-05 15:14:00 +00:00 · 943bd0921d
parent d143b1b243
commit 943bd0921d
2 changed files with 49 additions and 34 deletions
--- a/src/docbkx/book.xml
+++ b/src/docbkx/book.xml
@@ -205,40 +205,17 @@ throws InterruptedException, IOException {
                  names.
      `</para>
  </section>
-  <section xml:id="precreate.regions">
+  <section xml:id="schema.versions">
  <title>
-  Table Creation: Pre-Creating Regions
+  Number of Versions
  </title>
-<para>
-Tables in HBase are initially created with one region by default.  For bulk imports, this means that all clients will write to the same region until it is large enough to split and become distributed across the cluster.  A useful pattern to speed up the bulk import process is to pre-create empty regions.  Be somewhat conservative in this, because too-many regions can actually degrade performance.  An example of pre-creation using hex-keys is as follows (note:  this example may need to be tweaked to the individual applications keys):
-</para>
-<para>
-<programlisting>public static boolean createTable(HBaseAdmin admin, HTableDescriptor table, byte[][] splits)
-throws IOException {
-  try {
-    admin.createTable( table, splits );
-    return true;
-  } catch (TableExistsException e) {
-    logger.info("table " + table.getNameAsString() + " already exists");
-    // the table already exists...
-    return false;  
-  }
-}
-
-public static byte[][] getHexSplits(String startKey, String endKey, int numRegions) {
-  byte[][] splits = new byte[numRegions-1][];
-  BigInteger lowestKey = new BigInteger(startKey, 16);
-  BigInteger highestKey = new BigInteger(endKey, 16);
-  BigInteger range = highestKey.subtract(lowestKey);
-  BigInteger regionIncrement = range.divide(BigInteger.valueOf(numRegions));
-  lowestKey = lowestKey.add(regionIncrement);
-  for(int i=0; i &lt; numRegions-1;i++) {
-    BigInteger key = lowestKey.add(regionIncrement.multiply(BigInteger.valueOf(i)));
-    byte[] b = String.format("%016x", key).getBytes();
-    splits[i] = b;
-  }
-  return splits;
-}</programlisting>
+  <para>The number of row versions to store is configured per column
+      family via <link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/HColumnDescriptor.html">HColumnDescriptor</link>.
+      The default is 3.
+      This is an important parameter because, as described in the <link linkend="datamodel">Data Model</link>
+      section, HBase does <emphasis>not</emphasis> overwrite row values, but rather
+      stores different values per row by time (and qualifier).  Excess versions are removed during major
+      compactions.  The number of versions may need to be increased or decreased depending on application needs.
  </para>
  </section>

--- a/src/docbkx/performance.xml
+++ b/src/docbkx/performance.xml
@@ -92,9 +92,47 @@

  <section xml:id="perf.batch.loading">
    <title>Batch Loading</title>
+    <para>Use the bulk load tool if you can.  See
+        <link xlink:href="http://hbase.apache.org/bulk-loads.html">Bulk Loads</link>.
+        Otherwise, pay attention to the guidance below.
+    </para>

-    <para>See the section on <link linkend="precreate.regions">Pre Creating
-    Regions</link> as well as bulk loading</para>
+  <section xml:id="precreate.regions">
+  <title>
+  Table Creation: Pre-Creating Regions
+  </title>
+<para>
+Tables in HBase are initially created with one region by default.  For bulk imports, this means that all clients will write to the same region until it is large enough to split and become distributed across the cluster.  A useful pattern to speed up the bulk import process is to pre-create empty regions.  Be somewhat conservative in this, because too many regions can actually degrade performance.  An example of pre-creation using hex-keys is as follows (note:  this example may need to be tweaked for the individual application's keys):
+</para>
+<para>
+<programlisting>public static boolean createTable(HBaseAdmin admin, HTableDescriptor table, byte[][] splits)
+throws IOException {
+  try {
+    admin.createTable( table, splits );
+    return true;
+  } catch (TableExistsException e) {
+    logger.info("table " + table.getNameAsString() + " already exists");
+    // the table already exists...
+    return false;  
+  }
+}
+
+public static byte[][] getHexSplits(String startKey, String endKey, int numRegions) {
+  byte[][] splits = new byte[numRegions-1][];
+  BigInteger lowestKey = new BigInteger(startKey, 16);
+  BigInteger highestKey = new BigInteger(endKey, 16);
+  BigInteger range = highestKey.subtract(lowestKey);
+  BigInteger regionIncrement = range.divide(BigInteger.valueOf(numRegions));
+  lowestKey = lowestKey.add(regionIncrement);
+  for(int i=0; i &lt; numRegions-1;i++) {
+    BigInteger key = lowestKey.add(regionIncrement.multiply(BigInteger.valueOf(i)));
+    byte[] b = String.format("%016x", key).getBytes();
+    splits[i] = b;
+  }
+  return splits;
+}</programlisting>
+  </para>
+  </section>
  </section>

  <section>