HBASE-7162 Prefix Compression - Trie data block encoding; hbase-common and hbase-server changes

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1410213 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2012-11-16 06:53:23 +00:00
parent d322efdeb6
commit c5b57b4558
33 changed files with 1992 additions and 146 deletions

View File

@@ -37,6 +37,8 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import com.google.common.primitives.Longs;
@@ -63,7 +65,7 @@ import com.google.common.primitives.Longs;
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class KeyValue implements Writable, HeapSize {
public class KeyValue implements Cell, Writable, HeapSize {
static final Log LOG = LogFactory.getLog(KeyValue.class);
// TODO: Group Key-only comparators and operations into a Key class, just
// for neatness' sake, if we can figure out what to call it.
@@ -261,12 +263,23 @@ public class KeyValue implements Writable, HeapSize {
/** Here be dragons **/
// used to achieve atomic operations in the memstore.
public long getMemstoreTS() {
@Override
public long getMvccVersion() {
return memstoreTS;
}
public void setMvccVersion(long mvccVersion){
this.memstoreTS = mvccVersion;
}
@Deprecated
public long getMemstoreTS() {
return getMvccVersion();
}
@Deprecated
public void setMemstoreTS(long memstoreTS) {
this.memstoreTS = memstoreTS;
setMvccVersion(memstoreTS);
}
// default value is 0, aka DNC
@@ -831,19 +844,21 @@ public class KeyValue implements Writable, HeapSize {
value, voffset, vlength);
}
// Needed doing 'contains' on List. Only compares the key portion, not the
// value.
/**
* Needed for doing 'contains' on a List. Only compares the key portion, not the value.
*
* For temporary backwards compatibility with the original KeyValue.equals method, we ignore the
* mvccVersion.
*/
@Override
public boolean equals(Object other) {
if (!(other instanceof KeyValue)) {
if (!(other instanceof Cell)) {
return false;
}
KeyValue kv = (KeyValue)other;
// Comparing bytes should be fine doing equals test. Shouldn't have to
// worry about special .META. comparators doing straight equals.
return Bytes.equals(getBuffer(), getKeyOffset(), getKeyLength(),
kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
return CellComparator.equalsIgnoreMvccVersion(this, (Cell)other);
}
@Override
public int hashCode() {
byte[] b = getBuffer();
int start = getOffset(), end = getOffset() + getLength();
@@ -864,6 +879,7 @@ public class KeyValue implements Writable, HeapSize {
* Clones a KeyValue. This creates a copy, re-allocating the buffer.
* @return Fully copied clone of this KeyValue
*/
@Override
public KeyValue clone() {
byte [] b = new byte[this.length];
System.arraycopy(this.bytes, this.offset, b, 0, this.length);
@@ -1041,9 +1057,18 @@ public class KeyValue implements Writable, HeapSize {
return keyLength;
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getValueArray() {
return bytes;
}
/**
* @return Value offset
*/
@Override
public int getValueOffset() {
return getKeyOffset() + getKeyLength();
}
@@ -1051,13 +1076,23 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Value length
*/
@Override
public int getValueLength() {
return Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT);
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getRowArray() {
return bytes;
}
/**
* @return Row offset
*/
@Override
public int getRowOffset() {
return getKeyOffset() + Bytes.SIZEOF_SHORT;
}
@@ -1065,13 +1100,23 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Row length
*/
@Override
public short getRowLength() {
return Bytes.toShort(this.bytes, getKeyOffset());
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getFamilyArray() {
return bytes;
}
/**
* @return Family offset
*/
@Override
public int getFamilyOffset() {
return getFamilyOffset(getRowLength());
}
@@ -1086,6 +1131,7 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Family length
*/
@Override
public byte getFamilyLength() {
return getFamilyLength(getFamilyOffset());
}
@@ -1097,9 +1143,18 @@ public class KeyValue implements Writable, HeapSize {
return this.bytes[foffset-1];
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getQualifierArray() {
return bytes;
}
/**
* @return Qualifier offset
*/
@Override
public int getQualifierOffset() {
return getQualifierOffset(getFamilyOffset());
}
@@ -1114,6 +1169,7 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Qualifier length
*/
@Override
public int getQualifierLength() {
return getQualifierLength(getRowLength(),getFamilyLength());
}
@@ -1273,6 +1329,7 @@ public class KeyValue implements Writable, HeapSize {
* @return Timestamp
*/
private long timestampCache = -1;
@Override
public long getTimestamp() {
if (timestampCache == -1) {
timestampCache = getTimestamp(getKeyLength());
@@ -1296,6 +1353,14 @@ public class KeyValue implements Writable, HeapSize {
return getType(getKeyLength());
}
/**
* @return KeyValue.TYPE byte representation
*/
@Override
public byte getTypeByte() {
return getType(getKeyLength());
}
/**
* @param keylength Pass if you have it to save on a int creation.
* @return Type of this KeyValue.
@@ -2564,13 +2629,23 @@ public class KeyValue implements Writable, HeapSize {
}
}
// HeapSize
/**
* HeapSize implementation
*
* We do not count the bytes in the rowCache because it should be empty for a KeyValue in the
* MemStore.
*/
@Override
public long heapSize() {
return ClassSize.align(ClassSize.OBJECT + (2 * ClassSize.REFERENCE) +
ClassSize.align(ClassSize.ARRAY) + ClassSize.align(length) +
(3 * Bytes.SIZEOF_INT) +
ClassSize.align(ClassSize.ARRAY) +
(2 * Bytes.SIZEOF_LONG));
int sum = 0;
sum += ClassSize.OBJECT;// the KeyValue object itself
sum += 2 * ClassSize.REFERENCE;// 2 * pointers to "bytes" and "rowCache" byte[]
sum += 2 * ClassSize.align(ClassSize.ARRAY);// 2 * "bytes" and "rowCache" byte[]
sum += ClassSize.align(length);// number of bytes of data in the "bytes" array
//ignore the data in the rowCache because it is cleared for inactive memstore KeyValues
sum += 3 * Bytes.SIZEOF_INT;// offset, length, keyLength
sum += 2 * Bytes.SIZEOF_LONG;// timestampCache, memstoreTS
return ClassSize.align(sum);
}
// this overload assumes that the length bytes have already been read,
@@ -2587,11 +2662,13 @@ public class KeyValue implements Writable, HeapSize {
}
// Writable
@Override
public void readFields(final DataInput in) throws IOException {
int length = in.readInt();
readFields(length, in);
}
@Override
public void write(final DataOutput out) throws IOException {
out.writeInt(this.length);
out.write(this.bytes, this.offset, this.length);
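With KeyValue now implementing Cell, calling code can address each field as an (array, offset, length) triple instead of copying byte[]'s. A minimal sketch of that accessor style (the values are hypothetical; package names follow this commit):

    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hbase.Cell;

    // Sketch: read fields through the Cell accessors without allocating per-field byte[]'s.
    Cell cell = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"),
        Bytes.toBytes("qual"), 42L, KeyValue.Type.Put, Bytes.toBytes("value"));
    String row = Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
    String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(),
        cell.getValueLength());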

View File

@@ -0,0 +1,168 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.Strings;
import com.google.common.collect.Lists;
@InterfaceAudience.Private
public class KeyValueTestUtil {
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
String value)
{
return create(row, family, qualifier, timestamp, KeyValue.Type.Put, value);
}
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
KeyValue.Type type,
String value)
{
return new KeyValue(
Bytes.toBytes(row),
Bytes.toBytes(family),
Bytes.toBytes(qualifier),
timestamp,
type,
Bytes.toBytes(value)
);
}
public static ByteBuffer toByteBufferAndRewind(final Iterable<? extends KeyValue> kvs,
boolean includeMemstoreTS) {
int totalBytes = KeyValueTool.totalLengthWithMvccVersion(kvs, includeMemstoreTS);
ByteBuffer bb = ByteBuffer.allocate(totalBytes);
for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
KeyValueTool.appendToByteBuffer(bb, kv, includeMemstoreTS);
}
bb.rewind();
return bb;
}
public static List<KeyValue> rewindThenToList(final ByteBuffer bb,
final boolean includesMemstoreTS) {
bb.rewind();
List<KeyValue> kvs = Lists.newArrayList();
KeyValue kv = null;
while (true) {
kv = KeyValueTool.nextShallowCopy(bb, includesMemstoreTS);
if (kv == null) {
break;
}
kvs.add(kv);
}
return kvs;
}
/********************* toString ************************************/
public static String toStringWithPadding(final Collection<? extends KeyValue> kvs,
final boolean includeMeta) {
int maxRowStringLength = 0;
int maxFamilyStringLength = 0;
int maxQualifierStringLength = 0;
int maxTimestampLength = 0;
for (KeyValue kv : kvs) {
maxRowStringLength = Math.max(maxRowStringLength, getRowString(kv).length());
maxFamilyStringLength = Math.max(maxFamilyStringLength, getFamilyString(kv).length());
maxQualifierStringLength = Math.max(maxQualifierStringLength, getQualifierString(kv)
.length());
maxTimestampLength = Math.max(maxTimestampLength, Long.valueOf(kv.getTimestamp()).toString()
.length());
}
StringBuilder sb = new StringBuilder();
for (KeyValue kv : kvs) {
if (sb.length() > 0) {
sb.append("\n");
}
String row = toStringWithPadding(kv, maxRowStringLength, maxFamilyStringLength,
maxQualifierStringLength, maxTimestampLength, includeMeta);
sb.append(row);
}
return sb.toString();
}
protected static String toStringWithPadding(final KeyValue kv, final int maxRowLength,
int maxFamilyLength, int maxQualifierLength, int maxTimestampLength, boolean includeMeta) {
String leadingLengths = "";
String familyLength = kv.getFamilyLength() + " ";
if (includeMeta) {
leadingLengths += Strings.padFront(kv.getKeyLength() + "", '0', 4);
leadingLengths += " ";
leadingLengths += Strings.padFront(kv.getValueLength() + "", '0', 4);
leadingLengths += " ";
leadingLengths += Strings.padFront(kv.getRowLength() + "", '0', 2);
leadingLengths += " ";
}
int spacesAfterRow = maxRowLength - getRowString(kv).length() + 2;
int spacesAfterFamily = maxFamilyLength - getFamilyString(kv).length() + 2;
int spacesAfterQualifier = maxQualifierLength - getQualifierString(kv).length() + 1;
int spacesAfterTimestamp = maxTimestampLength
- Long.valueOf(kv.getTimestamp()).toString().length() + 1;
return leadingLengths + getRowString(kv) + Strings.repeat(' ', spacesAfterRow)
+ familyLength + getFamilyString(kv) + Strings.repeat(' ', spacesAfterFamily)
+ getQualifierString(kv) + Strings.repeat(' ', spacesAfterQualifier)
+ getTimestampString(kv) + Strings.repeat(' ', spacesAfterTimestamp)
+ getTypeString(kv) + " " + getValueString(kv);
}
protected static String getRowString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getRow());
}
protected static String getFamilyString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getFamily());
}
protected static String getQualifierString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getQualifier());
}
protected static String getTimestampString(final KeyValue kv) {
return kv.getTimestamp() + "";
}
protected static String getTypeString(final KeyValue kv) {
return KeyValue.Type.codeToType(kv.getType()).toString();
}
protected static String getValueString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getValue());
}
}
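A quick round-trip sketch using the helpers above (heap buffer, mvcc omitted; assumes the java.nio and guava imports already present in this file):

    List<KeyValue> kvs = Lists.newArrayList(
        KeyValueTestUtil.create("r1", "f", "q", 1L, "v1"),
        KeyValueTestUtil.create("r2", "f", "q", 1L, "v2"));
    ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false);
    List<KeyValue> decoded = KeyValueTestUtil.rewindThenToList(bb, false);
    // decoded.equals(kvs): KeyValue.equals now ignores mvccVersion, per the change above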

View File

@@ -0,0 +1,198 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellTool;
/**
* static convenience methods for dealing with KeyValues and collections of KeyValues
*/
@InterfaceAudience.Private
public class KeyValueTool {
/**************** length *********************/
public static int length(final Cell cell) {
return (int)KeyValue.getKeyValueDataStructureSize(cell.getRowLength(), cell.getFamilyLength(),
cell.getQualifierLength(), cell.getValueLength());
}
protected static int keyLength(final Cell cell) {
return (int)KeyValue.getKeyDataStructureSize(cell.getRowLength(), cell.getFamilyLength(),
cell.getQualifierLength());
}
public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
int length = kv.getLength();
if (includeMvccVersion) {
length += WritableUtils.getVIntSize(kv.getMvccVersion());
}
return length;
}
public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
final boolean includeMvccVersion) {
int length = 0;
for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
length += lengthWithMvccVersion(kv, includeMvccVersion);
}
return length;
}
/**************** copy key only *********************/
public static KeyValue copyToNewKeyValue(final Cell cell) {
KeyValue kvCell = new KeyValue(copyToNewByteArray(cell));
kvCell.setMvccVersion(cell.getMvccVersion());
return kvCell;
}
public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
byte[] bytes = new byte[keyLength(cell)];
appendKeyToByteArrayWithoutValue(cell, bytes, 0);
ByteBuffer buffer = ByteBuffer.wrap(bytes);
buffer.position(buffer.limit());//make it look as if each field were appended
return buffer;
}
public static byte[] copyToNewByteArray(final Cell cell) {
int v1Length = length(cell);
byte[] backingBytes = new byte[v1Length];
appendToByteArray(cell, backingBytes, 0);
return backingBytes;
}
protected static int appendKeyToByteArrayWithoutValue(final Cell cell, final byte[] output,
final int offset) {
int nextOffset = offset;
nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
nextOffset = CellTool.copyRowTo(cell, output, nextOffset);
nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
nextOffset = CellTool.copyFamilyTo(cell, output, nextOffset);
nextOffset = CellTool.copyQualifierTo(cell, output, nextOffset);
nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
return nextOffset;
}
/**************** copy key and value *********************/
public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) {
int pos = offset;
pos = Bytes.putInt(output, pos, keyLength(cell));
pos = Bytes.putInt(output, pos, cell.getValueLength());
pos = appendKeyToByteArrayWithoutValue(cell, output, pos);
CellTool.copyValueTo(cell, output, pos);
return pos + cell.getValueLength();
}
public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
byte[] bytes = new byte[length(cell)];
appendToByteArray(cell, bytes, 0);
ByteBuffer buffer = ByteBuffer.wrap(bytes);
buffer.position(buffer.limit());//make it look as if each field were appended
return buffer;
}
public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
final boolean includeMvccVersion) {
// keep pushing the limit out. assume enough capacity
bb.limit(bb.position() + kv.getLength());
bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
if (includeMvccVersion) {
int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getMvccVersion());
ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
ByteBufferUtils.writeVLong(bb, kv.getMvccVersion());
}
}
/**************** iterating *******************************/
/**
* Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
* position to the start of the next KeyValue. Does not allocate a new array or copy data.
*/
public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion) {
if (bb.isDirect()) {
throw new IllegalArgumentException("only supports heap buffers");
}
if (bb.remaining() < 1) {
return null;
}
int underlyingArrayOffset = bb.arrayOffset() + bb.position();
int keyLength = bb.getInt();
int valueLength = bb.getInt();
int kvLength = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keyLength + valueLength;
KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
ByteBufferUtils.skip(bb, keyLength + valueLength);
if (includesMvccVersion) {
long mvccVersion = ByteBufferUtils.readVLong(bb);
keyValue.setMvccVersion(mvccVersion);
}
return keyValue;
}
/*************** next/previous **********************************/
/**
* Append single byte 0x00 to the end of the input row key
*/
public static KeyValue createFirstKeyInNextRow(final Cell in){
byte[] nextRow = new byte[in.getRowLength() + 1];
System.arraycopy(in.getRowArray(), in.getRowOffset(), nextRow, 0, in.getRowLength());
nextRow[nextRow.length - 1] = 0;//maybe not necessary
return KeyValue.createFirstOnRow(nextRow);
}
/**
* Increment the row bytes and clear the other fields
*/
public static KeyValue createFirstKeyInIncrementedRow(final Cell in){
byte[] thisRow = new ByteRange(in.getRowArray(), in.getRowOffset(), in.getRowLength())
.deepCopyToNewArray();
byte[] nextRow = Bytes.unsignedCopyAndIncrement(thisRow);
return KeyValue.createFirstOnRow(nextRow);
}
/**
* Decrement the timestamp. For tests (currently wasteful).
*
* Remember that timestamps are sorted reverse chronologically.
* @param in the KeyValue whose timestamp to decrement
* @return the first possible KeyValue on the same row, family, and qualifier, with the
* decremented timestamp
*/
public static KeyValue previousKey(final KeyValue in) {
return KeyValue.createFirstOnRow(CellTool.getRowArray(in), CellTool.getFamilyArray(in),
CellTool.getQualifierArray(in), in.getTimestamp() - 1);
}
}
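A sketch of the append/shallow-copy cycle, including the trailing mvcc vint (single KeyValue for brevity; create(..) is the KeyValueTestUtil helper from this patch):

    KeyValue kv = KeyValueTestUtil.create("r", "f", "q", 1L, "v");
    kv.setMvccVersion(7L);
    ByteBuffer bb = ByteBuffer.allocate(KeyValueTool.lengthWithMvccVersion(kv, true));
    KeyValueTool.appendToByteBuffer(bb, kv, true);
    bb.rewind();
    // Points into bb.array() without copying, and advances bb past the KeyValue + vint:
    KeyValue shallow = KeyValueTool.nextShallowCopy(bb, true);
    // shallow.getMvccVersion() == 7L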

View File

@@ -162,7 +162,7 @@ public interface DataBlockEncoder {
*/
public ByteBuffer getValueShallowCopy();
/** @return key value at current position. */
/** @return key value at current position with position set to limit */
public ByteBuffer getKeyValueBuffer();
/**

View File

@@ -24,7 +24,6 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
/**
* A default implementation of {@link HFileBlockDecodingContext}. It assumes the

View File

@@ -25,10 +25,11 @@ import java.io.IOException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import com.google.common.base.Preconditions;
/**
* A default implementation of {@link HFileBlockEncodingContext}. It will
* compress the data section as one continuous buffer.
@@ -85,28 +86,15 @@ public class HFileBlockDefaultEncodingContext implements
+ compressionAlgorithm, e);
}
}
if (headerBytes == null) {
dummyHeader = HFileBlock.DUMMY_HEADER;
} else {
dummyHeader = headerBytes;
}
}
/**
* @param compressionAlgorithm compression algorithm
* @param encoding encoding
*/
public HFileBlockDefaultEncodingContext(
Compression.Algorithm compressionAlgorithm,
DataBlockEncoding encoding) {
this(compressionAlgorithm, encoding, null);
dummyHeader = Preconditions.checkNotNull(headerBytes,
"Please pass HFileBlock.DUMMY_HEADER instead of null for param headerBytes");
}
/**
* prepare to start a new encoding.
* @throws IOException
*/
void prepareEncoding() throws IOException {
public void prepareEncoding() throws IOException {
encodedStream.reset();
dataOut.write(dummyHeader);
if (encodingAlgo != null

View File

@@ -17,6 +17,7 @@
package org.apache.hadoop.hbase.io.encoding;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.BlockType;
@@ -30,6 +31,11 @@ import org.apache.hadoop.hbase.io.hfile.BlockType;
*/
public interface HFileBlockEncodingContext {
/**
* @return OutputStream to which encoded data is written
*/
public OutputStream getOutputStreamForEncoder();
/**
* @return encoded and compressed bytes with header which are ready to write
* out to disk

View File

@@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Arrays;
public class ArrayUtils {
public static int length(byte[] a) {
if (a == null) {
return 0;
}
return a.length;
}
public static int length(long[] a) {
if (a == null) {
return 0;
}
return a.length;
}
public static int length(Object[] a) {
if (a == null) {
return 0;
}
return a.length;
}
public static boolean isEmpty(byte[] a) {
if (a == null) {
return true;
}
if (a.length == 0) {
return true;
}
return false;
}
public static boolean isEmpty(long[] a) {
if (a == null) {
return true;
}
if (a.length == 0) {
return true;
}
return false;
}
public static boolean isEmpty(Object[] a) {
if (a == null) {
return true;
}
if (a.length == 0) {
return true;
}
return false;
}
public static long getFirst(long[] a) {
return a[0];
}
public static long getLast(long[] a) {
return a[a.length - 1];
}
public static int getTotalLengthOfArrays(Iterable<byte[]> arrays) {
if (arrays == null) {
return 0;
}
int length = 0;
for (byte[] bytes : arrays) {
length += length(bytes);
}
return length;
}
public static ArrayList<Long> toList(long[] array){
int length = length(array);
ArrayList<Long> list = new ArrayList<Long>(length);
for(int i=0; i < length; ++i){
list.add(array[i]);
}
return list;
}
public static byte[] growIfNecessary(byte[] array, int minLength, int numAdditionalBytes) {
if(array.length >= minLength){
return array;
}
return Arrays.copyOf(array, minLength + numAdditionalBytes);
}
public static int[] growIfNecessary(int[] array, int minLength, int numAdditionalInts) {
if(array.length >= minLength){
return array;
}
return Arrays.copyOf(array, minLength + numAdditionalInts);
}
public static long[] growIfNecessary(long[] array, int minLength, int numAdditionalLongs) {
if(array.length >= minLength){
return array;
}
return Arrays.copyOf(array, minLength + numAdditionalLongs);
}
}

View File

@@ -16,6 +16,7 @@
*/
package org.apache.hadoop.hbase.util;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
@@ -25,8 +26,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.io.encoding.
EncoderBufferTooSmallException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.WritableUtils;
/**
@@ -299,23 +299,6 @@ public final class ByteBufferUtils {
return tmpLength;
}
/**
* Asserts that there is at least the given amount of unfilled space
* remaining in the given buffer.
* @param out typically, the buffer we are writing to
* @param length the required space in the buffer
* @throws EncoderBufferTooSmallException If there is not enough space remaining.
*/
public static void ensureSpace(ByteBuffer out, int length)
throws EncoderBufferTooSmallException {
if (out.position() + length > out.limit()) {
throw new EncoderBufferTooSmallException(
"Buffer position=" + out.position() +
", buffer limit=" + out.limit() +
", length to be written=" + length);
}
}
/**
* Copy the given number of bytes from the given stream and put it at the
* current position of the given buffer, updating the position in the buffer.
@@ -335,6 +318,17 @@
}
}
}
/**
* Copy from the InputStream to a new heap ByteBuffer until the InputStream is exhausted.
*/
public static ByteBuffer drainInputStreamToBuffer(InputStream is) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
IOUtils.copyBytes(is, baos, 4096, true);
ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
buffer.rewind();
return buffer;
}
/**
* Copy from one buffer to another from given offset
@@ -440,4 +434,24 @@
buffer.position(buffer.position() + length);
}
public static void extendLimit(ByteBuffer buffer, int numBytes) {
buffer.limit(buffer.limit() + numBytes);
}
/**
* Copy the bytes from startPosition to limit into a new byte[] of the exact length and set the
* position back to its original value afterward (not thread safe).
* @param buffer copy from here
* @param startPosition put buffer.get(startPosition) into byte[0]
* @return a new byte[] containing the bytes in the specified range
*/
public static byte[] toBytes(ByteBuffer buffer, int startPosition) {
int originalPosition = buffer.position();
byte[] output = new byte[buffer.limit() - startPosition];
buffer.position(startPosition);
buffer.get(output);
buffer.position(originalPosition);
return output;
}
}
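A small sketch of the new toBytes helper's position handling:

    ByteBuffer bb = ByteBuffer.wrap(new byte[] { 10, 20, 30, 40 });
    bb.position(3);                                // simulate a partially consumed buffer
    byte[] tail = ByteBufferUtils.toBytes(bb, 1);  // copies indices 1..limit-1 -> {20, 30, 40}
    // bb.position() is restored to 3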

View File

@@ -0,0 +1,301 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
/**
* Lightweight, reusable class for specifying ranges of byte[]'s. CompareTo and equals methods are
* lexicographic, which is native to HBase.
* <p/>
* This class differs from ByteBuffer:
* <li/>On-heap bytes only
* <li/>Implements equals, hashCode, and compareTo so that it can be used in standard java
* Collections, similar to String.
* <li/>Does not maintain mark/position iterator state inside the class. Doing so leads to many bugs
* in complex applications.
* <li/>Allows the addition of simple core methods like this.copyTo(that, offset).
* <li/>Can be reused in tight loops like a major compaction which can save significant amounts of
* garbage.
* <li/>(Without reuse, we throw off garbage like this thing:
* http://www.youtube.com/watch?v=lkmBH-MjZF4)
* <p/>
* Mutable, and always evaluates equals, hashCode, and compareTo based on the current contents.
* <p/>
* Can contain convenience methods for comparing, printing, cloning, spawning new arrays, copying to
* other arrays, etc. Please place non-core methods into {@link ByteRangeTool}.
* <p/>
* We may consider converting this to an interface and creating separate implementations for a
* single byte[], a paged byte[] (growable byte[][]), a ByteBuffer, etc
*/
public class ByteRange implements Comparable<ByteRange> {
private static final int UNSET_HASH_VALUE = -1;
/********************** fields *****************************/
// Do not make these final, as the intention is to reuse objects of this class
/**
* The array containing the bytes in this range. Its length will be >= this range's length.
*/
private byte[] bytes;
/**
* The index of the first byte in this range. ByteRange.get(0) will return bytes[offset].
*/
private int offset;
/**
* The number of bytes in the range. Offset + length must be <= bytes.length
*/
private int length;
/**
* Variable for lazy-caching the hashCode of this range. Useful for frequently used ranges,
* long-lived ranges, or long ranges.
*/
private int hash = UNSET_HASH_VALUE;
/********************** construct ***********************/
public ByteRange() {
set(new byte[0]);//Could probably get away with a null array if the need arises.
}
public ByteRange(byte[] bytes) {
set(bytes);
}
public ByteRange(byte[] bytes, int offset, int length) {
set(bytes, offset, length);
}
/********************** write methods *************************/
public ByteRange clear() {
clearHashCache();
bytes = null;
offset = 0;
length = 0;
return this;
}
public ByteRange set(byte[] bytes) {
clearHashCache();
this.bytes = bytes;
this.offset = 0;
this.length = ArrayUtils.length(bytes);
return this;
}
public ByteRange set(byte[] bytes, int offset, int length) {
clearHashCache();
this.bytes = bytes;
this.offset = offset;
this.length = length;
return this;
}
public void setLength(int length) {
clearHashCache();
this.length = length;
}
/*********** read methods (add non-core methods to ByteRangeUtils) *************/
/**
* @param index zero-based index
* @return single byte at index
*/
public byte get(int index) {
return bytes[offset + index];
}
/**
* Instantiate a new byte[] with the exact range length (costing at least 24 bytes of array
* overhead plus the data itself) and copy the contents of this range into it.
* @return The newly cloned byte[].
*/
public byte[] deepCopyToNewArray() {
byte[] result = new byte[length];
System.arraycopy(bytes, offset, result, 0, length);
return result;
}
/**
* Create a new ByteRange with new backing byte[] and copy the state of this range into the new
* range. Copy the hash over if it is already calculated.
* @return a deep copy of this range backed by its own new byte[]
*/
public ByteRange deepCopy() {
ByteRange clone = new ByteRange(deepCopyToNewArray());
if (isHashCached()) {
clone.hash = hash;
}
return clone;
}
/**
* Wrapper for System.arraycopy. Copy the contents of this range into the provided array.
* @param destination Copy to this array
* @param destinationOffset First index in the destination array.
* @return void to avoid confusion between which ByteRange should be returned
*/
public void deepCopyTo(byte[] destination, int destinationOffset) {
System.arraycopy(bytes, offset, destination, destinationOffset, length);
}
/**
* Wrapper for System.arraycopy. Copy the contents of this range into the provided array.
* @param innerOffset Start copying from this index in this source ByteRange. First byte copied is
* bytes[offset + innerOffset]
* @param copyLength Copy this many bytes
* @param destination Copy to this array
* @param destinationOffset First index in the destination array.
* @return void to avoid confusion between which ByteRange should be returned
*/
public void deepCopySubRangeTo(int innerOffset, int copyLength, byte[] destination,
int destinationOffset) {
System.arraycopy(bytes, offset + innerOffset, destination, destinationOffset, copyLength);
}
/**
* Create a new ByteRange that points at this range's byte[]. The new range can have different
* values for offset and length, but modifying the shallowCopy will modify the bytes in this
* range's array. Pass over the hash code if it is already cached.
* @param innerOffset First byte of clone will be this.offset + innerOffset.
* @param copyLength Number of bytes in the clone.
* @return new ByteRange object referencing this range's byte[].
*/
public ByteRange shallowCopySubRange(int innerOffset, int copyLength) {
ByteRange clone = new ByteRange(bytes, offset + innerOffset, copyLength);
if (isHashCached()) {
clone.hash = hash;
}
return clone;
}
//TODO move to ByteRangeUtils because it is non-core method
public int numEqualPrefixBytes(ByteRange that, int thatInnerOffset) {
int maxCompares = Math.min(length, that.length - thatInnerOffset);
for (int i = 0; i < maxCompares; ++i) {
if (bytes[offset + i] != that.bytes[that.offset + thatInnerOffset + i]) {
return i;
}
}
return maxCompares;
}
public byte[] getBytes() {
return bytes;
}
public int getOffset() {
return offset;
}
public int getLength() {
return length;
}
public boolean isEmpty(){
return isEmpty(this);
}
public boolean notEmpty(){
return notEmpty(this);
}
/******************* static methods ************************/
public static boolean isEmpty(ByteRange range){
return range == null || range.length == 0;
}
public static boolean notEmpty(ByteRange range){
return range != null && range.length > 0;
}
/******************* standard methods *********************/
@Override
public boolean equals(Object thatObject) {
if (thatObject == null){
return false;
}
if (this == thatObject) {
return true;
}
if (hashCode() != thatObject.hashCode()) {
return false;
}
if (!(thatObject instanceof ByteRange)) {
return false;
}
ByteRange that = (ByteRange) thatObject;
return Bytes.equals(bytes, offset, length, that.bytes, that.offset, that.length);
}
@Override
public int hashCode() {
if (isHashCached()) {// hash is already calculated and cached
return hash;
}
if (this.isEmpty()) {// return 0 for empty ByteRange
hash = 0;
return hash;
}
int off = offset;
hash = 0;
for (int i = 0; i < length; i++) {
hash = 31 * hash + bytes[off++];
}
return hash;
}
private boolean isHashCached() {
return hash != UNSET_HASH_VALUE;
}
private void clearHashCache() {
hash = UNSET_HASH_VALUE;
}
/**
* Bitwise comparison of each byte in the array. Unsigned comparison, not paying attention to
* java's signed bytes.
*/
@Override
public int compareTo(ByteRange other) {
return Bytes.compareTo(bytes, offset, length, other.bytes, other.offset, other.length);
}
@Override
public String toString() {
return Bytes.toStringBinary(bytes, offset, length);
}
}
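A brief sketch of the reuse pattern the class comment describes, repointing one ByteRange at successive windows of a shared array instead of allocating per iteration:

    byte[] block = Bytes.toBytes("aaabbbccc");
    ByteRange range = new ByteRange();   // allocated once
    for (int i = 0; i < block.length; i += 3) {
      range.set(block, i, 3);            // repoint; also clears the cached hash
      System.out.println(range);         // toStringBinary of the current 3-byte window
    }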

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Collection;
import com.google.common.collect.Lists;
/**
* Utility methods for {@link ByteRange}.
*/
public class ByteRangeTool {
public static ArrayList<byte[]> copyToNewArrays(Collection<ByteRange> ranges) {
if (ranges == null) {
return new ArrayList<byte[]>(0);
}
ArrayList<byte[]> arrays = Lists.newArrayListWithCapacity(ranges.size());
for (ByteRange range : ranges) {
arrays.add(range.deepCopyToNewArray());
}
return arrays;
}
public static ArrayList<ByteRange> fromArrays(Collection<byte[]> arrays) {
if (arrays == null) {
return new ArrayList<ByteRange>(0);
}
ArrayList<ByteRange> ranges = Lists.newArrayListWithCapacity(arrays.size());
for (byte[] array : arrays) {
ranges.add(new ByteRange(array));
}
return ranges;
}
}

View File

@@ -1647,7 +1647,7 @@ public class Bytes {
return toString(b, 0, n);
}
/**
* Copy the byte array given in parameter and return an instance
* of a new byte array with the same length and the same content.
@@ -1660,4 +1660,62 @@
System.arraycopy(bytes, 0, result, 0, bytes.length);
return result;
}
/**
* Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from
* somewhere. (mcorgan)
* @param a Array to search. Entries must be sorted and unique.
* @param fromIndex First index inclusive of "a" to include in the search.
* @param toIndex Last index exclusive of "a" to include in the search.
* @param key The byte to search for.
* @return The index of key if found. If not found, return -(index + 1), where negative indicates
* "not found" and the "index + 1" handles the "-0" case.
*/
public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) {
int unsignedKey = key & 0xff;
int low = fromIndex;
int high = toIndex - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
int midVal = a[mid] & 0xff;
if (midVal < unsignedKey) {
low = mid + 1;
} else if (midVal > unsignedKey) {
high = mid - 1;
} else {
return mid; // key found
}
}
return -(low + 1); // key not found.
}
/**
* Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding
* 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes.
*
* @param input The byte[] to increment.
* @return The incremented copy of "input". May be same length or 1 byte longer.
*/
public static byte[] unsignedCopyAndIncrement(final byte[] input) {
if (input == null) {// check before copying; copy(null) would NPE before a post-copy check ran
throw new IllegalArgumentException("cannot increment null array");
}
byte[] copy = copy(input);
for (int i = copy.length - 1; i >= 0; --i) {
if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum
copy[i] = 0;
} else {
++copy[i];
return copy;
}
}
// we maxed out the array
byte[] out = new byte[copy.length + 1];
out[0] = 1;
System.arraycopy(copy, 0, out, 1, copy.length);
return out;
}
}
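Hedged examples of the two new helpers' edge cases (values chosen to show the unsigned treatment and the carry behavior):

    byte[] sorted = { 0, 5, (byte) 0xFF };  // sorted unsigned: 0x00 < 0x05 < 0xFF
    Bytes.unsignedBinarySearch(sorted, 0, 3, (byte) 0xFF);  // 2 (a signed compare would misplace 0xFF)
    Bytes.unsignedBinarySearch(sorted, 0, 3, (byte) 7);     // -3, i.e. -(insertionPoint + 1)

    Bytes.unsignedCopyAndIncrement(new byte[] { 1, (byte) 0xFF });  // {2, 0}: carry into next byte
    Bytes.unsignedCopyAndIncrement(new byte[] { (byte) 0xFF });     // {1, 0}: grows by one byte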

View File

@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
* Utility methods for dealing with Collections, including treating null collections as empty.
*/
public class CollectionUtils {
private static final List<Object> EMPTY_LIST = Collections.unmodifiableList(
new ArrayList<Object>(0));
@SuppressWarnings("unchecked")
public static <T> Collection<T> nullSafe(Collection<T> in) {
if (in == null) {
return (Collection<T>)EMPTY_LIST;
}
return in;
}
/************************ size ************************************/
public static <T> int nullSafeSize(Collection<T> collection) {
if (collection == null) {
return 0;
}
return collection.size();
}
public static <A, B> boolean nullSafeSameSize(Collection<A> a, Collection<B> b) {
return nullSafeSize(a) == nullSafeSize(b);
}
/*************************** empty ****************************************/
public static <T> boolean isEmpty(Collection<T> collection) {
return collection == null || collection.isEmpty();
}
public static <T> boolean notEmpty(Collection<T> collection) {
return !isEmpty(collection);
}
/************************ first/last **************************/
public static <T> T getFirst(Collection<T> collection) {
if (CollectionUtils.isEmpty(collection)) {
return null;
}
for (T t : collection) {
return t;
}
return null;
}
/**
* @param list any list
* @return -1 if list is empty, otherwise the max index
*/
public static int getLastIndex(List<?> list){
if(isEmpty(list)){
return -1;
}
return list.size() - 1;
}
/**
* @param list the list to check
* @param index the index in question
* @return true if it is the last index or if list is empty and -1 is passed for the index param
*/
public static boolean isLastIndex(List<?> list, int index){
return index == getLastIndex(list);
}
public static <T> T getLast(List<T> list) {
if (isEmpty(list)) {
return null;
}
return list.get(list.size() - 1);
}
}

View File

@@ -1,5 +1,4 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,37 +16,26 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase;
package org.apache.hadoop.hbase.util;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class KeyValueTestUtil {
/**
* Utility methods for Iterable including null-safe handlers.
*/
public class IterableUtils {
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
String value)
{
return create(row, family, qualifier, timestamp, KeyValue.Type.Put, value);
private static final List<Object> EMPTY_LIST = Collections
.unmodifiableList(new ArrayList<Object>(0));
@SuppressWarnings("unchecked")
public static <T> Iterable<T> nullSafe(Iterable<T> in) {
if (in == null) {
return (List<T>) EMPTY_LIST;
}
return in;
}
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
KeyValue.Type type,
String value)
{
return new KeyValue(
Bytes.toBytes(row),
Bytes.toBytes(family),
Bytes.toBytes(qualifier),
timestamp,
type,
Bytes.toBytes(value)
);
}
}

View File

@@ -76,4 +76,41 @@ public class Strings {
return null;
return dnPtr.endsWith(".") ? dnPtr.substring(0, dnPtr.length()-1) : dnPtr;
}
/**
* Null-safe length check.
* @param input the String to check
* @return true if null or length==0
*/
public static boolean isEmpty(String input) {
return input == null || input.length() == 0;
}
/**
* Push the input string to the right by appending a character before it, usually a space.
* @param input the string to pad
* @param padding the character to repeat to the left of the input string
* @param length the desired total length including the padding
* @return padding characters + input
*/
public static String padFront(String input, char padding, int length) {
if (input.length() > length) {
throw new IllegalArgumentException("input \"" + input + "\" longer than maxLength=" + length);
}
int numPaddingCharacters = length - input.length();
return repeat(padding, numPaddingCharacters) + input;
}
/**
* @param c repeat this character
* @param repeatFor the length of the output String
* @return c, repeated repeatFor times
*/
public static String repeat(char c, int repeatFor) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < repeatFor; ++i) {
sb.append(c);
}
return sb.toString();
}
}

View File

@@ -80,8 +80,10 @@ public class LoadTestKVGenerator {
*/
public byte[] generateRandomSizeValue(long key, String qual) {
String rowKey = md5PrefixedKey(key);
int dataSize = minValueSize + randomForValueSize.nextInt(
Math.abs(maxValueSize - minValueSize));
int dataSize = minValueSize;
if(minValueSize != maxValueSize){
dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
}
return getValueForRowColumn(rowKey, qual, dataSize);
}

View File

@@ -14,7 +14,7 @@
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
package org.apache.hadoop.hbase.util.test;
import java.nio.ByteBuffer;
import java.util.ArrayList;

View File

@@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hbase.cell.CellTool;
/**
* The unit of storage in HBase consisting of the following fields:<br/>
* <pre>
* 1) row
* 2) column family
* 3) column qualifier
* 4) timestamp
* 5) type
* 6) MVCC version
* 7) value
* </pre>
* <p/>
* Uniqueness is determined by the combination of row, column family, column qualifier,
* timestamp, and type.
* <p/>
* The natural comparator will perform a bitwise comparison on row, column family, and column
* qualifier. Less intuitively, it will then treat the greater timestamp as the lesser value with
* the goal of sorting newer cells first.
* <p/>
* This interface does not include methods that allocate new byte[]'s such as those used in client
* or debugging code. These should be placed in a sub-interface or the {@link CellTool} class.
* <p/>
* Cell implements Comparable<Cell> which is only meaningful when comparing to other keys in the
* same table. It uses {@link CellComparator} which does not work on the -ROOT- and .META. tables.
* <p/>
* In the future, we may consider adding a boolean isOnHeap() method and a getValueBuffer() method
* that can be used to pass a value directly from an off-heap ByteBuffer to the network without
* copying into an on-heap byte[].
* <p/>
* Historic note: the original Cell implementation (KeyValue) requires that all fields be encoded as
* consecutive bytes in the same byte[], whereas this interface allows fields to reside in separate
* byte[]'s.
* <p/>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface Cell {
//1) Row
/**
* Contiguous raw bytes that may start at any index in the containing array. Max length is
* Short.MAX_VALUE which is 32,767 bytes.
* @return The array containing the row bytes.
*/
byte[] getRowArray();
/**
* @return Array index of first row byte
*/
int getRowOffset();
/**
* @return Number of row bytes. Must be < rowArray.length - offset.
*/
short getRowLength();
//2) Family
/**
* Contiguous bytes composed of legal HDFS filename characters which may start at any index in the
* containing array. Max length is Byte.MAX_VALUE, which is 127 bytes.
* @return the array containing the family bytes.
*/
byte[] getFamilyArray();
/**
* @return Array index of first family byte
*/
int getFamilyOffset();
/**
* @return Number of family bytes. Must be < familyArray.length - offset.
*/
byte getFamilyLength();
//3) Qualifier
/**
* Contiguous raw bytes that may start at any index in the containing array. Max length is
* Short.MAX_VALUE which is 32,767 bytes.
* @return The array containing the qualifier bytes.
*/
byte[] getQualifierArray();
/**
* @return Array index of first qualifier byte
*/
int getQualifierOffset();
/**
* @return Number of qualifier bytes. Must be < qualifierArray.length - offset.
*/
int getQualifierLength();
//4) Timestamp
/**
* @return Long value representing time at which this cell was "Put" into the row. Typically
* represents the time of insertion, but can be any value from Long.MIN_VALUE to Long.MAX_VALUE.
*/
long getTimestamp();
//5) Type
/**
* See {@link KeyValue.Type}.
* @return The byte representation of the KeyValue.Type of this cell: one of Put, Delete, etc.
*/
byte getTypeByte();
//6) MvccVersion
/**
* Internal use only. A region-specific sequence ID given to each operation. It always exists for
* cells in the memstore but is not retained forever. It may survive several flushes, but
* generally becomes irrelevant after the cell's row is no longer involved in any operations that
* require strict consistency.
* @return mvccVersion (always >= 0 while it exists), or 0 if it no longer exists
*/
long getMvccVersion();
//7) Value
/**
* Contiguous raw bytes that may start at any index in the containing array. Max length is
* Integer.MAX_VALUE, which is 2,147,483,647 bytes.
* @return The array containing the value bytes.
*/
byte[] getValueArray();
/**
* @return Array index of first value byte
*/
int getValueOffset();
/**
* @return Number of value bytes. Must be < valueArray.length - offset.
*/
int getValueLength();
}
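Per the historic note above, fields need not share one backing array. A hypothetical implementation with one array per field, legal under this interface (Put type code and zero offsets are assumptions of this sketch):

    // Hypothetical Cell with each field in its own byte[].
    public class SeparateArraysCell implements Cell {
      private final byte[] row, family, qualifier, value;
      private final long timestamp;

      public SeparateArraysCell(byte[] row, byte[] family, byte[] qualifier, long timestamp,
          byte[] value) {
        this.row = row; this.family = family; this.qualifier = qualifier;
        this.timestamp = timestamp; this.value = value;
      }
      @Override public byte[] getRowArray() { return row; }
      @Override public int getRowOffset() { return 0; }
      @Override public short getRowLength() { return (short) row.length; }
      @Override public byte[] getFamilyArray() { return family; }
      @Override public int getFamilyOffset() { return 0; }
      @Override public byte getFamilyLength() { return (byte) family.length; }
      @Override public byte[] getQualifierArray() { return qualifier; }
      @Override public int getQualifierOffset() { return 0; }
      @Override public int getQualifierLength() { return qualifier.length; }
      @Override public long getTimestamp() { return timestamp; }
      @Override public byte getTypeByte() { return 4; }    // KeyValue.Type.Put code, assumed
      @Override public long getMvccVersion() { return 0; } // "no longer exists" -> 0
      @Override public byte[] getValueArray() { return value; }
      @Override public int getValueOffset() { return 0; }
      @Override public int getValueLength() { return value.length; }
    }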

View File

@@ -0,0 +1,178 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import java.io.Serializable;
import java.util.Comparator;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import com.google.common.primitives.Longs;
/**
* Compare two traditional HBase cells.
*
* Note: This comparator is not valid for -ROOT- and .META. tables.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class CellComparator implements Comparator<Cell>, Serializable{
private static final long serialVersionUID = -8760041766259623329L;
@Override
public int compare(Cell a, Cell b) {
return compareStatic(a, b);
}
public static int compareStatic(Cell a, Cell b) {
//row
int c = Bytes.compareTo(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
if (c != 0) return c;
//family
c = Bytes.compareTo(
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
if (c != 0) return c;
//qualifier
c = Bytes.compareTo(
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
if (c != 0) return c;
//timestamp: later sorts first
c = -Longs.compare(a.getTimestamp(), b.getTimestamp());
if (c != 0) return c;
//type
c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte());
if (c != 0) return c;
//mvccVersion: later sorts first
return -Longs.compare(a.getMvccVersion(), b.getMvccVersion());
}
/**************** equals ****************************/
public static boolean equals(Cell a, Cell b){
if (!areKeyLengthsEqual(a, b)) {
return false;
}
//TODO compare byte[]'s in reverse since later bytes more likely to differ
return 0 == compareStatic(a, b);
}
public static boolean equalsRow(Cell a, Cell b){
if(!areRowLengthsEqual(a, b)){
return false;
}
return 0 == Bytes.compareTo(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
}
/********************* hashCode ************************/
/**
* Returns a hash code that is always the same for two Cells having a matching equals(..) result.
* A null Cell hashes to 0.
*/
public static int hashCode(Cell cell){
if (cell == null) {// return 0 for empty Cell
return 0;
}
//pre-calculate the 3 hashes made of byte ranges
int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
//combine the 6 sub-hashes
int hash = 31 * rowHash + familyHash;
hash = 31 * hash + qualifierHash;
hash = 31 * hash + (int)cell.getTimestamp();
hash = 31 * hash + cell.getTypeByte();
hash = 31 * hash + (int)cell.getMvccVersion();
return hash;
}
/******************** lengths *************************/
public static boolean areKeyLengthsEqual(Cell a, Cell b) {
return a.getRowLength() == b.getRowLength()
&& a.getFamilyLength() == b.getFamilyLength()
&& a.getQualifierLength() == b.getQualifierLength();
}
public static boolean areRowLengthsEqual(Cell a, Cell b) {
return a.getRowLength() == b.getRowLength();
}
/***************** special cases ****************************/
/**
* special case for KeyValue.equals
*/
private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) {
//row
int c = Bytes.compareTo(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
if (c != 0) return c;
//family
c = Bytes.compareTo(
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
if (c != 0) return c;
//qualifier
c = Bytes.compareTo(
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
if (c != 0) return c;
//timestamp: later sorts first
c = -Longs.compare(a.getTimestamp(), b.getTimestamp());
if (c != 0) return c;
//type
c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte());
return c;
}
/**
* special case for KeyValue.equals
*/
public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){
return 0 == compareStaticIgnoreMvccVersion(a, b);
}
}
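A quick sketch of the "later sorts first" timestamp rule above (row, family, and qualifier are equal here, so the comparison falls through to the timestamps; create(..) is the KeyValueTestUtil helper from this patch):

    Cell older = KeyValueTestUtil.create("row", "fam", "q", 1L, "v1");
    Cell newer = KeyValueTestUtil.create("row", "fam", "q", 2L, "v2");
    // CellComparator.compareStatic(newer, older) < 0: the newer cell sorts first
    List<Cell> cells = Lists.newArrayList(older, newer);
    Collections.sort(cells, new CellComparator()); // cells.get(0) is now the newer cell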

View File

@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hbase.Cell;
/**
* Accepts a stream of Cells and adds them to its internal data structure. This can be used to build
* a block of cells during compactions and flushes, or to build a byte[] to send to the client. This
* could be backed by a List<KeyValue>, but more efficient implementations will append results to a
* byte[] to eliminate overhead, and possibly encode the cells further.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface CellOutputStream {
/**
* Implementations must copy the entire state of the Cell. If the appended Cell is modified
* immediately after the append method returns, the modifications must have absolutely no effect
* on the copy of the Cell that was added to the appender. For example, calling someList.add(cell)
* is not correct.
*/
void write(Cell cell);
/**
* Let the implementation decide what to do. Usually means writing accumulated data into a byte[]
* that can then be read from the implementation to be sent to disk, put in the block cache, or
* sent over the network.
*/
void flush();
}
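For illustration, a minimal list-backed implementation honoring the copy contract might look like the following sketch (the KeyValueTool.copyToNewKeyValue deep-copy helper is assumed here and is not shown in this excerpt):

import java.util.ArrayList;
import java.util.List;

public class ListCellOutputStream implements CellOutputStream {
  private final List<Cell> cells = new ArrayList<Cell>();

  @Override
  public void write(Cell cell) {
    // deep-copy so that later mutation of the caller's cell cannot affect us
    cells.add(KeyValueTool.copyToNewKeyValue(cell));
  }

  @Override
  public void flush() {
    // nothing buffered beyond the list itself; a byte[]-backed implementation
    // would encode and emit its accumulated cells at this point
  }
}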

View File

@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* An indicator of the state of the scanner after an operation such as nextCell() or positionAt(..).
* For example:
* <ul>
* <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
* it should load the next block.</li>
* <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.</li>
* <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
* next region.</li>
* </ul>
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public enum CellScannerPosition {
/**
* getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first
* cell.
*/
BEFORE_FIRST,
/**
* getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..);
* rather, it is the nearest cell before the requested cell.
*/
BEFORE,
/**
* getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by
* positionAt(..).
*/
AT,
/**
* getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..);
* rather, it is the nearest cell after the requested cell.
*/
AFTER,
/**
* getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect.
*/
AFTER_LAST
}

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hbase.Cell;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class CellTool {
/******************* ByteRange *******************************/
public static ByteRange fillRowRange(Cell cell, ByteRange range) {
return range.set(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
}
public static ByteRange fillFamilyRange(Cell cell, ByteRange range) {
return range.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
}
public static ByteRange fillQualifierRange(Cell cell, ByteRange range) {
return range.set(cell.getQualifierArray(), cell.getQualifierOffset(),
cell.getQualifierLength());
}
/***************** get individual arrays for tests ************/
public static byte[] getRowArray(Cell cell){
byte[] output = new byte[cell.getRowLength()];
copyRowTo(cell, output, 0);
return output;
}
public static byte[] getFamilyArray(Cell cell){
byte[] output = new byte[cell.getFamilyLength()];
copyFamilyTo(cell, output, 0);
return output;
}
public static byte[] getQualifierArray(Cell cell){
byte[] output = new byte[cell.getQualifierLength()];
copyQualifierTo(cell, output, 0);
return output;
}
public static byte[] getValueArray(Cell cell){
byte[] output = new byte[cell.getValueLength()];
copyValueTo(cell, output, 0);
return output;
}
/******************** copyTo **********************************/
public static int copyRowTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getRowArray(), cell.getRowOffset(), destination, destinationOffset,
cell.getRowLength());
return destinationOffset + cell.getRowLength();
}
public static int copyFamilyTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getFamilyArray(), cell.getFamilyOffset(), destination, destinationOffset,
cell.getFamilyLength());
return destinationOffset + cell.getFamilyLength();
}
public static int copyQualifierTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getQualifierArray(), cell.getQualifierOffset(), destination,
destinationOffset, cell.getQualifierLength());
return destinationOffset + cell.getQualifierLength();
}
public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset,
cell.getValueLength());
return destinationOffset + cell.getValueLength();
}
/********************* misc *************************************/
public static byte getRowByte(Cell cell, int index) {
return cell.getRowArray()[cell.getRowOffset() + index];
}
/********************** KeyValue (move to KeyValueUtils) *********************/
public static ByteBuffer getValueBufferShallowCopy(Cell cell) {
ByteBuffer buffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(),
cell.getValueLength());
// buffer.position(buffer.limit());//make it look as if value was appended
return buffer;
}
}
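The copyTo helpers return the next write offset, so they chain naturally. For example, flattening a cell's key fields into one contiguous array (a usage sketch built only from the methods above):

byte[] flat = new byte[cell.getRowLength() + cell.getFamilyLength()
    + cell.getQualifierLength()];
int pos = CellTool.copyRowTo(cell, flat, 0);
pos = CellTool.copyFamilyTo(cell, flat, pos);
pos = CellTool.copyQualifierTo(cell, flat, pos);  // pos now == flat.length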

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.hadoop.hbase.SmallTests;
import org.junit.experimental.categories.Category;
@Category(SmallTests.class)
public class TestByteRange extends TestCase {
public void testEmpty(){
Assert.assertTrue(ByteRange.isEmpty(null));
ByteRange r = new ByteRange();
Assert.assertTrue(ByteRange.isEmpty(r));
Assert.assertFalse(ByteRange.notEmpty(r));
Assert.assertTrue(r.isEmpty());
Assert.assertFalse(r.notEmpty());
Assert.assertNotNull(r.getBytes());//should be empty byte[], but could change this behavior
Assert.assertEquals(0, r.getBytes().length);
Assert.assertEquals(0, r.getOffset());
Assert.assertEquals(0, r.getLength());
Assert.assertTrue(Bytes.equals(new byte[0], r.deepCopyToNewArray()));
Assert.assertEquals(0, r.compareTo(new ByteRange(new byte[0], 0, 0)));
Assert.assertEquals(0, r.hashCode());
}
public void testBasics(){
ByteRange r = new ByteRange(new byte[]{1, 3, 2});
Assert.assertFalse(ByteRange.isEmpty(r));
Assert.assertNotNull(r.getBytes());//should be the wrapped array {1, 3, 2}
Assert.assertEquals(3, r.getBytes().length);
Assert.assertEquals(0, r.getOffset());
Assert.assertEquals(3, r.getLength());
//cloning (deep copying)
Assert.assertTrue(Bytes.equals(new byte[]{1, 3, 2}, r.deepCopyToNewArray()));
Assert.assertNotSame(r.getBytes(), r.deepCopyToNewArray());
//hash code
Assert.assertTrue(r.hashCode() > 0);
Assert.assertEquals(r.hashCode(), r.deepCopy().hashCode());
//copying to arrays
byte[] destination = new byte[]{-59};//junk
r.deepCopySubRangeTo(2, 1, destination, 0);
Assert.assertTrue(Bytes.equals(new byte[]{2}, destination));
//set length
r.setLength(1);
Assert.assertTrue(Bytes.equals(new byte[]{1}, r.deepCopyToNewArray()));
r.setLength(2);//verify we retained the 2nd byte, but dangerous in real code
Assert.assertTrue(Bytes.equals(new byte[]{1, 3}, r.deepCopyToNewArray()));
}
}

View File

@ -28,8 +28,10 @@ import java.util.Arrays;
import java.util.Random;
import junit.framework.TestCase;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.SmallTests;
import org.junit.Assert;
import org.junit.experimental.categories.Category;
@Category(SmallTests.class)
@ -376,7 +378,7 @@ public class TestBytes extends TestCase {
assertEquals("World", Bytes.readStringFixedSize(dis, 18));
assertEquals("", Bytes.readStringFixedSize(dis, 9));
}
public void testCopy() throws Exception {
byte [] bytes = Bytes.toBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
byte [] copy = Bytes.copy(bytes);
@ -396,5 +398,32 @@ public class TestBytes extends TestCase {
String bytes = Bytes.toStringBinary(Bytes.toBytes(2.17));
assertEquals(2.17, Bytes.toDouble(Bytes.toBytesBinary(bytes)), 0);
}
public void testUnsignedBinarySearch(){
byte[] bytes = new byte[]{0,5,123,127,-128,-100,-1};
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)5), 1);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)127), 3);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-128), 4);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-100), 5);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-1), 6);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)2), -1-1);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-5), -6-1);
}
public void testUnsignedIncrement(){
byte[] a = Bytes.toBytes(0);
int a2 = Bytes.toInt(Bytes.unsignedCopyAndIncrement(a), 0);
Assert.assertTrue(a2==1);
byte[] b = Bytes.toBytes(-1);
byte[] actual = Bytes.unsignedCopyAndIncrement(b);
Assert.assertNotSame(b, actual);
byte[] expected = new byte[]{1,0,0,0,0};
Assert.assertArrayEquals(expected, actual);
byte[] c = Bytes.toBytes(255);//should wrap to the next significant byte
int c2 = Bytes.toInt(Bytes.unsignedCopyAndIncrement(c), 0);
Assert.assertTrue(c2==256);
}
}
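A note on the expected values in testUnsignedBinarySearch: like Arrays.binarySearch, the method appears to signal a missing key by returning -(insertionPoint)-1, which is what the negative expectations above encode. For instance:

// (byte)2 would be inserted at index 1, so the result is -1-1 == -2
int idx = Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)2);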

View File

@ -352,4 +352,21 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
}
/**
* Asserts that there is at least the given amount of unfilled space
* remaining in the given buffer.
* @param out the buffer we are writing to (typically the encode destination)
* @param length the required space in the buffer
* @throws EncoderBufferTooSmallException if there is not enough space remaining.
*/
protected static void ensureSpace(ByteBuffer out, int length)
throws EncoderBufferTooSmallException {
if (out.position() + length > out.limit()) {
throw new EncoderBufferTooSmallException(
"Buffer position=" + out.position() +
", buffer limit=" + out.limit() +
", length to be written=" + length);
}
}
}
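The call pattern in the encoder subclasses below is to reserve the full key/value footprint, plus the KeyValue header (KeyValue.ROW_OFFSET bytes), before writing anything:

ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
buffer.putInt(keyLength);   // safe: space was verified above
buffer.putInt(valueLength);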

View File

@ -233,8 +233,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
// create the KeyValue buffer and fill in its prefix
int keyOffset = buffer.position();
ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength
+ KeyValue.ROW_OFFSET);
ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
buffer.putInt(keyLength);
buffer.putInt(valueLength);

View File

@ -230,8 +230,7 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
}
int commonLength = ByteBufferUtils.readCompressedInt(source);
ByteBufferUtils.ensureSpace(out, state.keyLength + state.valueLength +
KeyValue.ROW_OFFSET);
ensureSpace(out, state.keyLength + state.valueLength + KeyValue.ROW_OFFSET);
int kvPos = out.position();

View File

@ -126,8 +126,7 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
int keyOffset;
keyLength += commonLength;
ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength
+ KeyValue.ROW_OFFSET);
ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
buffer.putInt(keyLength);
buffer.putInt(valueLength);

View File

@ -725,7 +725,7 @@ public class HFileBlock implements Cacheable {
this.dataBlockEncoder = dataBlockEncoder != null
? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
defaultBlockEncodingCtx =
new HFileBlockDefaultEncodingContext(compressionAlgorithm, null);
new HFileBlockDefaultEncodingContext(compressionAlgorithm, null, DUMMY_HEADER);
dataBlockEncodingCtx =
this.dataBlockEncoder.newOnDiskDataBlockEncodingContext(
compressionAlgorithm, DUMMY_HEADER);

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
@ -75,7 +76,7 @@ public class TestDataBlockEncoders {
return encoder.newDataBlockEncodingContext(algo, encoding,
HFileBlock.DUMMY_HEADER);
} else {
return new HFileBlockDefaultEncodingContext(algo, encoding);
return new HFileBlockDefaultEncodingContext(algo, encoding, HFileBlock.DUMMY_HEADER);
}
}

View File

@ -27,19 +27,17 @@ import java.util.Map;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -59,9 +57,10 @@ public class TestEncodedSeekers {
private static final byte[] CF_BYTES = Bytes.toBytes(CF_NAME);
private static final int MAX_VERSIONS = 5;
private static final int BLOCK_SIZE = 64 * 1024;
private static final int MIN_VALUE_SIZE = 30;
private static final int MAX_VALUE_SIZE = 60;
private static final int NUM_ROWS = 1000;
private static final int NUM_ROWS = 1003;
private static final int NUM_COLS_PER_ROW = 20;
private static final int NUM_HFILES = 4;
private static final int NUM_ROWS_PER_FLUSH = NUM_ROWS / NUM_HFILES;
@ -101,61 +100,73 @@ public class TestEncodedSeekers {
.setMaxVersions(MAX_VERSIONS)
.setDataBlockEncoding(encoding)
.setEncodeOnDisk(encodeOnDisk)
.setBlocksize(BLOCK_SIZE)
);
LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(
MIN_VALUE_SIZE, MAX_VALUE_SIZE);
// Write
for (int i = 0; i < NUM_ROWS; ++i) {
//write the data, but leave some in the memstore
doPuts(region);
//verify correctness when memstore contains data
doGets(region);
//verify correctness again after compacting
region.compactStores();
doGets(region);
Map<DataBlockEncoding, Integer> encodingCounts = cache.getEncodingCountsForTest();
// Ensure that compactions don't pollute the cache with unencoded blocks
// in case of in-cache-only encoding.
System.err.println("encodingCounts=" + encodingCounts);
assertEquals(1, encodingCounts.size());
DataBlockEncoding encodingInCache = encodingCounts.keySet().iterator().next();
assertEquals(encoding, encodingInCache);
assertTrue(encodingCounts.get(encodingInCache) > 0);
}
private void doPuts(HRegion region) throws IOException{
LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE);
for (int i = 0; i < NUM_ROWS; ++i) {
byte[] key = MultiThreadedWriter.longToByteArrayKey(i);
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
Put put = new Put(key);
String colAsStr = String.valueOf(j);
byte[] col = Bytes.toBytes(colAsStr);
byte[] value = dataGenerator.generateRandomSizeValue(i, colAsStr);
put.add(CF_BYTES, Bytes.toBytes(colAsStr), value);
if(VERBOSE){
KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value);
System.err.println(Strings.padFront(i+"", ' ', 4)+" "+kvPut);
}
region.put(put);
}
if (i % NUM_ROWS_PER_FLUSH == 0) {
region.flushcache();
}
}
for (int doneCompaction = 0; doneCompaction <= 1; ++doneCompaction) {
// Read
for (int i = 0; i < NUM_ROWS; ++i) {
final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i);
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
if (VERBOSE) {
System.err.println("Reading row " + i + ", column " + j);
}
final String qualStr = String.valueOf(j);
final byte[] qualBytes = Bytes.toBytes(qualStr);
Get get = new Get(rowKey);
get.addColumn(CF_BYTES, qualBytes);
Result result = region.get(get, null);
assertEquals(1, result.size());
assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr,
result.getValue(CF_BYTES, qualBytes)));
}
private void doGets(HRegion region) throws IOException{
for (int i = 0; i < NUM_ROWS; ++i) {
final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i);
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
final String qualStr = String.valueOf(j);
if (VERBOSE) {
System.err.println("Reading row " + i + ", column " + j + " " + Bytes.toString(rowKey)+"/"
+qualStr);
}
}
if (doneCompaction == 0) {
// Compact, then read again at the next loop iteration.
region.compactStores();
final byte[] qualBytes = Bytes.toBytes(qualStr);
Get get = new Get(rowKey);
get.addColumn(CF_BYTES, qualBytes);
Result result = region.get(get, null);
assertEquals(1, result.size());
assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr,
result.getValue(CF_BYTES, qualBytes)));
}
}
Map<DataBlockEncoding, Integer> encodingCounts =
cache.getEncodingCountsForTest();
// Ensure that compactions don't pollute the cache with unencoded blocks
// in case of in-cache-only encoding.
System.err.println("encodingCounts=" + encodingCounts);
assertEquals(1, encodingCounts.size());
DataBlockEncoding encodingInCache =
encodingCounts.keySet().iterator().next();
assertEquals(encoding, encodingInCache);
assertTrue(encodingCounts.get(encodingInCache) > 0);
}
}

View File

@ -33,9 +33,8 @@ import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.junit.After;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -112,9 +111,8 @@ public class TestHFileDataBlockEncoder {
public void testEncodingWritePath() throws IOException {
// usually we have just the block without headers, but let's not complicate the test
HFileBlock block = getSampleHFileBlock();
HFileBlockEncodingContext context =
new HFileBlockDefaultEncodingContext(
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk());
HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), HFileBlock.DUMMY_HEADER);
blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
includesMemstoreTS, context, block.getBlockType());

View File

@ -310,4 +310,16 @@ public class TestByteBufferUtils {
throw new RuntimeException("Bug in test!", e);
}
}
@Test
public void testToBytes(){
ByteBuffer buffer = ByteBuffer.allocate(5);
buffer.put(new byte[]{0,1,2,3,4});
assertEquals(5, buffer.position());
assertEquals(5, buffer.limit());
byte[] copy = ByteBufferUtils.toBytes(buffer, 2);
assertArrayEquals(new byte[]{2,3,4}, copy);
assertEquals(5, buffer.position());
assertEquals(5, buffer.limit());
}
}