HBASE-62 Allow user to add arbitrary key/value pairs to table and column descriptors, and HBASE-34, 42, 43, and 700

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@677517 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2008-07-17 07:17:26 +00:00
parent d136ccecaf
commit 6279d759c0
39 changed files with 10767 additions and 293 deletions

View File

@ -186,6 +186,13 @@ Trunk (unreleased changes)
HBASE-744 BloomFilter serialization/deserialization broken
HBASE-742 Column length limit is not enforced (Jean-Daniel Cryans via Stack)
HBASE-737 Scanner: every cell in a row has the same timestamp
HBASE-700 hbase.io.index.interval needs to be configurable in column family
(Andrew Purtell via Stack)
HBASE-62 Allow user to add arbitrary key/value pairs to table and column
descriptors (Andrew Purtell via Stack)
HBASE-34 Set memcache flush size per column (Andrew Purtell via Stack)
HBASE-42 Set region split size on table creation (Andrew Purtell via Stack)
HBASE-43 Add a read-only attribute to columns (Andrew Purtell via Stack)
IMPROVEMENTS
HBASE-559 MR example job to count table rows
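Taken together, HBASE-62, 34, 42, 43, and 700 surface per-table and per-family settings through the descriptor classes changed below. A minimal sketch of creating a table with the new attributes, assuming a running cluster; the table and family names are hypothetical:

HBaseConfiguration conf = new HBaseConfiguration();
HTableDescriptor desc = new HTableDescriptor(Bytes.toBytes("demo"));
desc.setMaxFileSize(256 * 1024 * 1024);       // HBASE-42: split size set at table creation
desc.setMemcacheFlushSize(32 * 1024 * 1024);  // HBASE-34: memcache flush size
desc.setReadOnly(false);                      // HBASE-43: read-only attribute
HColumnDescriptor family = new HColumnDescriptor("info:");
family.setMapFileIndexInterval(64);           // HBASE-700: index interval per column family
desc.addFamily(family);
new HBaseAdmin(conf).createTable(desc);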

View File

@ -73,7 +73,19 @@ public class BloomFilterDescriptor implements WritableComparable {
public BloomFilterDescriptor() {
super();
}
/**
* Constructor.
* <p>
* Creates a deep copy of the supplied BloomFilterDescriptor.
*/
public BloomFilterDescriptor(BloomFilterDescriptor desc) {
super();
this.filterType = desc.filterType;
this.nbHash = desc.nbHash;
this.vectorSize = desc.vectorSize;
}
/**
* Creates a BloomFilterDescriptor for the specified type of filter, fixes
* the number of hash functions to 4 and computes a vector size using:

View File

@ -22,7 +22,10 @@ package org.apache.hadoop.hbase;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
@ -39,10 +42,11 @@ import org.apache.hadoop.io.WritableComparable;
public class HColumnDescriptor implements WritableComparable {
// For future backward compatibility
// Version 3 was when column names becaome byte arrays and when we picked up
// Version 3 was when column names become byte arrays and when we picked up
// Time-to-live feature. Version 4 was when we moved to byte arrays, HBASE-82.
// Version 5 was when bloom filter descriptors were removed.
private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)5;
// Version 6 adds metadata as a map where keys and values are byte[].
private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)6;
/**
* The type of compression.
@ -57,14 +61,17 @@ public class HColumnDescriptor implements WritableComparable {
BLOCK
}
// Defines for jruby/shell
public static final String COMPRESSION = "COMPRESSION";
public static final String IN_MEMORY = "IN_MEMORY";
public static final String BLOCKCACHE = "BLOCKCACHE";
public static final String LENGTH = "LENGTH";
public static final String TTL = "TTL";
public static final String VERSIONS = "VERSIONS";
public static final String BLOOMFILTER = "BLOOMFILTER";
public static final String FOREVER = "FOREVER";
public static final String MAPFILE_INDEX_INTERVAL =
"MAPFILE_INDEX_INTERVAL";
public static final String MEMCACHE_FLUSHSIZE = "MEMCACHE_FLUSHSIZE";
/**
* Default compression type.
@ -104,20 +111,16 @@ public class HColumnDescriptor implements WritableComparable {
// Column family name
private byte [] name;
// Number of versions to keep
private int maxVersions = DEFAULT_VERSIONS;
// Compression setting if any
private CompressionType compressionType = DEFAULT_COMPRESSION;
// Serve reads from in-memory cache
private boolean inMemory = DEFAULT_IN_MEMORY;
// Serve reads from in-memory block cache
private boolean blockCacheEnabled = DEFAULT_BLOCKCACHE;
// Maximum value size
private int maxValueLength = DEFAULT_LENGTH;
// Time to live of cell contents, in seconds from last timestamp
private int timeToLive = DEFAULT_TTL;
// True if bloom filter was specified
private boolean bloomFilter = false;
/**
* Default mapfile index interval.
*/
public static final int DEFAULT_MAPFILE_INDEX_INTERVAL = 128;
// Column metadata
protected Map<ImmutableBytesWritable,ImmutableBytesWritable> values =
new HashMap<ImmutableBytesWritable,ImmutableBytesWritable>();
/**
* Default constructor. Must be present for Writable.
@ -159,6 +162,21 @@ public class HColumnDescriptor implements WritableComparable {
Integer.MAX_VALUE, DEFAULT_TTL, false);
}
/**
* Constructor.
* Makes a deep copy of the supplied descriptor.
* Can make a modifiable descriptor from an UnmodifyableHColumnDescriptor.
* @param desc The descriptor.
*/
public HColumnDescriptor(HColumnDescriptor desc) {
super();
this.name = desc.name.clone();
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
desc.values.entrySet()) {
this.values.put(e.getKey(), e.getValue());
}
}
/**
* Constructor
* @param columnName Column family name. Must have the ':' ending.
@ -188,13 +206,13 @@ public class HColumnDescriptor implements WritableComparable {
// Until there is support, consider 0 or < 0 -- a configuration error.
throw new IllegalArgumentException("Maximum versions must be positive");
}
this.maxVersions = maxVersions;
this.inMemory = inMemory;
this.blockCacheEnabled = blockCacheEnabled;
this.maxValueLength = maxValueLength;
this.timeToLive = timeToLive;
this.bloomFilter = bloomFilter;
this.compressionType = compression;
setMaxVersions(maxVersions);
setInMemory(inMemory);
setBlockCacheEnabled(blockCacheEnabled);
setMaxValueLength(maxValueLength);
setTimeToLive(timeToLive);
setCompressionType(compression);
setBloomfilter(bloomFilter);
}
private static byte [] stripColon(final byte [] n) {
@ -203,7 +221,7 @@ public class HColumnDescriptor implements WritableComparable {
System.arraycopy(n, 0, result, 0, n.length - 1);
return result;
}
/**
* @param b Family name.
* @return <code>b</code>
@ -237,6 +255,13 @@ public class HColumnDescriptor implements WritableComparable {
return name;
}
/**
* @return Name of this column family with colon as required by client API
*/
public byte [] getNameWithColon() {
return HStoreKey.addDelimiter(this.name);
}
/**
* @return Name of this column family
*/
@ -244,71 +269,216 @@ public class HColumnDescriptor implements WritableComparable {
return Bytes.toString(this.name);
}
/**
* @param key The key.
* @return The value.
*/
public byte[] getValue(byte[] key) {
ImmutableBytesWritable ibw = values.get(new ImmutableBytesWritable(key));
if (ibw == null)
return null;
return ibw.get();
}
/**
* @param key The key.
* @return The value as a string.
*/
public String getValue(String key) {
byte[] value = getValue(Bytes.toBytes(key));
if (value == null)
return null;
return Bytes.toString(value);
}
/**
* @param key The key.
* @param value The value.
*/
public void setValue(byte[] key, byte[] value) {
values.put(new ImmutableBytesWritable(key),
new ImmutableBytesWritable(value));
}
/**
* @param key The key.
* @param value The value.
*/
public void setValue(String key, String value) {
setValue(Bytes.toBytes(key), Bytes.toBytes(value));
}
/** @return compression type being used for the column family */
public CompressionType getCompression() {
return this.compressionType;
String value = getValue(COMPRESSION);
if (value != null) {
if (value.equalsIgnoreCase("BLOCK"))
return CompressionType.BLOCK;
else if (value.equalsIgnoreCase("RECORD"))
return CompressionType.RECORD;
}
return CompressionType.NONE;
}
/** @return maximum number of versions */
public int getMaxVersions() {
return this.maxVersions;
String value = getValue(VERSIONS);
if (value != null)
return Integer.valueOf(value);
return DEFAULT_VERSIONS;
}
/**
* @param maxVersions maximum number of versions
*/
public void setMaxVersions(int maxVersions) {
setValue(VERSIONS, Integer.toString(maxVersions));
}
/**
* @return Compression type setting.
*/
public CompressionType getCompressionType() {
return this.compressionType;
return getCompression();
}
/**
* @param type Compression type setting.
*/
public void setCompressionType(CompressionType type) {
String compressionType;
switch (type) {
case BLOCK: compressionType = "BLOCK"; break;
case RECORD: compressionType = "RECORD"; break;
default: compressionType = "NONE"; break;
}
setValue(COMPRESSION, compressionType);
}
/**
* @return True if we are to keep all in use HRegionServer cache.
*/
public boolean isInMemory() {
return this.inMemory;
String value = getValue(IN_MEMORY);
if (value != null)
return Boolean.valueOf(value);
return DEFAULT_IN_MEMORY;
}
/**
* @param inMemory True if we are to keep all values in the HRegionServer
* cache
*/
public void setInMemory(boolean inMemory) {
setValue(IN_MEMORY, Boolean.toString(inMemory));
}
/**
* @return Maximum value length.
*/
public int getMaxValueLength() {
return this.maxValueLength;
String value = getValue(LENGTH);
if (value != null)
return Integer.valueOf(value);
return DEFAULT_LENGTH;
}
/**
* @param maxLength Maximum value length.
*/
public void setMaxValueLength(int maxLength) {
setValue(LENGTH, Integer.toString(maxLength));
}
/**
* @return Time to live.
*/
public int getTimeToLive() {
return this.timeToLive;
String value = getValue(TTL);
if (value != null)
return Integer.valueOf(value);
return DEFAULT_TTL;
}
/**
* @param timeToLive Time-to-live of cell contents, in seconds from the last timestamp.
*/
public void setTimeToLive(int timeToLive) {
setValue(TTL, Integer.toString(timeToLive));
}
/**
* @return True if MapFile blocks should be cached.
*/
public boolean isBlockCacheEnabled() {
return blockCacheEnabled;
String value = getValue(BLOCKCACHE);
if (value != null)
return Boolean.valueOf(value);
return DEFAULT_BLOCKCACHE;
}
/**
* @param blockCacheEnabled True if MapFile blocks should be cached.
*/
public void setBlockCacheEnabled(boolean blockCacheEnabled) {
setValue(BLOCKCACHE, Boolean.toString(blockCacheEnabled));
}
/**
* @return true if a bloom filter is enabled
*/
public boolean isBloomFilterEnabled() {
return this.bloomFilter;
public boolean isBloomfilter() {
String value = getValue(BLOOMFILTER);
if (value != null)
return Boolean.valueOf(value);
return DEFAULT_BLOOMFILTER;
}
/**
* @param onOff Enable/Disable bloom filter
*/
public void setBloomfilter(final boolean onOff) {
setValue(BLOOMFILTER, Boolean.toString(onOff));
}
/**
* @return The number of entries that are added to the store MapFile before
* an index entry is added.
*/
public int getMapFileIndexInterval() {
String value = getValue(MAPFILE_INDEX_INTERVAL);
if (value != null)
return Integer.valueOf(value);
return DEFAULT_MAPFILE_INDEX_INTERVAL;
}
/**
* @param interval The number of entries that are added to the store MapFile before
* an index entry is added.
*/
public void setMapFileIndexInterval(int interval) {
setValue(MAPFILE_INDEX_INTERVAL, Integer.toString(interval));
}
/** {@inheritDoc} */
@Override
public String toString() {
return "{" + HConstants.NAME + " => '" + Bytes.toString(name) +
"', " + HConstants.VERSIONS + " => " + maxVersions +
", " + COMPRESSION + " => '" + this.compressionType +
"', " + IN_MEMORY + " => " + inMemory +
", " + BLOCKCACHE + " => " + blockCacheEnabled +
", " + LENGTH + " => " + maxValueLength +
", " + TTL + " => " +
(timeToLive == HConstants.FOREVER ? "FOREVER" :
Integer.toString(timeToLive)) +
", " + BLOOMFILTER + " => " + bloomFilter + "}";
StringBuffer s = new StringBuffer();
s.append('{');
s.append(HConstants.NAME);
s.append(" => '");
s.append(Bytes.toString(name));
s.append("'");
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
values.entrySet()) {
s.append(", ");
s.append(Bytes.toString(e.getKey().get()));
s.append(" => '");
s.append(Bytes.toString(e.getValue().get()));
s.append("'");
}
s.append('}');
return s.toString();
}
/** {@inheritDoc} */
@ -321,14 +491,8 @@ public class HColumnDescriptor implements WritableComparable {
@Override
public int hashCode() {
int result = Bytes.hashCode(this.name);
result ^= Integer.valueOf(this.maxVersions).hashCode();
result ^= this.compressionType.hashCode();
result ^= Boolean.valueOf(this.inMemory).hashCode();
result ^= Boolean.valueOf(this.blockCacheEnabled).hashCode();
result ^= Integer.valueOf(this.maxValueLength).hashCode();
result ^= Integer.valueOf(this.timeToLive).hashCode();
result ^= Boolean.valueOf(this.bloomFilter).hashCode();
result ^= Byte.valueOf(COLUMN_DESCRIPTOR_VERSION).hashCode();
result ^= values.hashCode();
return result;
}
@ -336,37 +500,51 @@ public class HColumnDescriptor implements WritableComparable {
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
int versionNumber = in.readByte();
if (versionNumber <= 2) {
Text t = new Text();
t.readFields(in);
this.name = t.getBytes();
if (HStoreKey.getFamilyDelimiterIndex(this.name) > 0) {
this.name = stripColon(this.name);
int version = in.readByte();
if (version < 6) {
if (version <= 2) {
Text t = new Text();
t.readFields(in);
this.name = t.getBytes();
if (HStoreKey.getFamilyDelimiterIndex(this.name) > 0) {
this.name = stripColon(this.name);
}
} else {
this.name = Bytes.readByteArray(in);
}
this.values.clear();
setMaxVersions(in.readInt());
int ordinal = in.readInt();
setCompressionType(CompressionType.values()[ordinal]);
setInMemory(in.readBoolean());
setMaxValueLength(in.readInt());
setBloomfilter(in.readBoolean());
if (isBloomfilter() && version < 5) {
// If a bloomFilter is enabled and the column descriptor is less than
// version 5, we need to skip over it to read the rest of the column
// descriptor. There are no BloomFilterDescriptors written to disk for
// column descriptors with a version number >= 5
BloomFilterDescriptor junk = new BloomFilterDescriptor();
junk.readFields(in);
}
if (version > 1) {
setBlockCacheEnabled(in.readBoolean());
}
if (version > 2) {
setTimeToLive(in.readInt());
}
} else {
// version 6+
this.name = Bytes.readByteArray(in);
}
this.maxVersions = in.readInt();
int ordinal = in.readInt();
this.compressionType = CompressionType.values()[ordinal];
this.inMemory = in.readBoolean();
this.maxValueLength = in.readInt();
this.bloomFilter = in.readBoolean();
if (this.bloomFilter && versionNumber < 5) {
// If a bloomFilter is enabled and the column descriptor is less than
// version 5, we need to skip over it to read the rest of the column
// descriptor. There are no BloomFilterDescriptors written to disk for
// column descriptors with a version number >= 5
BloomFilterDescriptor junk = new BloomFilterDescriptor();
junk.readFields(in);
}
if (versionNumber > 1) {
this.blockCacheEnabled = in.readBoolean();
}
if (versionNumber > 2) {
this.timeToLive = in.readInt();
this.values.clear();
int numValues = in.readInt();
for (int i = 0; i < numValues; i++) {
ImmutableBytesWritable key = new ImmutableBytesWritable();
ImmutableBytesWritable value = new ImmutableBytesWritable();
key.readFields(in);
value.readFields(in);
values.put(key, value);
}
}
}
@ -374,13 +552,12 @@ public class HColumnDescriptor implements WritableComparable {
public void write(DataOutput out) throws IOException {
out.writeByte(COLUMN_DESCRIPTOR_VERSION);
Bytes.writeByteArray(out, this.name);
out.writeInt(this.maxVersions);
out.writeInt(this.compressionType.ordinal());
out.writeBoolean(this.inMemory);
out.writeInt(this.maxValueLength);
out.writeBoolean(this.bloomFilter);
out.writeBoolean(this.blockCacheEnabled);
out.writeInt(this.timeToLive);
out.writeInt(values.size());
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
values.entrySet()) {
e.getKey().write(out);
e.getValue().write(out);
}
}
// Comparable
@ -389,57 +566,13 @@ public class HColumnDescriptor implements WritableComparable {
public int compareTo(Object o) {
HColumnDescriptor other = (HColumnDescriptor)o;
int result = Bytes.compareTo(this.name, other.getName());
if(result == 0) {
result = Integer.valueOf(this.maxVersions).compareTo(
Integer.valueOf(other.maxVersions));
}
if(result == 0) {
result = this.compressionType.compareTo(other.compressionType);
}
if(result == 0) {
if(this.inMemory == other.inMemory) {
result = 0;
} else if(this.inMemory) {
if (result == 0) {
// punt on comparison for ordering, just calculate difference
result = this.values.hashCode() - other.values.hashCode();
if (result < 0)
result = -1;
} else {
else if (result > 0)
result = 1;
}
}
if(result == 0) {
if(this.blockCacheEnabled == other.blockCacheEnabled) {
result = 0;
} else if(this.blockCacheEnabled) {
result = -1;
} else {
result = 1;
}
}
if(result == 0) {
result = other.maxValueLength - this.maxValueLength;
}
if(result == 0) {
result = other.timeToLive - this.timeToLive;
}
if(result == 0) {
if(this.bloomFilter == other.bloomFilter) {
result = 0;
} else if(this.bloomFilter) {
result = -1;
} else {
result = 1;
}
}
return result;
}
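Since every attribute now lives in the values map, arbitrary user keys (HBASE-62) ride alongside the well-known ones, and unset keys fall back to their defaults. A short round-trip sketch; the "OWNER" key is hypothetical:

HColumnDescriptor family = new HColumnDescriptor();
family.setValue("OWNER", "analytics");    // arbitrary key/value pair (HBASE-62)
String owner = family.getValue("OWNER");  // -> "analytics"
int versions = family.getMaxVersions();   // VERSIONS unset -> DEFAULT_VERSIONS
family.setMaxVersions(10);                // stored in the same map under "VERSIONS"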

View File

@ -27,6 +27,7 @@ import java.util.List;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JenkinsHash;
import org.apache.hadoop.io.VersionedWritable;
import org.apache.hadoop.io.WritableComparable;
/**
@ -34,7 +35,9 @@ import org.apache.hadoop.io.WritableComparable;
* Contains HRegion id, start and end keys, a reference to this
* HRegions' table descriptor, etc.
*/
public class HRegionInfo implements WritableComparable {
public class HRegionInfo extends VersionedWritable implements WritableComparable {
private final byte VERSION = 0;
/**
* @param regionName
* @return the encodedName
@ -137,6 +140,7 @@ public class HRegionInfo implements WritableComparable {
* first meta regions
*/
private HRegionInfo(long regionId, HTableDescriptor tableDesc) {
super();
this.regionId = regionId;
this.tableDesc = tableDesc;
this.regionName = createRegionName(tableDesc.getName(), null, regionId);
@ -146,6 +150,7 @@ public class HRegionInfo implements WritableComparable {
/** Default constructor - creates empty object */
public HRegionInfo() {
super();
this.tableDesc = new HTableDescriptor();
}
@ -193,6 +198,7 @@ public class HRegionInfo implements WritableComparable {
public HRegionInfo(HTableDescriptor tableDesc, final byte [] startKey,
final byte [] endKey, final boolean split, final long regionid)
throws IllegalArgumentException {
super();
if (tableDesc == null) {
throw new IllegalArgumentException("tableDesc cannot be null");
}
@ -214,6 +220,7 @@ public class HRegionInfo implements WritableComparable {
* @param other
*/
public HRegionInfo(HRegionInfo other) {
super();
this.endKey = other.getEndKey();
this.offLine = other.isOffline();
this.regionId = other.getRegionId();
@ -307,7 +314,14 @@ public class HRegionInfo implements WritableComparable {
public HTableDescriptor getTableDesc(){
return tableDesc;
}
/**
* @param newDesc new table descriptor to use
*/
public void setTableDesc(HTableDescriptor newDesc) {
this.tableDesc = newDesc;
}
/** @return true if this is the root region */
public boolean isRootRegion() {
return this.tableDesc.isRootRegion();
@ -381,6 +395,10 @@ public class HRegionInfo implements WritableComparable {
public int hashCode() {
return this.hashCode;
}
public byte getVersion() {
return VERSION;
}
//
// Writable
@ -390,6 +408,7 @@ public class HRegionInfo implements WritableComparable {
* {@inheritDoc}
*/
public void write(DataOutput out) throws IOException {
super.write(out);
Bytes.writeByteArray(out, endKey);
out.writeBoolean(offLine);
out.writeLong(regionId);
@ -404,6 +423,7 @@ public class HRegionInfo implements WritableComparable {
* {@inheritDoc}
*/
public void readFields(DataInput in) throws IOException {
super.readFields(in);
this.endKey = Bytes.readByteArray(in);
this.offLine = in.readBoolean();
this.regionId = in.readLong();
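Because HRegionInfo now extends VersionedWritable, the first byte of its serialized form is the value returned by getVersion(), and readFields rejects a mismatched version. A small sketch of the wire effect, using java.io streams:

ByteArrayOutputStream bytes = new ByteArrayOutputStream();
new HRegionInfo().write(new DataOutputStream(bytes));
byte version = bytes.toByteArray()[0];   // == 0, the VERSION declared above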

View File

@ -29,6 +29,7 @@ import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableComparable;
@ -53,13 +54,32 @@ public class HTableDescriptor implements WritableComparable {
new HColumnDescriptor(HConstants.COLUMN_FAMILY_HISTORIAN,
HConstants.ALL_VERSIONS, HColumnDescriptor.CompressionType.NONE,
false, false, Integer.MAX_VALUE, HConstants.FOREVER, false) });
private boolean rootregion = false;
private boolean metaregion = false;
// Changes prior to version 3 were not recorded here.
// Version 3 adds metadata as a map where keys and values are byte[].
public static final byte TABLE_DESCRIPTOR_VERSION = 3;
private byte [] name = HConstants.EMPTY_BYTE_ARRAY;
private String nameAsString = "";
// Table metadata
protected Map<ImmutableBytesWritable,ImmutableBytesWritable> values =
new HashMap<ImmutableBytesWritable,ImmutableBytesWritable>();
public static final String FAMILIES = "FAMILIES";
public static final String MAX_FILESIZE = "MAX_FILESIZE";
public static final String IN_MEMORY = "IN_MEMORY";
public static final String READONLY = "READONLY";
public static final String MEMCACHE_FLUSHSIZE = "MEMCACHE_FLUSHSIZE";
public static final String IS_ROOT = "IS_ROOT";
public static final String IS_META = "IS_META";
public static final boolean DEFAULT_IN_MEMORY = false;
public static final boolean DEFAULT_READONLY = false;
public static final int DEFAULT_MEMCACHE_FLUSH_SIZE = 1024*1024*64;
// Key is hash of the family name.
private final Map<Integer, HColumnDescriptor> families =
@ -107,9 +127,32 @@ public class HTableDescriptor implements WritableComparable {
* @see <a href="HADOOP-1581">HADOOP-1581 HBASE: Un-openable tablename bug</a>
*/
public HTableDescriptor(final byte [] name) {
setMetaFlags(name);
this.name = this.metaregion? name: isLegalTableName(name);
super();
this.name = this.isMetaRegion() ? name: isLegalTableName(name);
this.nameAsString = Bytes.toString(this.name);
setMetaFlags(this.name);
}
/**
* Constructor.
* <p>
* Makes a deep copy of the supplied descriptor.
* Can make a modifiable descriptor from an UnmodifyableHTableDescriptor.
* @param desc The descriptor.
*/
public HTableDescriptor(final HTableDescriptor desc)
{
super();
this.name = desc.name.clone();
this.nameAsString = Bytes.toString(this.name);
setMetaFlags(this.name);
for (HColumnDescriptor c: desc.families.values()) {
this.families.put(Bytes.mapKey(c.getName()), new HColumnDescriptor(c));
}
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
desc.values.entrySet()) {
this.values.put(e.getKey(), e.getValue());
}
}
/*
@ -118,11 +161,46 @@ public class HTableDescriptor implements WritableComparable {
* @param name
*/
private void setMetaFlags(final byte [] name) {
this.rootregion = Bytes.equals(name, HConstants.ROOT_TABLE_NAME);
this.metaregion =
this.rootregion? true: Bytes.equals(name, HConstants.META_TABLE_NAME);
setRootRegion(Bytes.equals(name, HConstants.ROOT_TABLE_NAME));
setMetaRegion(isRootRegion() ||
Bytes.equals(name, HConstants.META_TABLE_NAME));
}
/** @return true if this is the root region */
public boolean isRootRegion() {
String value = getValue(IS_ROOT);
if (value != null)
return Boolean.valueOf(value);
return false;
}
/** @param isRoot true if this is the root region */
protected void setRootRegion(boolean isRoot) {
values.put(new ImmutableBytesWritable(Bytes.toBytes(IS_ROOT)),
new ImmutableBytesWritable(Bytes.toBytes(Boolean.toString(isRoot))));
}
/** @return true if this is a meta region (part of the root or meta tables) */
public boolean isMetaRegion() {
String value = getValue(IS_META);
if (value != null)
return Boolean.valueOf(value);
return false;
}
/**
* @param isMeta true if this is a meta region (part of the root or meta
* tables) */
protected void setMetaRegion(boolean isMeta) {
values.put(new ImmutableBytesWritable(Bytes.toBytes(IS_META)),
new ImmutableBytesWritable(Bytes.toBytes(Boolean.toString(isMeta))));
}
/** @return true if table is the meta table */
public boolean isMetaTable() {
return isMetaRegion() && !isRootRegion();
}
/**
* Check passed buffer is legal user-space table name.
* @param b Table name.
@ -147,19 +225,80 @@ public class HTableDescriptor implements WritableComparable {
return b;
}
/** @return true if this is the root region */
public boolean isRootRegion() {
return rootregion;
/**
* @param key The key.
* @return The value.
*/
public byte[] getValue(byte[] key) {
ImmutableBytesWritable ibw = values.get(new ImmutableBytesWritable(key));
if (ibw == null)
return null;
return ibw.get();
}
/** @return true if table is the meta table */
public boolean isMetaTable() {
return metaregion && !rootregion;
/**
* @param key The key.
* @return The value as a string.
*/
public String getValue(String key) {
byte[] value = getValue(Bytes.toBytes(key));
if (value == null)
return null;
return Bytes.toString(value);
}
/** @return true if this is a meta region (part of the root or meta tables) */
public boolean isMetaRegion() {
return metaregion;
/**
* @param key The key.
* @param value The value.
*/
public void setValue(byte[] key, byte[] value) {
values.put(new ImmutableBytesWritable(key),
new ImmutableBytesWritable(value));
}
/**
* @param key The key.
* @param value The value.
*/
public void setValue(String key, String value) {
setValue(Bytes.toBytes(key), Bytes.toBytes(value));
}
/**
* @return true if all columns in the table should be kept in the
* HRegionServer cache only
*/
public boolean isInMemory() {
String value = getValue(IN_MEMORY);
if (value != null)
return Boolean.valueOf(value);
return DEFAULT_IN_MEMORY;
}
/**
* @param inMemory True if all of the columns in the table should be kept in
* the HRegionServer cache only.
*/
public void setInMemory(boolean inMemory) {
setValue(IN_MEMORY, Boolean.toString(inMemory));
}
/**
* @return true if all columns in the table should be read only
*/
public boolean isReadOnly() {
String value = getValue(READONLY);
if (value != null)
return Boolean.valueOf(value);
return DEFAULT_READONLY;
}
/**
* @param readOnly True if all of the columns in the table should be read
* only.
*/
public void setReadOnly(boolean readOnly) {
setValue(READONLY, Boolean.toString(readOnly));
}
/** @return name of table */
@ -172,6 +311,39 @@ public class HTableDescriptor implements WritableComparable {
return this.nameAsString;
}
/** @return max hregion size for table */
public long getMaxFileSize() {
String value = getValue(MAX_FILESIZE);
if (value != null)
return Long.valueOf(value);
return HConstants.DEFAULT_MAX_FILE_SIZE;
}
/**
* @param maxFileSize The maximum file size that a store file can grow to
* before a split is triggered.
*/
public void setMaxFileSize(long maxFileSize) {
setValue(MAX_FILESIZE, Long.toString(maxFileSize));
}
/**
* @return memory cache flush size for each hregion
*/
public int getMemcacheFlushSize() {
String value = getValue(MEMCACHE_FLUSHSIZE);
if (value != null)
return Integer.valueOf(value);
return DEFAULT_MEMCACHE_FLUSH_SIZE;
}
/**
* @param memcacheFlushSize memory cache flush size for each hregion
*/
public void setMemcacheFlushSize(int memcacheFlushSize) {
setValue(MEMCACHE_FLUSHSIZE, Integer.toString(memcacheFlushSize));
}
/**
* Adds a column family.
* @param family HColumnDescriptor of family to add.
@ -211,10 +383,28 @@ public class HTableDescriptor implements WritableComparable {
*/
@Override
public String toString() {
return HConstants.NAME + " => '" + Bytes.toString(this.name) +
"', " + FAMILIES + " => " + this.families.values();
StringBuffer s = new StringBuffer();
s.append('{');
s.append(HConstants.NAME);
s.append(" => '");
s.append(Bytes.toString(name));
s.append("'");
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
values.entrySet()) {
s.append(", ");
s.append(Bytes.toString(e.getKey().get()));
s.append(" => '");
s.append(Bytes.toString(e.getValue().get()));
s.append("'");
}
s.append(", ");
s.append(FAMILIES);
s.append(" => ");
s.append(families.values());
s.append('}');
return s.toString();
}
/** {@inheritDoc} */
@Override
public boolean equals(Object obj) {
@ -224,42 +414,64 @@ public class HTableDescriptor implements WritableComparable {
/** {@inheritDoc} */
@Override
public int hashCode() {
// TODO: Cache.
int result = Bytes.hashCode(this.name);
result ^= Byte.valueOf(TABLE_DESCRIPTOR_VERSION).hashCode();
if (this.families != null && this.families.size() > 0) {
for (HColumnDescriptor e: this.families.values()) {
result ^= e.hashCode();
}
}
result ^= values.hashCode();
return result;
}
// Writable
/** {@inheritDoc} */
public void write(DataOutput out) throws IOException {
out.writeBoolean(rootregion);
out.writeBoolean(metaregion);
Bytes.writeByteArray(out, name);
out.writeInt(families.size());
for(Iterator<HColumnDescriptor> it = families.values().iterator();
it.hasNext(); ) {
it.next().write(out);
public void readFields(DataInput in) throws IOException {
int version = in.readInt();
if (version < 3)
throw new IOException("versions < 3 are not supported (and never existed!?)");
// version 3+
name = Bytes.readByteArray(in);
nameAsString = Bytes.toString(this.name);
setRootRegion(in.readBoolean());
setMetaRegion(in.readBoolean());
values.clear();
int numVals = in.readInt();
for (int i = 0; i < numVals; i++) {
ImmutableBytesWritable key = new ImmutableBytesWritable();
ImmutableBytesWritable value = new ImmutableBytesWritable();
key.readFields(in);
value.readFields(in);
values.put(key, value);
}
families.clear();
int numFamilies = in.readInt();
for (int i = 0; i < numFamilies; i++) {
HColumnDescriptor c = new HColumnDescriptor();
c.readFields(in);
families.put(Bytes.mapKey(c.getName()), c);
}
}
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
this.rootregion = in.readBoolean();
this.metaregion = in.readBoolean();
this.name = Bytes.readByteArray(in);
this.nameAsString = Bytes.toString(this.name);
int numCols = in.readInt();
this.families.clear();
for (int i = 0; i < numCols; i++) {
HColumnDescriptor c = new HColumnDescriptor();
c.readFields(in);
this.families.put(Bytes.mapKey(c.getName()), c);
public void write(DataOutput out) throws IOException {
out.writeInt(TABLE_DESCRIPTOR_VERSION);
Bytes.writeByteArray(out, name);
out.writeBoolean(isRootRegion());
out.writeBoolean(isMetaRegion());
out.writeInt(values.size());
for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> e:
values.entrySet()) {
e.getKey().write(out);
e.getValue().write(out);
}
out.writeInt(families.size());
for(Iterator<HColumnDescriptor> it = families.values().iterator();
it.hasNext(); ) {
HColumnDescriptor family = it.next();
family.write(out);
}
}
@ -272,12 +484,10 @@ public class HTableDescriptor implements WritableComparable {
if (result == 0) {
result = families.size() - other.families.size();
}
if (result == 0 && families.size() != other.families.size()) {
result = Integer.valueOf(families.size()).compareTo(
Integer.valueOf(other.families.size()));
}
if (result == 0) {
for (Iterator<HColumnDescriptor> it = families.values().iterator(),
it2 = other.families.values().iterator(); it.hasNext(); ) {
@ -287,6 +497,14 @@ public class HTableDescriptor implements WritableComparable {
}
}
}
if (result == 0) {
// punt on comparison for ordering, just calculate difference
result = this.values.hashCode() - other.values.hashCode();
if (result < 0)
result = -1;
else if (result > 0)
result = 1;
}
return result;
}
@ -323,4 +541,4 @@ public class HTableDescriptor implements WritableComparable {
public static Path getTableDir(Path rootdir, final byte [] tableName) {
return new Path(rootdir, Bytes.toString(tableName));
}
}
}
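The rewritten write/readFields pair makes the table descriptor self-describing: an int version, the name, the root/meta flags, the values map, then the families. A round-trip sketch (table name hypothetical), using java.io streams:

HTableDescriptor desc = new HTableDescriptor(Bytes.toBytes("demo"));
desc.setReadOnly(true);
ByteArrayOutputStream buf = new ByteArrayOutputStream();
desc.write(new DataOutputStream(buf));
HTableDescriptor copy = new HTableDescriptor();
copy.readFields(new DataInputStream(
    new ByteArrayInputStream(buf.toByteArray())));
boolean readOnly = copy.isReadOnly();   // true: attributes survive the round trip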

View File

@ -563,7 +563,26 @@ public class HBaseAdmin {
}
}
/**
* Modify a table's HTableDescriptor
*
* @param tableName name of table
* @param desc the updated descriptor
* @throws IOException
*/
public void modifyTableMeta(final byte [] tableName, HTableDescriptor desc)
throws IOException {
if (this.master == null) {
throw new MasterNotRunningException("master has been shut down");
}
HTableDescriptor.isLegalTableName(tableName);
try {
this.master.modifyTableMeta(tableName, desc);
} catch (RemoteException e) {
throw RemoteExceptionHandler.decodeRemoteException(e);
}
}
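A usage sketch with hypothetical names: the table must be disabled first (ModifyTableMeta.processScanItem below rejects enabled regions), and the new HTableDescriptor copy constructor turns the read-only descriptor handed back by HTable.getTableDescriptor into a modifiable one:

HBaseConfiguration conf = new HBaseConfiguration();
HBaseAdmin admin = new HBaseAdmin(conf);
byte [] tableName = Bytes.toBytes("demo");
admin.disableTable(tableName);
HTableDescriptor desc = new HTableDescriptor(    // deep, modifiable copy
    new HTable(conf, tableName).getTableDescriptor());
desc.setMemcacheFlushSize(16 * 1024 * 1024);
admin.modifyTableMeta(tableName, desc);
admin.enableTable(tableName);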
/**
* Shuts down the HBase instance
* @throws IOException

View File

@ -243,7 +243,8 @@ public class HTable {
* @throws IOException
*/
public HTableDescriptor getTableDescriptor() throws IOException {
return this.connection.getHTableDescriptor(this.tableName);
return new UnmodifyableHTableDescriptor(
this.connection.getHTableDescriptor(this.tableName));
}
/**

View File

@ -82,8 +82,7 @@ class MetaScanner implements HConstants {
/**
* Visitor class called to process each row of the .META. table
*/
protected interface MetaScannerVisitor {
interface MetaScannerVisitor {
/**
* Visitor method that accepts a RowResult and the meta region location.
* Implementations can return false to stop the region's loop if it becomes

View File

@ -0,0 +1,55 @@
package org.apache.hadoop.hbase.client;
import org.apache.hadoop.hbase.HColumnDescriptor;
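/** Read-only column descriptor: all mutators throw UnsupportedOperationException. */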
public class UnmodifyableHColumnDescriptor extends HColumnDescriptor {
public UnmodifyableHColumnDescriptor(final HColumnDescriptor desc) {
super(desc);
}
@Override
public void setValue(byte[] key, byte[] value) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setValue(String key, String value) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setMaxVersions(int maxVersions) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setInMemory(boolean inMemory) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setBlockCacheEnabled(boolean blockCacheEnabled) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setMaxValueLength(int maxLength) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setTimeToLive(int timeToLive) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setCompressionType(CompressionType type) {
throw new UnsupportedOperationException("HColumnDescriptor is read-only");
}
@Override
public void setMapFileIndexInterval(int interval) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
}

View File

@ -27,7 +27,6 @@ import org.apache.hadoop.hbase.HTableDescriptor;
* Read-only table descriptor.
*/
public class UnmodifyableHTableDescriptor extends HTableDescriptor {
public UnmodifyableHTableDescriptor() {
super();
}
@ -39,7 +38,7 @@ public class UnmodifyableHTableDescriptor extends HTableDescriptor {
UnmodifyableHTableDescriptor(final HTableDescriptor desc) {
super(desc.getName());
for (HColumnDescriptor c: desc.getFamilies()) {
super.addFamily(c);
super.addFamily(new UnmodifyableHColumnDescriptor(c));
}
}
@ -61,4 +60,34 @@ public class UnmodifyableHTableDescriptor extends HTableDescriptor {
public HColumnDescriptor removeFamily(final byte [] column) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
@Override
public void setInMemory(boolean inMemory) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
@Override
public void setReadOnly(boolean readOnly) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
@Override
public void setValue(byte[] key, byte[] value) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
@Override
public void setValue(String key, String value) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
@Override
public void setMaxFileSize(long maxFileSize) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
@Override
public void setMemcacheFlushSize(int memcacheFlushSize) {
throw new UnsupportedOperationException("HTableDescriptor is read-only");
}
}
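The unmodifyable wrappers fail fast rather than silently ignoring writes; a brief sketch of what callers now see (table name hypothetical):

HBaseConfiguration conf = new HBaseConfiguration();
HTableDescriptor desc =
    new HTable(conf, Bytes.toBytes("demo")).getTableDescriptor();
try {
  desc.setReadOnly(true);   // any mutator on the returned descriptor
} catch (UnsupportedOperationException e) {
  // "HTableDescriptor is read-only"
}

To actually change settings, copy the descriptor with new HTableDescriptor(desc) and go through HBaseAdmin.modifyTableMeta as sketched earlier. Writes to the regions of a table whose descriptor has READONLY set fail in the same spirit: HRegion.batchUpdate and the delete methods below throw IOException("region is read only").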

View File

@ -104,7 +104,16 @@ public interface HMasterInterface extends VersionedProtocol {
* @throws IOException
*/
public void disableTable(final byte [] tableName) throws IOException;
/**
* Modify a table's metadata
*
* @param tableName name of table
* @param desc the updated descriptor
* @throws IOException
*/
public void modifyTableMeta(byte[] tableName, HTableDescriptor desc)
throws IOException;
/**
* Shutdown an HBase cluster.
* @throws IOException

View File

@ -688,6 +688,13 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
new ChangeTableState(this, tableName, false).process();
}
/** {@inheritDoc} */
public void modifyTableMeta(final byte[] tableName, HTableDescriptor desc)
throws IOException
{
new ModifyTableMeta(this, tableName, desc).process();
}
/** {@inheritDoc} */
public HServerAddress findRootRegion() {
return regionManager.getRootRegionLocation();

View File

@ -0,0 +1,79 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
/** Instantiated to modify table descriptor metadata */
class ModifyTableMeta extends TableOperation {
private static Log LOG = LogFactory.getLog(ModifyTableMeta.class);
private HTableDescriptor desc;
ModifyTableMeta(final HMaster master, final byte [] tableName,
HTableDescriptor desc)
throws IOException {
super(master, tableName);
this.desc = desc;
LOG.debug("modifying " + Bytes.toString(tableName) + ": " +
desc.toString());
}
protected void updateRegionInfo(HRegionInterface server, byte [] regionName,
HRegionInfo i)
throws IOException {
BatchUpdate b = new BatchUpdate(i.getRegionName());
b.put(COL_REGIONINFO, Writables.getBytes(i));
server.batchUpdate(regionName, b);
LOG.debug("updated HTableDescriptor for region " + i.getRegionNameAsString());
}
@Override
protected void processScanItem(
@SuppressWarnings("unused") String serverName,
@SuppressWarnings("unused") long startCode, final HRegionInfo info)
throws IOException {
if (isEnabled(info)) {
throw new TableNotDisabledException(Bytes.toString(tableName));
}
}
@Override
protected void postProcessMeta(MetaRegion m, HRegionInterface server)
throws IOException {
for (HRegionInfo i: unservedRegions) {
i.setTableDesc(desc);
updateRegionInfo(server, m.getRegionName(), i);
}
// kick off a meta scan right away
master.regionManager.metaScannerThread.interrupt();
}
}

View File

@ -125,7 +125,7 @@ public abstract class HAbstractScanner implements InternalScanner {
private boolean multipleMatchers;
/** Constructor for abstract base class */
HAbstractScanner(long timestamp, byte [][] targetCols) throws IOException {
protected HAbstractScanner(long timestamp, byte [][] targetCols) throws IOException {
this.timestamp = timestamp;
this.wildcardMatch = false;
this.multipleMatchers = false;

View File

@ -156,6 +156,7 @@ public class HRegion implements HConstants {
b.getRegionInfo().getTableDesc().getNameAsString())) {
throw new IOException("Regions do not belong to the same table");
}
FileSystem fs = a.getFilesystem();
// Make sure each region's cache is empty
@ -483,13 +484,19 @@ public class HRegion implements HConstants {
fs.delete(merges, true);
}
// By default, we flush the cache when 64M.
this.memcacheFlushSize = conf.getInt("hbase.hregion.memcache.flush.size",
1024*1024*64);
int flushSize = regionInfo.getTableDesc().getMemcacheFlushSize();
if (flushSize == HTableDescriptor.DEFAULT_MEMCACHE_FLUSH_SIZE) {
flushSize = conf.getInt("hbase.hregion.memcache.flush.size",
HTableDescriptor.DEFAULT_MEMCACHE_FLUSH_SIZE);
}
this.memcacheFlushSize = flushSize;
this.blockingMemcacheSize = this.memcacheFlushSize *
conf.getInt("hbase.hregion.memcache.block.multiplier", 1);
if (this.regionInfo.getTableDesc().isReadOnly())
this.writestate.writesEnabled = false;
// HRegion is ready to go!
this.writestate.compacting = false;
this.lastFlushTime = System.currentTimeMillis();
@ -1311,6 +1318,10 @@ public class HRegion implements HConstants {
public void batchUpdate(BatchUpdate b)
throws IOException {
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
// Do a rough check that we have resources to accept a write. The check is
// 'rough' in that between the resource check and the call to obtain a
// read lock, resources may run out. For now, the thought is that this
@ -1418,6 +1429,9 @@ public class HRegion implements HConstants {
public void deleteAll(final byte [] row, final byte [] column, final long ts)
throws IOException {
checkColumn(column);
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
Integer lid = obtainRowLock(row);
try {
deleteMultiple(row, column, ts, ALL_VERSIONS);
@ -1434,6 +1448,9 @@ public class HRegion implements HConstants {
*/
public void deleteAll(final byte [] row, final long ts)
throws IOException {
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
Integer lid = obtainRowLock(row);
try {
for (HStore store : stores.values()){
@ -1461,6 +1478,9 @@ public class HRegion implements HConstants {
*/
public void deleteFamily(byte [] row, byte [] family, long timestamp)
throws IOException{
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
Integer lid = obtainRowLock(row);
try {
// find the HStore for the column family
@ -1493,6 +1513,9 @@ public class HRegion implements HConstants {
private void deleteMultiple(final byte [] row, final byte [] column,
final long ts, final int versions)
throws IOException {
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
HStoreKey origin = new HStoreKey(row, column, ts);
Set<HStoreKey> keys = getKeys(origin, versions);
if (keys.size() > 0) {
@ -1520,6 +1543,9 @@ public class HRegion implements HConstants {
final byte [] val)
throws IOException {
checkColumn(key.getColumn());
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
TreeMap<HStoreKey, byte []> targets = this.targetColumns.get(lockid);
if (targets == null) {
targets = new TreeMap<HStoreKey, byte []>();
@ -1541,6 +1567,9 @@ public class HRegion implements HConstants {
if (updatesByColumn == null || updatesByColumn.size() <= 0) {
return;
}
if (!this.writestate.writesEnabled) {
throw new IOException("region is read only");
}
boolean flush = false;
this.updatesLock.readLock().lock();
try {

View File

@ -172,15 +172,20 @@ public class HStore implements HConstants {
this.storeName = Bytes.toBytes(this.info.getEncodedName() + "/" +
Bytes.toString(this.family.getName()));
this.storeNameStr = Bytes.toString(this.storeName);
// By default, we compact if an HStore has more than
// MIN_COMMITS_FOR_COMPACTION map files
this.compactionThreshold =
conf.getInt("hbase.hstore.compactionThreshold", 3);
// By default we split region if a file > DEFAULT_MAX_FILE_SIZE.
this.desiredMaxFileSize =
conf.getLong("hbase.hregion.max.filesize", DEFAULT_MAX_FILE_SIZE);
long maxFileSize = info.getTableDesc().getMaxFileSize();
if (maxFileSize == HConstants.DEFAULT_MAX_FILE_SIZE) {
maxFileSize = conf.getLong("hbase.hregion.max.filesize",
HConstants.DEFAULT_MAX_FILE_SIZE);
}
this.desiredMaxFileSize = maxFileSize;
this.storeSize = 0L;
if (family.getCompression() == HColumnDescriptor.CompressionType.BLOCK) {
@ -242,11 +247,11 @@ public class HStore implements HConstants {
if (first) {
// Use a block cache (if configured) for the first reader only
// so as to control memory usage.
r = e.getValue().getReader(this.fs, this.family.isBloomFilterEnabled(),
r = e.getValue().getReader(this.fs, this.family.isBloomfilter(),
family.isBlockCacheEnabled());
first = false;
} else {
r = e.getValue().getReader(this.fs, this.family.isBloomFilterEnabled(),
r = e.getValue().getReader(this.fs, this.family.isBloomfilter(),
false);
}
this.readers.put(e.getKey(), r);
@ -582,7 +587,8 @@ public class HStore implements HConstants {
HStoreFile flushedFile = new HStoreFile(conf, fs, basedir,
info.getEncodedName(), family.getName(), -1L, null);
MapFile.Writer out = flushedFile.getWriter(this.fs, this.compression,
this.family.isBloomFilterEnabled(), cache.size());
this.family.isBloomfilter(), cache.size());
out.setIndexInterval(family.getMapFileIndexInterval());
// Here we tried picking up an existing HStoreFile from disk and
// interlacing the memcache flush compacting as we go. The notion was
@ -651,7 +657,7 @@ public class HStore implements HConstants {
Long flushid = Long.valueOf(logCacheFlushId);
// Open the map file reader.
this.readers.put(flushid,
flushedFile.getReader(this.fs, this.family.isBloomFilterEnabled(),
flushedFile.getReader(this.fs, this.family.isBloomfilter(),
this.family.isBlockCacheEnabled()));
this.storefiles.put(flushid, flushedFile);
// Tell listeners of the change in readers.
@ -737,9 +743,9 @@ public class HStore implements HConstants {
return checkSplit();
}
/*
* We create a new list of MapFile.Reader objects so we don't screw up the
* caching associated with the currently-loaded ones. Our iteration-based
* access pattern is practically designed to ruin the cache.
* We create a new list of MapFile.Reader objects so we don't screw up
* the caching associated with the currently-loaded ones. Our iteration-
* based access pattern is practically designed to ruin the cache.
*/
List<MapFile.Reader> readers = new ArrayList<MapFile.Reader>();
for (HStoreFile file: filesToCompact) {
@ -749,7 +755,7 @@ public class HStore implements HConstants {
readers.add(reader);
// Compute the size of the new bloomfilter if needed
if (this.family.isBloomFilterEnabled()) {
if (this.family.isBloomfilter()) {
nrows += reader.getBloomFilterSize();
}
} catch (IOException e) {
@ -775,7 +781,8 @@ public class HStore implements HConstants {
FSUtils.getPath(compactedOutputFile.getMapFilePath()));
}
MapFile.Writer writer = compactedOutputFile.getWriter(this.fs,
this.compression, this.family.isBloomFilterEnabled(), nrows);
this.compression, this.family.isBloomfilter(), nrows);
writer.setIndexInterval(family.getMapFileIndexInterval());
try {
compactHStoreFiles(writer, readers);
} finally {
@ -1029,7 +1036,7 @@ public class HStore implements HConstants {
// Use a block cache (if configured) for this reader since
// it is the only one.
finalCompactedFile.getReader(this.fs,
this.family.isBloomFilterEnabled(),
this.family.isBloomfilter(),
this.family.isBlockCacheEnabled()));
this.storefiles.put(orderVal, finalCompactedFile);
// Tell observers that list of Readers has changed.
@ -1814,4 +1821,4 @@ public class HStore implements HConstants {
return key;
}
}
}
}
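With the writer hookup above, MAPFILE_INDEX_INTERVAL finally takes effect: one index entry is kept per interval of entries written, so a smaller interval means a larger in-memory index but a shorter scan to reach a key. A per-family tuning sketch (family name hypothetical):

HColumnDescriptor family = new HColumnDescriptor("docs:");
family.setMapFileIndexInterval(32);   // default is 128; smaller -> bigger index, faster seeks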

View File

@ -413,7 +413,7 @@ public class TableHandler extends GenericHandler {
doElement(outputter, "name", Bytes.toString(e.getName()));
doElement(outputter, "compression", e.getCompression().toString());
doElement(outputter, "bloomfilter",
Boolean.toString(e.isBloomFilterEnabled()));
Boolean.toString(e.isBloomfilter()));
doElement(outputter, "max-versions",
Integer.toString(e.getMaxVersions()));
doElement(outputter, "maximum-cell-size",

View File

@ -67,7 +67,7 @@ public class ThriftUtilities {
col.inMemory = in.isInMemory();
col.blockCacheEnabled = in.isBlockCacheEnabled();
col.maxValueLength = in.getMaxValueLength();
col.bloomFilterType = Boolean.toString(in.isBloomFilterEnabled());
col.bloomFilterType = Boolean.toString(in.isBloomfilter());
return col;
}

View File

@ -37,9 +37,11 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
@ -188,7 +190,7 @@ public class Migrate extends Configured implements Tool {
float version = Float.parseFloat(versionStr);
if (version == 0.1f) {
checkForUnrecoveredLogFiles(getRootDirFiles());
migrate();
migrateToV5();
} else {
throw new IOException("Unrecognized or non-migratable version: " +
version);
@ -209,10 +211,94 @@ public class Migrate extends Configured implements Tool {
}
}
private void migrate() throws IOException {
private void migrateToV5() throws IOException {
rewriteMetaHRegionInfo();
addHistorianFamilyToMeta();
updateBloomFilters();
}
/**
* Rewrite the meta tables so that HRI is versioned and so we move to new
* HTD and HCD.
* @throws IOException
*/
private void rewriteMetaHRegionInfo() throws IOException {
if (this.readOnly && this.migrationNeeded) {
return;
}
// Read using old classes.
final org.apache.hadoop.hbase.util.migration.v5.MetaUtils utils =
new org.apache.hadoop.hbase.util.migration.v5.MetaUtils(this.conf);
try {
// Scan the root region
utils.scanRootRegion(new org.apache.hadoop.hbase.util.migration.v5.MetaUtils.ScannerListener() {
public boolean processRow(org.apache.hadoop.hbase.util.migration.v5.HRegionInfo info)
throws IOException {
// Scan every meta region
final org.apache.hadoop.hbase.util.migration.v5.HRegion metaRegion =
utils.getMetaRegion(info);
// If here, we were able to read with old classes. If readOnly, then
// needs migration.
if (readOnly && !migrationNeeded) {
migrationNeeded = true;
return false;
}
updateHRegionInfo(utils.getRootRegion(), info);
utils.scanMetaRegion(info, new org.apache.hadoop.hbase.util.migration.v5.MetaUtils.ScannerListener() {
public boolean processRow(org.apache.hadoop.hbase.util.migration.v5.HRegionInfo hri)
throws IOException {
updateHRegionInfo(metaRegion, hri);
return true;
}
});
return true;
}
});
} catch (Exception e) {
LOG.error("", e);
} finally {
utils.shutdown();
}
}
/*
* Move from old pre-v5 hregioninfo to current HRegionInfo
* Persist back into <code>mr</code>
* @param mr
* @param oldHri
*/
void updateHRegionInfo(org.apache.hadoop.hbase.util.migration.v5.HRegion mr,
org.apache.hadoop.hbase.util.migration.v5.HRegionInfo oldHri)
throws IOException {
byte [] oldHriTableName = oldHri.getTableDesc().getName();
HTableDescriptor newHtd =
Bytes.equals(HConstants.ROOT_TABLE_NAME, oldHriTableName)?
HTableDescriptor.ROOT_TABLEDESC:
Bytes.equals(HConstants.META_TABLE_NAME, oldHriTableName)?
HTableDescriptor.META_TABLEDESC:
new HTableDescriptor(oldHri.getTableDesc().getName());
for (org.apache.hadoop.hbase.util.migration.v5.HColumnDescriptor oldHcd:
oldHri.getTableDesc().getFamilies()) {
HColumnDescriptor newHcd = new HColumnDescriptor(
HStoreKey.addDelimiter(oldHcd.getName()),
oldHcd.getMaxVersions(),
HColumnDescriptor.CompressionType.valueOf(oldHcd.getCompressionType().toString()),
oldHcd.isInMemory(), oldHcd.isBlockCacheEnabled(),
oldHcd.getMaxValueLength(), oldHcd.getTimeToLive(),
oldHcd.isBloomFilterEnabled());
newHtd.addFamily(newHcd);
}
HRegionInfo newHri = new HRegionInfo(newHtd, oldHri.getStartKey(),
oldHri.getEndKey(), oldHri.isSplit(), oldHri.getRegionId());
BatchUpdate b = new BatchUpdate(newHri.getRegionName());
b.put(HConstants.COL_REGIONINFO, Writables.getBytes(newHri));
mr.batchUpdate(b);
if (LOG.isDebugEnabled()) {
LOG.debug("New " + Bytes.toString(HConstants.COL_REGIONINFO) +
" for " + oldHri.toString() + " in " + mr.toString() + " is: " +
newHri.toString());
}
}
private FileStatus[] getRootDirFiles() throws IOException {
FileStatus[] stats = fs.listStatus(FSUtils.getRootDir(this.conf));
@ -243,77 +329,6 @@ public class Migrate extends Configured implements Tool {
}
}
void migrateRegionDir(final byte [] tableName, String oldPath)
throws IOException {
// Create directory where table will live
Path rootdir = FSUtils.getRootDir(this.conf);
Path tableDir = new Path(rootdir, Bytes.toString(tableName));
fs.mkdirs(tableDir);
// Move the old region directory under the table directory
Path newPath = new Path(tableDir,
oldPath.substring(OLD_PREFIX.length()));
fs.rename(new Path(rootdir, oldPath), newPath);
processRegionSubDirs(fs, newPath);
}
private void processRegionSubDirs(FileSystem fs, Path newPath)
throws IOException {
String newName = newPath.getName();
FileStatus[] children = fs.listStatus(newPath);
for (int i = 0; i < children.length; i++) {
String child = children[i].getPath().getName();
if (children[i].isDir()) {
processRegionSubDirs(fs, children[i].getPath());
// Rename old compaction directories
if (child.startsWith(OLD_PREFIX)) {
fs.rename(children[i].getPath(),
new Path(newPath, child.substring(OLD_PREFIX.length())));
}
} else {
if (newName.compareTo("mapfiles") == 0) {
// Check to see if this mapfile is a reference
if (HStore.isReference(children[i].getPath())) {
// Keep track of references in case we come across a region
// that we can't otherwise account for.
references.add(child.substring(child.indexOf(".") + 1));
}
}
}
}
}
private void scanRootRegion() throws IOException {
final MetaUtils utils = new MetaUtils(this.conf);
try {
utils.scanRootRegion(new MetaUtils.ScannerListener() {
public boolean processRow(HRegionInfo info) throws IOException {
// First move the meta region to where it should be and rename
// subdirectories as necessary
migrateRegionDir(HConstants.META_TABLE_NAME, OLD_PREFIX
+ info.getEncodedName());
utils.scanMetaRegion(info, new MetaUtils.ScannerListener() {
public boolean processRow(HRegionInfo tableInfo) throws IOException {
// Move the region to where it should be and rename
// subdirectories as necessary
migrateRegionDir(tableInfo.getTableDesc().getName(), OLD_PREFIX
+ tableInfo.getEncodedName());
return true;
}
});
return true;
}
});
} finally {
utils.shutdown();
}
}
private void addHistorianFamilyToMeta() throws IOException {
if (this.migrationNeeded) {
// Be careful. We cannot use MetAutils if current hbase in the
@ -359,17 +374,16 @@ public class Migrate extends Configured implements Tool {
// Scan every meta region
final HRegion metaRegion = utils.getMetaRegion(info);
utils.scanMetaRegion(info, new MetaUtils.ScannerListener() {
public boolean processRow(HRegionInfo tableInfo) throws IOException {
HTableDescriptor desc = tableInfo.getTableDesc();
public boolean processRow(HRegionInfo hri) throws IOException {
HTableDescriptor desc = hri.getTableDesc();
Path tableDir =
HTableDescriptor.getTableDir(rootDir, desc.getName());
for (HColumnDescriptor column: desc.getFamilies()) {
if (column.isBloomFilterEnabled()) {
if (column.isBloomfilter()) {
// Column has a bloom filter
migrationNeeded = true;
Path filterDir = HStoreFile.getFilterDir(tableDir,
tableInfo.getEncodedName(), column.getName());
hri.getEncodedName(), column.getName());
if (fs.exists(filterDir)) {
// Filter dir exists
if (readOnly) {
@ -379,8 +393,10 @@ public class Migrate extends Configured implements Tool {
}
// Delete the filter
fs.delete(filterDir, true);
// Update the HRegionInfo in meta
utils.updateMETARegionInfo(metaRegion, tableInfo);
// Update the HRegionInfo in meta setting the bloomfilter
// to be disabled.
column.setBloomfilter(false);
utils.updateMETARegionInfo(metaRegion, hri);
}
}
}
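Migrate is a standard Hadoop Tool, so a driver can run it through ToolRunner. A hedged sketch: the accepted command-line arguments are not shown in this diff, so the "upgrade" argument and the no-argument constructor are assumptions.

// Hypothetical driver for the migration above.
int exitCode = org.apache.hadoop.util.ToolRunner.run(
    new HBaseConfiguration(), new Migrate(), new String[] { "upgrade" });
System.exit(exitCode);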

View File

@ -0,0 +1,36 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
/**
* Implementors of this interface want to be notified when an HRegion
* determines that a cache flush is needed. A FlushRequester (or null)
* must be passed to the HRegion constructor so it knows who to call when it
* has a filled memcache.
*/
public interface FlushRequester {
/**
* Tell the listener the cache needs to be flushed.
*
* @param region the HRegion requesting the cache flush
*/
void request(HRegion region);
}
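
As an illustration of the contract above, a minimal sketch of an implementor (the class and method names are hypothetical, not part of this commit) that queues flush requests for a background flusher thread to drain:

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

/** Hypothetical example: queues regions whose memcache needs flushing. */
class QueueingFlushRequester implements FlushRequester {
  private final BlockingQueue<HRegion> queue =
    new LinkedBlockingQueue<HRegion>();

  public void request(HRegion region) {
    // Never block the caller; the flusher thread drains the queue.
    queue.offer(region);
  }

  /** Called by the flusher thread; blocks until a region needs a flush. */
  HRegion takeRegionToFlush() throws InterruptedException {
    return queue.take();
  }
}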


@@ -0,0 +1,449 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.BloomFilterDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
/**
* An HColumnDescriptor contains information about a column family such as the
* number of versions, compression settings, etc.
*
* It is used as input when creating a table or adding a column. Once set, the
* parameters that specify a column cannot be changed without deleting the
* column and recreating it. If there is data stored in the column, it will be
* deleted when the column is deleted.
*/
public class HColumnDescriptor implements WritableComparable {
// For future backward compatibility
// Version 3 was when column names became byte arrays and when we picked up
// Time-to-live feature.
// Version 4 was when bloom filter descriptors were removed.
private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)4;
/**
* The type of compression.
* @see org.apache.hadoop.io.SequenceFile.Writer
*/
public static enum CompressionType {
/** Do not compress records. */
NONE,
/** Compress values only, each separately. */
RECORD,
/** Compress sequences of records together in blocks. */
BLOCK
}
// Defines for jruby/shell
public static final String COMPRESSION = "COMPRESSION";
public static final String IN_MEMORY = "IN_MEMORY";
public static final String BLOCKCACHE = "BLOCKCACHE";
public static final String LENGTH = "LENGTH";
public static final String TTL = "TTL";
public static final String BLOOMFILTER = "BLOOMFILTER";
public static final String FOREVER = "FOREVER";
/**
* Default compression type.
*/
public static final CompressionType DEFAULT_COMPRESSION =
CompressionType.NONE;
/**
* Default number of versions of a record to keep.
*/
public static final int DEFAULT_VERSIONS = 3;
/**
* Default maximum cell length.
*/
public static final int DEFAULT_LENGTH = Integer.MAX_VALUE;
/**
* Default setting for whether to serve from memory or not.
*/
public static final boolean DEFAULT_IN_MEMORY = false;
/**
* Default setting for whether to use a block cache or not.
*/
public static final boolean DEFAULT_BLOCKCACHE = false;
/**
* Default setting for whether or not to use bloomfilters.
*/
public static final boolean DEFAULT_BLOOMFILTER = false;
/**
* Default time to live of cell contents.
*/
public static final int DEFAULT_TTL = HConstants.FOREVER;
// Column family name
private byte [] name;
// Number of versions to keep
private int maxVersions = DEFAULT_VERSIONS;
// Compression setting if any
private CompressionType compressionType = DEFAULT_COMPRESSION;
// Serve reads from in-memory cache
private boolean inMemory = DEFAULT_IN_MEMORY;
// Serve reads from in-memory block cache
private boolean blockCacheEnabled = DEFAULT_BLOCKCACHE;
// Maximum value size
private int maxValueLength = DEFAULT_LENGTH;
// Time to live of cell contents, in seconds from last timestamp
private int timeToLive = DEFAULT_TTL;
// True if bloom filter was specified
private boolean bloomFilter = false;
/**
* Default constructor. Must be present for Writable.
*/
public HColumnDescriptor() {
this.name = null;
}
/**
* Construct a column descriptor specifying only the family name
* The other attributes are defaulted.
*
* @param columnName - column family name
*/
public HColumnDescriptor(final String columnName) {
this(Bytes.toBytes(columnName));
}
/**
* Construct a column descriptor specifying only the family name
* The other attributes are defaulted.
*
* @param columnName - column family name
*/
public HColumnDescriptor(final Text columnName) {
this(columnName.getBytes());
}
/**
* Construct a column descriptor specifying only the family name
* The other attributes are defaulted.
*
* @param columnName Column family name. Must have the ':' ending.
*/
public HColumnDescriptor(final byte [] columnName) {
this (columnName == null || columnName.length <= 0?
HConstants.EMPTY_BYTE_ARRAY: columnName, DEFAULT_VERSIONS,
DEFAULT_COMPRESSION, DEFAULT_IN_MEMORY, DEFAULT_BLOCKCACHE,
Integer.MAX_VALUE, DEFAULT_TTL, false);
}
/**
* Constructor
* @param columnName Column family name. Must have the ':' ending.
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
* cache
* @param blockCacheEnabled If true, MapFile blocks should be cached
* @param maxValueLength Restrict values to &lt;= this value
* @param timeToLive Time-to-live of cell contents, in seconds from last timestamp
* (use HConstants.FOREVER for unlimited TTL)
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters or '.': i.e. <code>[a-zA-Z_0-9.]</code>, or
* that does not end in a <code>:</code>
* @throws IllegalArgumentException if the number of versions is &lt;= 0
*/
public HColumnDescriptor(final byte [] columnName, final int maxVersions,
final CompressionType compression, final boolean inMemory,
final boolean blockCacheEnabled, final int maxValueLength,
final int timeToLive, final boolean bloomFilter) {
isLegalFamilyName(columnName);
this.name = stripColon(columnName);
if (maxVersions <= 0) {
// TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
// Until there is support, consider 0 or < 0 -- a configuration error.
throw new IllegalArgumentException("Maximum versions must be positive");
}
this.maxVersions = maxVersions;
this.inMemory = inMemory;
this.blockCacheEnabled = blockCacheEnabled;
this.maxValueLength = maxValueLength;
this.timeToLive = timeToLive;
this.bloomFilter = bloomFilter;
this.compressionType = compression;
}
private static byte [] stripColon(final byte [] n) {
byte [] result = new byte [n.length - 1];
// Have the stored family name be absent the colon delimiter
System.arraycopy(n, 0, result, 0, n.length - 1);
return result;
}
/**
* @param b Family name.
* @return <code>b</code>
* @throws IllegalArgumentException If not null and not a legitimate family
* name: i.e. 'printable' and ends in a ':' (Null passes are allowed because
* <code>b</code> can be null when deserializing).
*/
public static byte [] isLegalFamilyName(final byte [] b) {
if (b == null) {
return b;
}
if (b[b.length - 1] != ':') {
throw new IllegalArgumentException("Family names must end in a colon: " +
Bytes.toString(b));
}
for (int i = 0; i < (b.length - 1); i++) {
if (Character.isLetterOrDigit(b[i]) || b[i] == '_' || b[i] == '.') {
continue;
}
throw new IllegalArgumentException("Illegal character <" + b[i] +
">. Family names can only contain 'word characters' and must end " +
"with a colon: " + Bytes.toString(b));
}
return b;
}
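// For example (illustrative values): isLegalFamilyName(Bytes.toBytes("info:"))
// returns the name unchanged, while Bytes.toBytes("info") (no trailing colon)
// and Bytes.toBytes("in fo:") (space is not a word character) both provoke an
// IllegalArgumentException.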
/**
* @return Name of this column family
*/
public byte [] getName() {
return name;
}
/**
* @return Name of this column family
*/
public String getNameAsString() {
return Bytes.toString(this.name);
}
/** @return compression type being used for the column family */
public CompressionType getCompression() {
return this.compressionType;
}
/** @return maximum number of versions */
public int getMaxVersions() {
return this.maxVersions;
}
/**
* @return Compression type setting.
*/
public CompressionType getCompressionType() {
return this.compressionType;
}
/**
* @return True if we are to keep all in use HRegionServer cache.
*/
public boolean isInMemory() {
return this.inMemory;
}
/**
* @return Maximum value length.
*/
public int getMaxValueLength() {
return this.maxValueLength;
}
/**
* @return Time to live.
*/
public int getTimeToLive() {
return this.timeToLive;
}
/**
* @return True if MapFile blocks should be cached.
*/
public boolean isBlockCacheEnabled() {
return blockCacheEnabled;
}
/**
* @return true if a bloom filter is enabled
*/
public boolean isBloomFilterEnabled() {
return this.bloomFilter;
}
/** {@inheritDoc} */
@Override
public String toString() {
return "{" + HConstants.NAME + " => '" + Bytes.toString(name) +
"', " + HConstants.VERSIONS + " => " + maxVersions +
", " + COMPRESSION + " => '" + this.compressionType +
"', " + IN_MEMORY + " => " + inMemory +
", " + BLOCKCACHE + " => " + blockCacheEnabled +
", " + LENGTH + " => " + maxValueLength +
", " + TTL + " => " +
(timeToLive == HConstants.FOREVER ? "FOREVER" :
Integer.toString(timeToLive)) +
", " + BLOOMFILTER + " => " + bloomFilter + "}";
}
/** {@inheritDoc} */
@Override
public boolean equals(Object obj) {
return compareTo(obj) == 0;
}
/** {@inheritDoc} */
@Override
public int hashCode() {
int result = Bytes.hashCode(this.name);
result ^= Integer.valueOf(this.maxVersions).hashCode();
result ^= this.compressionType.hashCode();
result ^= Boolean.valueOf(this.inMemory).hashCode();
result ^= Boolean.valueOf(this.blockCacheEnabled).hashCode();
result ^= Integer.valueOf(this.maxValueLength).hashCode();
result ^= Integer.valueOf(this.timeToLive).hashCode();
result ^= Boolean.valueOf(this.bloomFilter).hashCode();
result ^= Byte.valueOf(COLUMN_DESCRIPTOR_VERSION).hashCode();
return result;
}
// Writable
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
int versionNumber = in.readByte();
if (versionNumber <= 2) {
Text t = new Text();
t.readFields(in);
this.name = t.getBytes();
if (HStoreKey.getFamilyDelimiterIndex(this.name) > 0) {
this.name = stripColon(this.name);
}
} else {
this.name = Bytes.readByteArray(in);
}
this.maxVersions = in.readInt();
int ordinal = in.readInt();
this.compressionType = CompressionType.values()[ordinal];
this.inMemory = in.readBoolean();
this.maxValueLength = in.readInt();
this.bloomFilter = in.readBoolean();
if (this.bloomFilter && versionNumber < 5) {
// If a bloomFilter is enabled and the column descriptor is less than
// version 5, we need to skip over it to read the rest of the column
// descriptor. There are no BloomFilterDescriptors written to disk for
// column descriptors with a version number >= 5
BloomFilterDescriptor junk = new BloomFilterDescriptor();
junk.readFields(in);
}
if (versionNumber > 1) {
this.blockCacheEnabled = in.readBoolean();
}
if (versionNumber > 2) {
this.timeToLive = in.readInt();
}
}
/** {@inheritDoc} */
public void write(DataOutput out) throws IOException {
out.writeByte(COLUMN_DESCRIPTOR_VERSION);
Bytes.writeByteArray(out, this.name);
out.writeInt(this.maxVersions);
out.writeInt(this.compressionType.ordinal());
out.writeBoolean(this.inMemory);
out.writeInt(this.maxValueLength);
out.writeBoolean(this.bloomFilter);
out.writeBoolean(this.blockCacheEnabled);
out.writeInt(this.timeToLive);
}
// Comparable
/** {@inheritDoc} */
public int compareTo(Object o) {
HColumnDescriptor other = (HColumnDescriptor)o;
int result = Bytes.compareTo(this.name, other.getName());
if(result == 0) {
result = Integer.valueOf(this.maxVersions).compareTo(
Integer.valueOf(other.maxVersions));
}
if(result == 0) {
result = this.compressionType.compareTo(other.compressionType);
}
if(result == 0) {
if(this.inMemory == other.inMemory) {
result = 0;
} else if(this.inMemory) {
result = -1;
} else {
result = 1;
}
}
if(result == 0) {
if(this.blockCacheEnabled == other.blockCacheEnabled) {
result = 0;
} else if(this.blockCacheEnabled) {
result = -1;
} else {
result = 1;
}
}
if(result == 0) {
result = other.maxValueLength - this.maxValueLength;
}
if(result == 0) {
result = other.timeToLive - this.timeToLive;
}
if(result == 0) {
if(this.bloomFilter == other.bloomFilter) {
result = 0;
} else if(this.bloomFilter) {
result = -1;
} else {
result = 1;
}
}
return result;
}
}
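
To make the constructor and Writable contract above concrete, a sketch (the family name and settings are illustrative) that builds a descriptor and round-trips it through write/readFields; the leading version byte is what lets readFields cope with descriptors written by older releases:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

HColumnDescriptor family = new HColumnDescriptor(
    Bytes.toBytes("contents:"),              // family names must end in ':'
    3,                                       // maxVersions
    HColumnDescriptor.CompressionType.NONE,  // compression
    false,                                   // inMemory
    false,                                   // blockCacheEnabled
    Integer.MAX_VALUE,                       // maxValueLength
    HConstants.FOREVER,                      // timeToLive
    false);                                  // bloomFilter

// Serialize, then deserialize into a fresh descriptor.
ByteArrayOutputStream out = new ByteArrayOutputStream();
family.write(new DataOutputStream(out));
HColumnDescriptor copy = new HColumnDescriptor();
copy.readFields(new DataInputStream(
    new ByteArrayInputStream(out.toByteArray())));
assert family.compareTo(copy) == 0;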


@@ -0,0 +1,228 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;
/**
* HConstants holds a bunch of HBase-related constants
*/
public interface HConstants {
/** long constant for zero */
static final Long ZERO_L = Long.valueOf(0L);
static final String NINES = "99999999999999";
static final String ZEROES = "00000000000000";
// For migration
/** name of version file */
static final String VERSION_FILE_NAME = "hbase.version";
/**
* Current version of file system
* Version 4 supports only one kind of bloom filter
*/
public static final String FILE_SYSTEM_VERSION = "4";
// Configuration parameters
// TODO: URL for hbase master like hdfs URLs with host and port.
// Like jdbc URLs? URLs could be used to refer to table cells?
// jdbc:mysql://[host][,failoverhost...][:port]/[database]
// jdbc:mysql://[host][,failoverhost...][:port]/[database][?propertyName1][=propertyValue1][&propertyName2][=propertyValue2]...
// Key into HBaseConfiguration for the hbase.master address.
// TODO: Support 'local': i.e. default of all running in single
// process. Same for regionserver. TODO: Is having HBase homed
// on port 60k OK?
/** Parameter name for master address */
static final String MASTER_ADDRESS = "hbase.master";
/** default host address */
static final String DEFAULT_HOST = "0.0.0.0";
/** default port that the master listens on */
static final int DEFAULT_MASTER_PORT = 60000;
/** Default master address */
static final String DEFAULT_MASTER_ADDRESS = DEFAULT_HOST + ":" +
DEFAULT_MASTER_PORT;
/** default port for master web api */
static final int DEFAULT_MASTER_INFOPORT = 60010;
/** Parameter name for hbase.regionserver address. */
static final String REGIONSERVER_ADDRESS = "hbase.regionserver";
/** Default region server address */
static final String DEFAULT_REGIONSERVER_ADDRESS = DEFAULT_HOST + ":60020";
/** default port for region server web api */
static final int DEFAULT_REGIONSERVER_INFOPORT = 60030;
/** Parameter name for what region server interface to use. */
static final String REGION_SERVER_CLASS = "hbase.regionserver.class";
/** Parameter name for what region server implementation to use. */
static final String REGION_SERVER_IMPL= "hbase.regionserver.impl";
/** Default region server interface class name. */
static final String DEFAULT_REGION_SERVER_CLASS = HRegionInterface.class.getName();
/** Parameter name for how often threads should wake up */
static final String THREAD_WAKE_FREQUENCY = "hbase.server.thread.wakefrequency";
/** Parameter name for HBase instance root directory */
static final String HBASE_DIR = "hbase.rootdir";
/** Used to construct the name of the log directory for a region server */
static final String HREGION_LOGDIR_NAME = "log";
/** Name of old log file for reconstruction */
static final String HREGION_OLDLOGFILE_NAME = "oldlogfile.log";
/** Default maximum file size */
static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
/** Default size of a reservation block */
static final int DEFAULT_SIZE_RESERVATION_BLOCK = 1024 * 1024 * 5;
// Always store the location of the root table's HRegion.
// This HRegion is never split.
// region name = table + startkey + regionid. This is the row key.
// each row in the root and meta tables describes exactly 1 region
// Do we ever need to know all the information that we are storing?
// Note that the name of the root table starts with "-" and the name of the
// meta table starts with "." Why? it's a trick. It turns out that when we
// store region names in memory, we use a SortedMap. Since "-" sorts before
// "." (and since no other table name can start with either of these
// characters), the root region will always be the first entry in such a Map,
// followed by all the meta regions (which will be ordered by their starting
// row key as well), followed by all user tables. So when the Master is
// choosing regions to assign, it will always choose the root region first,
// followed by the meta regions, followed by user regions. Since the root
// and meta regions always need to be on-line, this ensures that they will
// be the first to be reassigned if the server(s) they are being served by
// should go down.
/** The root table's name.*/
static final byte [] ROOT_TABLE_NAME = Bytes.toBytes("-ROOT-");
/** The META table's name. */
static final byte [] META_TABLE_NAME = Bytes.toBytes(".META.");
// Defines for the column names used in both ROOT and META HBase 'meta' tables.
/** The ROOT and META column family (string) */
static final String COLUMN_FAMILY_STR = "info:";
/** The META historian column family (string) */
static final String COLUMN_FAMILY_HISTORIAN_STR = "historian:";
/** The ROOT and META column family */
static final byte [] COLUMN_FAMILY = Bytes.toBytes(COLUMN_FAMILY_STR);
/** The META historian column family */
static final byte [] COLUMN_FAMILY_HISTORIAN = Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR);
/** Array of meta column names */
static final byte[][] COLUMN_FAMILY_ARRAY = new byte[][] {COLUMN_FAMILY};
/** ROOT/META column family member - contains HRegionInfo */
static final byte [] COL_REGIONINFO =
Bytes.toBytes(COLUMN_FAMILY_STR + "regioninfo");
/** Array of column - contains HRegionInfo */
static final byte[][] COL_REGIONINFO_ARRAY = new byte[][] {COL_REGIONINFO};
/** ROOT/META column family member - contains HServerAddress.toString() */
static final byte[] COL_SERVER = Bytes.toBytes(COLUMN_FAMILY_STR + "server");
/** ROOT/META column family member - contains server start code (a long) */
static final byte [] COL_STARTCODE =
Bytes.toBytes(COLUMN_FAMILY_STR + "serverstartcode");
/** the lower half of a split region */
static final byte [] COL_SPLITA = Bytes.toBytes(COLUMN_FAMILY_STR + "splitA");
/** the upper half of a split region */
static final byte [] COL_SPLITB = Bytes.toBytes(COLUMN_FAMILY_STR + "splitB");
/** All the columns in the catalog -ROOT- and .META. tables.
*/
static final byte[][] ALL_META_COLUMNS = {COL_REGIONINFO, COL_SERVER,
COL_STARTCODE, COL_SPLITA, COL_SPLITB};
// Other constants
/**
* An empty instance.
*/
static final byte [] EMPTY_BYTE_ARRAY = new byte [0];
/**
* Used by scanners, etc when they want to start at the beginning of a region
*/
static final byte [] EMPTY_START_ROW = EMPTY_BYTE_ARRAY;
/**
* Last row in a table.
*/
static final byte [] EMPTY_END_ROW = EMPTY_START_ROW;
/**
* Used by scanners and others when they're trying to detect the end of a
* table
*/
static final byte [] LAST_ROW = EMPTY_BYTE_ARRAY;
/** When we encode strings, we always specify UTF8 encoding */
static final String UTF8_ENCODING = "UTF-8";
/**
* Timestamp to use when we want to refer to the latest cell.
* This is the timestamp sent by clients when no timestamp is specified on
* commit.
*/
static final long LATEST_TIMESTAMP = Long.MAX_VALUE;
/**
* Define for 'return-all-versions'.
*/
static final int ALL_VERSIONS = Integer.MAX_VALUE;
/**
* Unlimited time-to-live.
*/
static final int FOREVER = -1;
public static final String HBASE_CLIENT_RETRIES_NUMBER_KEY =
"hbase.client.retries.number";
public static final int DEFAULT_CLIENT_RETRIES = 5;
public static final String NAME = "NAME";
public static final String VERSIONS = "VERSIONS";
}
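
A small sketch of the naming trick described in the comments above: because '-' (0x2d) sorts before '.' (0x2e), and both sort before letters, the root region is always the first entry in a map of table names:

import java.util.SortedMap;
import java.util.TreeMap;

SortedMap<byte [], String> tables =
    new TreeMap<byte [], String>(Bytes.BYTES_COMPARATOR);
tables.put(Bytes.toBytes("users"), "a user table");
tables.put(HConstants.META_TABLE_NAME, "meta");  // ".META."
tables.put(HConstants.ROOT_TABLE_NAME, "root");  // "-ROOT-"
// Iterates in the order -ROOT-, .META., users, so the master always
// assigns the root region first, then meta regions, then user regions.
for (byte [] name : tables.keySet()) {
  System.out.println(Bytes.toString(name));
}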


@@ -0,0 +1,698 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Reader;
/**
* HLog stores all the edits to the HStore.
*
* It performs logfile-rolling, so external callers are not aware that the
* underlying file is being rolled.
*
* <p>
* A single HLog is used by several HRegions simultaneously.
*
* <p>
* Each HRegion is identified by a unique <code>long</code> id. HRegions do
* not need to declare themselves before using the HLog; they simply include
* their HRegion-id in the <code>append</code> or
* <code>completeCacheFlush</code> calls.
*
* <p>
* An HLog consists of multiple on-disk files, which have a chronological order.
* As data is flushed to other (better) on-disk structures, the log becomes
* obsolete. We can destroy all the log messages for a given HRegion-id up to
* the most-recent CACHEFLUSH message from that HRegion.
*
* <p>
* It's only practical to delete entire files. Thus, we delete an entire on-disk
* file F when all of the messages in F have a log-sequence-id that's older
* (smaller) than the most-recent CACHEFLUSH message for every HRegion that has
* a message in F.
*
* <p>
* Synchronized methods can never execute in parallel. However, between the
* start of a cache flush and the completion point, appends are allowed but log
* rolling is not. To prevent log rolling taking place during this period, a
* separate reentrant lock is used.
*
* <p>
* TODO: Vuk Ercegovac also pointed out that keeping HBase HRegion edit logs in
* HDFS is currently flawed. HBase writes edits to logs and to a memcache. The
* 'atomic' write to the log is meant to serve as insurance against abnormal
* RegionServer exit: on startup, the log is rerun to reconstruct an HRegion's
* last wholesome state. But files in HDFS do not 'exist' until they are cleanly
* closed -- something that will not happen if RegionServer exits without
* running its 'close'.
*/
public class HLog implements HConstants {
private static final Log LOG = LogFactory.getLog(HLog.class);
private static final String HLOG_DATFILE = "hlog.dat.";
static final byte [] METACOLUMN = Bytes.toBytes("METACOLUMN:");
static final byte [] METAROW = Bytes.toBytes("METAROW");
final FileSystem fs;
final Path dir;
final Configuration conf;
final LogRollListener listener;
final long threadWakeFrequency;
private final int maxlogentries;
/*
* Current log file.
*/
SequenceFile.Writer writer;
/*
* Map of all log files but the current one.
*/
final SortedMap<Long, Path> outputfiles =
Collections.synchronizedSortedMap(new TreeMap<Long, Path>());
/*
* Map of region to last sequence/edit id.
*/
private final Map<byte [], Long> lastSeqWritten = Collections.
synchronizedSortedMap(new TreeMap<byte [], Long>(Bytes.BYTES_COMPARATOR));
private volatile boolean closed = false;
private final Integer sequenceLock = new Integer(0);
private volatile long logSeqNum = 0;
private volatile long filenum = 0;
private volatile long old_filenum = -1;
private volatile int numEntries = 0;
// This lock prevents starting a log roll during a cache flush.
// synchronized is insufficient because a cache flush spans two method calls.
private final Lock cacheFlushLock = new ReentrantLock();
// We synchronize on updateLock to prevent updates and to prevent a log roll
// during an update
private final Integer updateLock = new Integer(0);
/**
* Create an edit log at the given <code>dir</code> location.
*
* You should never have to load an existing log. If there is a log at
* startup, it should have already been processed and deleted by the time the
* HLog object is started up.
*
* @param fs
* @param dir
* @param conf
* @param listener
* @throws IOException
*/
public HLog(final FileSystem fs, final Path dir, final Configuration conf,
final LogRollListener listener) throws IOException {
this.fs = fs;
this.dir = dir;
this.conf = conf;
this.listener = listener;
this.threadWakeFrequency = conf.getLong(THREAD_WAKE_FREQUENCY, 10 * 1000);
this.maxlogentries =
conf.getInt("hbase.regionserver.maxlogentries", 30 * 1000);
if (fs.exists(dir)) {
throw new IOException("Target HLog directory already exists: " + dir);
}
fs.mkdirs(dir);
rollWriter();
}
/*
* Accessor for tests.
* @return Current state of the monotonically increasing file id.
*/
long getFilenum() {
return this.filenum;
}
/**
* Get the compression type for the hlog files.
* @param c Configuration to use.
* @return the kind of compression to use
*/
private static CompressionType getCompressionType(final Configuration c) {
String name = c.get("hbase.io.seqfile.compression.type");
return name == null? CompressionType.NONE: CompressionType.valueOf(name);
}
/**
* Called by HRegionServer when it opens a new region to ensure that log
* sequence numbers are always greater than the latest sequence number of the
* region being brought on-line.
*
* @param newvalue We'll set log edit/sequence number to this value if it
* is greater than the current value.
*/
void setSequenceNumber(long newvalue) {
synchronized (sequenceLock) {
if (newvalue > logSeqNum) {
if (LOG.isDebugEnabled()) {
LOG.debug("changing sequence number from " + logSeqNum + " to " +
newvalue);
}
logSeqNum = newvalue;
}
}
}
/**
* Roll the log writer. That is, start writing log messages to a new file.
*
* Because a log cannot be rolled during a cache flush, and a cache flush
* spans two method calls, a special lock needs to be obtained so that a cache
* flush cannot start when the log is being rolled and the log cannot be
* rolled during a cache flush.
*
* <p>Note that this method cannot be synchronized because it is possible that
* startCacheFlush runs, obtaining the cacheFlushLock, then this method could
* start which would obtain the lock on this but block on obtaining the
* cacheFlushLock and then completeCacheFlush could be called which would wait
* for the lock on this and consequently never release the cacheFlushLock.
*
* @throws IOException
*/
public void rollWriter() throws IOException {
this.cacheFlushLock.lock();
try {
if (closed) {
return;
}
synchronized (updateLock) {
if (this.writer != null) {
// Close the current writer, get a new one.
this.writer.close();
Path p = computeFilename(old_filenum);
if (LOG.isDebugEnabled()) {
LOG.debug("Closing current log writer " + FSUtils.getPath(p));
}
if (filenum > 0) {
synchronized (this.sequenceLock) {
this.outputfiles.put(Long.valueOf(this.logSeqNum - 1), p);
}
}
}
old_filenum = filenum;
filenum = System.currentTimeMillis();
Path newPath = computeFilename(filenum);
this.writer = SequenceFile.createWriter(this.fs, this.conf, newPath,
HLogKey.class, HLogEdit.class, getCompressionType(this.conf));
LOG.info("New log writer created at " + FSUtils.getPath(newPath));
// Can we delete any of the old log files?
if (this.outputfiles.size() > 0) {
if (this.lastSeqWritten.size() <= 0) {
LOG.debug("Last sequence written is empty. Deleting all old hlogs");
// If so, then no new writes have come in since all regions were
// flushed (and removed from the lastSeqWritten map). Means can
// remove all but currently open log file.
for (Map.Entry<Long, Path> e : this.outputfiles.entrySet()) {
deleteLogFile(e.getValue(), e.getKey());
}
this.outputfiles.clear();
} else {
// Get oldest edit/sequence id. If logs are older than this id,
// then safe to remove.
Long oldestOutstandingSeqNum =
Collections.min(this.lastSeqWritten.values());
// Get the set of all log files whose final ID is older than or
// equal to the oldest pending region operation
TreeSet<Long> sequenceNumbers =
new TreeSet<Long>(this.outputfiles.headMap(
(Long.valueOf(oldestOutstandingSeqNum.longValue() + 1L))).keySet());
// Now remove old log files (if any)
if (LOG.isDebugEnabled()) {
// Find region associated with oldest key -- helps debugging.
byte [] oldestRegion = null;
for (Map.Entry<byte [], Long> e: this.lastSeqWritten.entrySet()) {
if (e.getValue().longValue() == oldestOutstandingSeqNum.longValue()) {
oldestRegion = e.getKey();
break;
}
}
if (LOG.isDebugEnabled() && sequenceNumbers.size() > 0) {
LOG.debug("Found " + sequenceNumbers.size() +
" logs to remove " +
"using oldest outstanding seqnum of " +
oldestOutstandingSeqNum + " from region " + oldestRegion);
}
}
if (sequenceNumbers.size() > 0) {
for (Long seq : sequenceNumbers) {
deleteLogFile(this.outputfiles.remove(seq), seq);
}
}
}
}
this.numEntries = 0;
}
} finally {
this.cacheFlushLock.unlock();
}
}
private void deleteLogFile(final Path p, final Long seqno) throws IOException {
LOG.info("removing old log file " + FSUtils.getPath(p) +
" whose highest sequence/edit id is " + seqno);
this.fs.delete(p, true);
}
/**
* This is a convenience method that computes a new filename with a given
* file-number.
*/
Path computeFilename(final long fn) {
return new Path(dir, HLOG_DATFILE + fn);
}
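// For example (illustrative number): computeFilename(1216278000000L) yields
// "hlog.dat.1216278000000" under this log's directory.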
/**
* Shut down the log and delete the log directory
*
* @throws IOException
*/
public void closeAndDelete() throws IOException {
close();
fs.delete(dir, true);
}
/**
* Shut down the log.
*
* @throws IOException
*/
void close() throws IOException {
cacheFlushLock.lock();
try {
synchronized (updateLock) {
if (LOG.isDebugEnabled()) {
LOG.debug("closing log writer in " + this.dir.toString());
}
this.writer.close();
this.closed = true;
}
} finally {
cacheFlushLock.unlock();
}
}
/**
* Append a set of edits to the log. Log edits are keyed by regionName,
* rowname, and log-sequence-id.
*
* Later, if we sort by these keys, we obtain all the relevant edits for a
* given key-range of the HRegion (TODO). Any edits that do not have a
* matching {@link HConstants#COMPLETE_CACHEFLUSH} message can be discarded.
*
* <p>
* Logs cannot be restarted once closed, or once the HLog process dies. Each
* time the HLog starts, it must create a new log. This means that other
* systems should process the log appropriately upon each startup (and prior
* to initializing HLog).
*
* synchronized prevents appends during the completion of a cache flush or for
* the duration of a log roll.
*
* @param regionName
* @param tableName
* @param edits Edits to append, keyed by HStoreKey (row/column/timestamp)
* @throws IOException
*/
void append(byte [] regionName, byte [] tableName,
TreeMap<HStoreKey, byte[]> edits)
throws IOException {
if (closed) {
throw new IOException("Cannot append; log is closed");
}
synchronized (updateLock) {
long seqNum[] = obtainSeqNum(edits.size());
// The 'lastSeqWritten' map holds the sequence number of the oldest
// write for each region. When the cache is flushed, the entry for the
// region being flushed is removed if the sequence number of the flush
// is greater than or equal to the value in lastSeqWritten.
if (!this.lastSeqWritten.containsKey(regionName)) {
this.lastSeqWritten.put(regionName, Long.valueOf(seqNum[0]));
}
int counter = 0;
for (Map.Entry<HStoreKey, byte[]> es : edits.entrySet()) {
HStoreKey key = es.getKey();
HLogKey logKey =
new HLogKey(regionName, tableName, key.getRow(), seqNum[counter++]);
HLogEdit logEdit =
new HLogEdit(key.getColumn(), es.getValue(), key.getTimestamp());
try {
this.writer.append(logKey, logEdit);
} catch (IOException e) {
LOG.fatal("Could not append. Requesting close of log", e);
requestLogRoll();
throw e;
}
this.numEntries++;
}
}
if (this.numEntries > this.maxlogentries) {
requestLogRoll();
}
}
private void requestLogRoll() {
if (this.listener != null) {
this.listener.logRollRequested();
}
}
/** @return How many items have been added to the log */
int getNumEntries() {
return numEntries;
}
/**
* Obtain a log sequence number.
*/
private long obtainSeqNum() {
long value;
synchronized (sequenceLock) {
value = logSeqNum++;
}
return value;
}
/** @return the number of log files in use */
int getNumLogFiles() {
return outputfiles.size();
}
/**
* Obtain a specified number of sequence numbers
*
* @param num number of sequence numbers to obtain
* @return array of sequence numbers
*/
private long[] obtainSeqNum(int num) {
long[] results = new long[num];
synchronized (this.sequenceLock) {
for (int i = 0; i < num; i++) {
results[i] = this.logSeqNum++;
}
}
return results;
}
/**
* By acquiring a log sequence ID, we can allow log messages to continue while
* we flush the cache.
*
* Acquire a lock so that we do not roll the log between the start and
* completion of a cache-flush. Otherwise the log-seq-id for the flush will
* not appear in the correct logfile.
*
* @return sequence ID to pass {@link #completeCacheFlush(byte[], byte[], long)}
* @see #completeCacheFlush(byte[], byte[], long)
* @see #abortCacheFlush()
*/
long startCacheFlush() {
this.cacheFlushLock.lock();
return obtainSeqNum();
}
/**
* Complete the cache flush
*
* Protected by cacheFlushLock
*
* @param regionName
* @param tableName
* @param logSeqId
* @throws IOException
*/
void completeCacheFlush(final byte [] regionName, final byte [] tableName,
final long logSeqId) throws IOException {
try {
if (this.closed) {
return;
}
synchronized (updateLock) {
this.writer.append(new HLogKey(regionName, tableName, HLog.METAROW, logSeqId),
new HLogEdit(HLog.METACOLUMN, HLogEdit.completeCacheFlush.get(),
System.currentTimeMillis()));
this.numEntries++;
Long seq = this.lastSeqWritten.get(regionName);
if (seq != null && logSeqId >= seq.longValue()) {
this.lastSeqWritten.remove(regionName);
}
}
} finally {
this.cacheFlushLock.unlock();
}
}
/**
* Abort a cache flush.
* Call if the flush fails. Note that the only recovery for an aborted flush
* currently is a restart of the regionserver so the snapshot content dropped
* by the failure gets restored to the memcache.
*/
void abortCacheFlush() {
this.cacheFlushLock.unlock();
}
/**
* Split up a bunch of log files, that are no longer being written to, into
* new files, one per region. Delete the old log files when finished.
*
* @param rootDir qualified root directory of the HBase instance
* @param srcDir Directory of log files to split: e.g.
* <code>${ROOTDIR}/log_HOST_PORT</code>
* @param fs FileSystem
* @param conf HBaseConfiguration
* @throws IOException
*/
public static void splitLog(Path rootDir, Path srcDir, FileSystem fs,
Configuration conf) throws IOException {
if (!fs.exists(srcDir)) {
// Nothing to do
return;
}
FileStatus logfiles[] = fs.listStatus(srcDir);
if (logfiles == null || logfiles.length == 0) {
// Nothing to do
return;
}
LOG.info("splitting " + logfiles.length + " log(s) in " +
srcDir.toString());
Map<byte [], SequenceFile.Writer> logWriters =
new TreeMap<byte [], SequenceFile.Writer>(Bytes.BYTES_COMPARATOR);
try {
for (int i = 0; i < logfiles.length; i++) {
if (LOG.isDebugEnabled()) {
LOG.debug("Splitting " + i + " of " + logfiles.length + ": " +
logfiles[i].getPath());
}
// Check for empty file.
if (logfiles[i].getLen() <= 0) {
LOG.info("Skipping " + logfiles[i].toString() +
" because zero length");
continue;
}
HLogKey key = new HLogKey();
HLogEdit val = new HLogEdit();
SequenceFile.Reader in =
new SequenceFile.Reader(fs, logfiles[i].getPath(), conf);
try {
int count = 0;
for (; in.next(key, val); count++) {
byte [] tableName = key.getTablename();
byte [] regionName = key.getRegionName();
SequenceFile.Writer w = logWriters.get(regionName);
if (w == null) {
Path logfile = new Path(
HRegion.getRegionDir(
HTableDescriptor.getTableDir(rootDir, tableName),
HRegionInfo.encodeRegionName(regionName)),
HREGION_OLDLOGFILE_NAME);
Path oldlogfile = null;
SequenceFile.Reader old = null;
if (fs.exists(logfile)) {
LOG.warn("Old log file " + logfile +
" already exists. Copying existing file to new file");
oldlogfile = new Path(logfile.toString() + ".old");
fs.rename(logfile, oldlogfile);
old = new SequenceFile.Reader(fs, oldlogfile, conf);
}
w = SequenceFile.createWriter(fs, conf, logfile, HLogKey.class,
HLogEdit.class, getCompressionType(conf));
// Use a copy of regionName; the regionName object is reused inside
// HStoreKey.getRegionName so its content changes as we iterate.
logWriters.put(regionName, w);
if (LOG.isDebugEnabled()) {
LOG.debug("Creating new log file writer for path " + logfile +
" and region " + regionName);
}
if (old != null) {
// Copy from existing log file
HLogKey oldkey = new HLogKey();
HLogEdit oldval = new HLogEdit();
for (; old.next(oldkey, oldval); count++) {
if (LOG.isDebugEnabled() && count > 0 && count % 10000 == 0) {
LOG.debug("Copied " + count + " edits");
}
w.append(oldkey, oldval);
}
old.close();
fs.delete(oldlogfile, true);
}
}
w.append(key, val);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Applied " + count + " total edits from " +
logfiles[i].getPath().toString());
}
} catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e);
if (!(e instanceof EOFException)) {
LOG.warn("Exception processing " + logfiles[i].getPath() +
" -- continuing. Possible DATA LOSS!", e);
}
} finally {
try {
in.close();
} catch (IOException e) {
LOG.warn("Close in finally threw exception -- continuing", e);
}
// Delete the input file now so we do not replay its edits. We could
// have gotten here because of an exception; if so, there is probably
// nothing we can do about it. Replaying could work, but we could
// also be stuck replaying forever. Just continue, though we may
// have lost some edits.
fs.delete(logfiles[i].getPath(), true);
}
}
} finally {
for (SequenceFile.Writer w : logWriters.values()) {
w.close();
}
}
try {
fs.delete(srcDir, true);
} catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e);
IOException io = new IOException("Cannot delete: " + srcDir);
io.initCause(e);
throw io;
}
LOG.info("log file splitting completed for " + srcDir.toString());
}
private static void usage() {
System.err.println("Usage: java org.apache.hbase.HLog" +
" {--dump <logfile>... | --split <logdir>...}");
}
/**
* Pass one or more log file names and it will either dump out a text version
* on <code>stdout</code> or split the specified log files.
*
* @param args
* @throws IOException
*/
public static void main(String[] args) throws IOException {
if (args.length < 2) {
usage();
System.exit(-1);
}
boolean dump = true;
if (args[0].compareTo("--dump") != 0) {
if (args[0].compareTo("--split") == 0) {
dump = false;
} else {
usage();
System.exit(-1);
}
}
Configuration conf = new HBaseConfiguration();
FileSystem fs = FileSystem.get(conf);
Path baseDir = new Path(conf.get(HBASE_DIR));
for (int i = 1; i < args.length; i++) {
Path logPath = new Path(args[i]);
if (!fs.exists(logPath)) {
throw new FileNotFoundException(args[i] + " does not exist");
}
if (dump) {
if (!fs.isFile(logPath)) {
throw new IOException(args[i] + " is not a file");
}
Reader log = new SequenceFile.Reader(fs, logPath, conf);
try {
HLogKey key = new HLogKey();
HLogEdit val = new HLogEdit();
while (log.next(key, val)) {
System.out.println(key.toString() + " " + val.toString());
}
} finally {
log.close();
}
} else {
if (!fs.getFileStatus(logPath).isDir()) {
throw new IOException(args[i] + " is not a directory");
}
splitLog(baseDir, logPath, fs, conf);
}
}
}
}
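
To make the cache-flush locking contract above concrete, a sketch of the sequence a caller (HRegion, in practice) is expected to follow, assuming log, regionName, and tableName are in scope; flushMemcacheToDisk is a hypothetical stand-in for the actual flush work:

long seqId = log.startCacheFlush();  // takes cacheFlushLock; log rolls wait
try {
  flushMemcacheToDisk();             // hypothetical: persist the memcache
} catch (IOException e) {
  // Release the lock. The only recovery for an aborted flush is a
  // regionserver restart, which replays the log.
  log.abortCacheFlush();
  throw e;
}
// Appends the CACHEFLUSH marker for this region and releases the lock.
log.completeCacheFlush(regionName, tableName, seqId);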


@@ -0,0 +1,141 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.*;
import java.io.*;
import org.apache.hadoop.hbase.HConstants;
/**
* A log value.
*
* These aren't sortable; you need to sort by the matching HLogKey.
* The table and row are already identified in HLogKey.
* This just indicates the column and value.
*/
public class HLogEdit implements Writable, HConstants {
/** Value stored for a deleted item */
public static ImmutableBytesWritable deleteBytes = null;
/** Value written to HLog on a complete cache flush */
public static ImmutableBytesWritable completeCacheFlush = null;
static {
try {
deleteBytes =
new ImmutableBytesWritable("HBASE::DELETEVAL".getBytes(UTF8_ENCODING));
completeCacheFlush =
new ImmutableBytesWritable("HBASE::CACHEFLUSH".getBytes(UTF8_ENCODING));
} catch (UnsupportedEncodingException e) {
assert(false);
}
}
/**
* @param value
* @return True if an entry and its content is {@link #deleteBytes}.
*/
public static boolean isDeleted(final byte [] value) {
return (value == null)? false: deleteBytes.compareTo(value) == 0;
}
private byte [] column;
private byte [] val;
private long timestamp;
private static final int MAX_VALUE_LEN = 128;
/**
* Default constructor used by Writable
*/
public HLogEdit() {
super();
}
/**
* Construct a fully initialized HLogEdit
* @param c column name
* @param bval value
* @param timestamp timestamp for modification
*/
public HLogEdit(byte [] c, byte [] bval, long timestamp) {
this.column = c;
this.val = bval;
this.timestamp = timestamp;
}
/** @return the column */
public byte [] getColumn() {
return this.column;
}
/** @return the value */
public byte [] getVal() {
return this.val;
}
/** @return the timestamp */
public long getTimestamp() {
return this.timestamp;
}
/**
* @return Column name, timestamp, and the first 128 bytes of the value
* as a String.
*/
@Override
public String toString() {
String value = "";
try {
value = (this.val.length > MAX_VALUE_LEN)?
new String(this.val, 0, MAX_VALUE_LEN, HConstants.UTF8_ENCODING) +
"...":
new String(getVal(), HConstants.UTF8_ENCODING);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF8 encoding not present?", e);
}
return "(" + Bytes.toString(getColumn()) + "/" + getTimestamp() + "/" +
value + ")";
}
// Writable
/** {@inheritDoc} */
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.column);
out.writeInt(this.val.length);
out.write(this.val);
out.writeLong(timestamp);
}
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
this.column = Bytes.readByteArray(in);
this.val = new byte[in.readInt()];
in.readFully(this.val);
this.timestamp = in.readLong();
}
}
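
A short sketch of the sentinel convention above (the column and value are made up): a delete travels through the log as an ordinary edit whose value is deleteBytes, and readers detect it with isDeleted:

long now = System.currentTimeMillis();
HLogEdit put = new HLogEdit(
    Bytes.toBytes("info:server"), Bytes.toBytes("10.0.0.1:60020"), now);
HLogEdit delete = new HLogEdit(
    Bytes.toBytes("info:server"), HLogEdit.deleteBytes.get(), now);
assert !HLogEdit.isDeleted(put.getVal());
assert HLogEdit.isDeleted(delete.getVal());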


@@ -0,0 +1,161 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.*;
import java.io.*;
/**
* A Key for an entry in the change log.
*
* The log intermingles edits to many tables and rows, so each log entry
* identifies the appropriate table and row. Within a table and row, they're
* also sorted.
*/
public class HLogKey implements WritableComparable {
private byte [] regionName;
private byte [] tablename;
private byte [] row;
private long logSeqNum;
/** Create an empty key useful when deserializing */
public HLogKey() {
this(null, null, null, 0L);
}
/**
* Create the log key!
* We maintain the tablename mainly for debugging purposes.
* A regionName is always a sub-table object.
*
* @param regionName - name of region
* @param tablename - name of table
* @param row - row key
* @param logSeqNum - log sequence number
*/
public HLogKey(final byte [] regionName, final byte [] tablename,
final byte [] row, long logSeqNum) {
this.regionName = regionName;
this.tablename = tablename;
this.row = row;
this.logSeqNum = logSeqNum;
}
//////////////////////////////////////////////////////////////////////////////
// A bunch of accessors
//////////////////////////////////////////////////////////////////////////////
byte [] getRegionName() {
return regionName;
}
byte [] getTablename() {
return tablename;
}
byte [] getRow() {
return row;
}
long getLogSeqNum() {
return logSeqNum;
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return Bytes.toString(tablename) + "/" + Bytes.toString(regionName) + "/" +
Bytes.toString(row) + "/" + logSeqNum;
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj) {
return compareTo(obj) == 0;
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
int result = this.regionName.hashCode();
result ^= this.row.hashCode();
result ^= this.logSeqNum;
return result;
}
//
// Comparable
//
/**
* {@inheritDoc}
*/
public int compareTo(Object o) {
HLogKey other = (HLogKey) o;
int result = Bytes.compareTo(this.regionName, other.regionName);
if(result == 0) {
result = Bytes.compareTo(this.row, other.row);
if(result == 0) {
if (this.logSeqNum < other.logSeqNum) {
result = -1;
} else if (this.logSeqNum > other.logSeqNum) {
result = 1;
}
}
}
return result;
}
//
// Writable
//
/**
* {@inheritDoc}
*/
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.regionName);
Bytes.writeByteArray(out, this.tablename);
Bytes.writeByteArray(out, this.row);
out.writeLong(logSeqNum);
}
/**
* {@inheritDoc}
*/
public void readFields(DataInput in) throws IOException {
this.regionName = Bytes.readByteArray(in);
this.tablename = Bytes.readByteArray(in);
this.row = Bytes.readByteArray(in);
this.logSeqNum = in.readLong();
}
}
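
A sketch of the sort order described above: keys compare on region name first, then row, then log sequence number (the table name is carried only for debugging):

HLogKey a = new HLogKey(Bytes.toBytes("region1"),
    Bytes.toBytes("t1"), Bytes.toBytes("row1"), 1L);
HLogKey b = new HLogKey(Bytes.toBytes("region1"),
    Bytes.toBytes("t1"), Bytes.toBytes("row1"), 2L);
HLogKey c = new HLogKey(Bytes.toBytes("region1"),
    Bytes.toBytes("t1"), Bytes.toBytes("row2"), 0L);
assert a.compareTo(b) < 0;  // same region and row: lower sequence id first
assert b.compareTo(c) < 0;  // row is compared before the sequence id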

File diff suppressed because it is too large


@@ -0,0 +1,451 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JenkinsHash;
import org.apache.hadoop.io.WritableComparable;
/**
* HRegion information.
* Contains HRegion id, start and end keys, a reference to this
* HRegion's table descriptor, etc.
*
* <p>This class has been modified so it instantiates using pre-v5 versions of
* the HTableDescriptor, etc.: i.e. it uses the classes that are in this
* migration v5 package.
*/
public class HRegionInfo implements WritableComparable {
/**
* @param regionName
* @return the encodedName
*/
public static int encodeRegionName(final byte [] regionName) {
return Math.abs(JenkinsHash.hash(regionName, regionName.length, 0));
}
/** delimiter used between portions of a region name */
public static final int DELIMITER = ',';
/** HRegionInfo for root region */
public static final HRegionInfo ROOT_REGIONINFO =
new HRegionInfo(0L, HTableDescriptor.ROOT_TABLEDESC);
/** HRegionInfo for first meta region */
public static final HRegionInfo FIRST_META_REGIONINFO =
new HRegionInfo(1L, HTableDescriptor.META_TABLEDESC);
/**
* Extracts table name prefix from a metaregion row name.
* @param regionName A metaregion row name.
* @return The table prefix of a region name.
*/
public static byte [] getTableNameFromRegionName(final byte [] regionName) {
return parseMetaRegionRow(regionName).get(0);
}
/**
* Parses passed metaregion row into its constituent parts.
* Presumes region names are ASCII characters only.
* @param regionName A metaregion row name.
* @return A list where first element is the tablename, second the row
* portion, and the third the id.
*/
public static List<byte []> parseMetaRegionRow(final byte [] regionName) {
int offset = -1;
for (int i = 0; i < regionName.length; i++) {
if (regionName[i] == DELIMITER) {
offset = i;
break;
}
}
if (offset == -1) {
throw new IllegalArgumentException(Bytes.toString(regionName) +
" does not contain '" + DELIMITER + "' character");
}
byte [] tableName = new byte[offset];
System.arraycopy(regionName, 0, tableName, 0, offset);
// Now move in from the tail till we hit DELIMITER to find the id
offset = -1;
for (int i = regionName.length - 1; i > tableName.length; i--) {
if (regionName[i] == DELIMITER) {
offset = i;
break;
}
}
if (offset == -1) {
throw new IllegalArgumentException(Bytes.toString(regionName) +
" does not have parseable tail");
}
byte [] row = new byte[offset - (tableName.length + 1)];
System.arraycopy(regionName, tableName.length + 1, row, 0,
offset - (tableName.length + 1));
byte [] id = new byte[regionName.length - (offset + 1)];
System.arraycopy(regionName, offset + 1, id, 0,
regionName.length - (offset + 1));
// Now make up an array to hold the three parse pieces.
List<byte []> result = new ArrayList<byte []>(3);
result.add(tableName);
result.add(row);
result.add(id);
return result;
}
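// For example (illustrative values):
//   parseMetaRegionRow(Bytes.toBytes("t1,row0500,1216278000000"))
// returns three byte arrays holding "t1", "row0500" and "1216278000000":
// the table name, the row portion, and the region id.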
private byte [] endKey = HConstants.EMPTY_BYTE_ARRAY;
private boolean offLine = false;
private long regionId = -1;
private byte [] regionName = HConstants.EMPTY_BYTE_ARRAY;
private String regionNameStr = "";
private boolean split = false;
private byte [] startKey = HConstants.EMPTY_BYTE_ARRAY;
protected HTableDescriptor tableDesc = null;
private int hashCode = -1;
public static final int NO_HASH = -1;
private volatile int encodedName = NO_HASH;
private void setHashCode() {
int result = this.regionName.hashCode();
result ^= this.regionId;
result ^= this.startKey.hashCode();
result ^= this.endKey.hashCode();
result ^= Boolean.valueOf(this.offLine).hashCode();
result ^= this.tableDesc.hashCode();
this.hashCode = result;
}
/**
* Private constructor used constructing HRegionInfo for the catalog root and
* first meta regions
*/
private HRegionInfo(long regionId, HTableDescriptor tableDesc) {
this.regionId = regionId;
this.tableDesc = tableDesc;
this.regionName = createRegionName(tableDesc.getName(), null, regionId);
this.regionNameStr = Bytes.toString(this.regionName);
setHashCode();
}
/** Default constructor - creates empty object */
public HRegionInfo() {
this.tableDesc = new HTableDescriptor();
}
/**
* Construct HRegionInfo with explicit parameters
*
* @param tableDesc the table descriptor
* @param startKey first key in region
* @param endKey end of key range
* @throws IllegalArgumentException
*/
public HRegionInfo(final HTableDescriptor tableDesc, final byte [] startKey,
final byte [] endKey)
throws IllegalArgumentException {
this(tableDesc, startKey, endKey, false);
}
/**
* Construct HRegionInfo with explicit parameters
*
* @param tableDesc the table descriptor
* @param startKey first key in region
* @param endKey end of key range
* @param split true if this region has split and we have daughter
* regions that may or may not hold references to this region.
* @throws IllegalArgumentException
*/
public HRegionInfo(HTableDescriptor tableDesc, final byte [] startKey,
final byte [] endKey, final boolean split)
throws IllegalArgumentException {
this(tableDesc, startKey, endKey, split, System.currentTimeMillis());
}
/**
* Construct HRegionInfo with explicit parameters
*
* @param tableDesc the table descriptor
* @param startKey first key in region
* @param endKey end of key range
* @param split true if this region has split and we have daughter
* regions that may or may not hold references to this region.
* @param regionid Region id to use.
* @throws IllegalArgumentException
*/
public HRegionInfo(HTableDescriptor tableDesc, final byte [] startKey,
final byte [] endKey, final boolean split, final long regionid)
throws IllegalArgumentException {
if (tableDesc == null) {
throw new IllegalArgumentException("tableDesc cannot be null");
}
this.offLine = false;
this.regionId = regionid;
this.regionName = createRegionName(tableDesc.getName(), startKey, regionId);
this.regionNameStr = Bytes.toString(this.regionName);
this.split = split;
this.endKey = endKey == null? HConstants.EMPTY_END_ROW: endKey.clone();
this.startKey = startKey == null?
HConstants.EMPTY_START_ROW: startKey.clone();
this.tableDesc = tableDesc;
setHashCode();
}
/**
* Construct a copy of another HRegionInfo
*
* @param other
*/
public HRegionInfo(HRegionInfo other) {
this.endKey = other.getEndKey();
this.offLine = other.isOffline();
this.regionId = other.getRegionId();
this.regionName = other.getRegionName();
this.regionNameStr = Bytes.toString(this.regionName);
this.split = other.isSplit();
this.startKey = other.getStartKey();
this.tableDesc = other.getTableDesc();
this.hashCode = other.hashCode();
this.encodedName = other.getEncodedName();
}
private static byte [] createRegionName(final byte [] tableName,
final byte [] startKey, final long regionid) {
return createRegionName(tableName, startKey, Long.toString(regionid));
}
/**
* Make a region name of passed parameters.
* @param tableName
* @param startKey Can be null
* @param id Region id.
* @return Region name made of passed tableName, startKey and id
*/
public static byte [] createRegionName(final byte [] tableName,
final byte [] startKey, final String id) {
return createRegionName(tableName, startKey, Bytes.toBytes(id));
}
/**
* Make a region name of passed parameters.
* @param tableName
* @param startKey Can be null
* @param id Region id
* @return Region name made of passed tableName, startKey and id
*/
public static byte [] createRegionName(final byte [] tableName,
final byte [] startKey, final byte [] id) {
byte [] b = new byte [tableName.length + 2 + id.length +
(startKey == null? 0: startKey.length)];
int offset = tableName.length;
System.arraycopy(tableName, 0, b, 0, offset);
b[offset++] = DELIMITER;
if (startKey != null && startKey.length > 0) {
System.arraycopy(startKey, 0, b, offset, startKey.length);
offset += startKey.length;
}
b[offset++] = DELIMITER;
System.arraycopy(id, 0, b, offset, id.length);
return b;
}
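/*
 * For illustration (not part of this commit; "t" and "row1" are made-up
 * values, and DELIMITER is assumed to be the comma used in region names):
 *
 *   createRegionName(Bytes.toBytes("t"), Bytes.toBytes("row1"), "1212121212")
 *
 * returns the bytes of "t,row1,1212121212", i.e. tableName, DELIMITER,
 * startKey, DELIMITER, id.
 */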
/** @return the endKey */
public byte [] getEndKey(){
return endKey;
}
/** @return the regionId */
public long getRegionId(){
return regionId;
}
/**
* @return the regionName as an array of bytes.
* @see #getRegionNameAsString()
*/
public byte [] getRegionName(){
return regionName;
}
/**
* @return Region name as a String for use in logging, etc.
*/
public String getRegionNameAsString() {
return this.regionNameStr;
}
/** @return the encoded region name */
public synchronized int getEncodedName() {
if (this.encodedName == NO_HASH) {
this.encodedName = encodeRegionName(this.regionName);
}
return this.encodedName;
}
/** @return the startKey */
public byte [] getStartKey(){
return startKey;
}
/** @return the tableDesc */
public HTableDescriptor getTableDesc(){
return tableDesc;
}
/** @return true if this is the root region */
public boolean isRootRegion() {
return this.tableDesc.isRootRegion();
}
/** @return true if this is the meta table */
public boolean isMetaTable() {
return this.tableDesc.isMetaTable();
}
/** @return true if this region is a meta region */
public boolean isMetaRegion() {
return this.tableDesc.isMetaRegion();
}
/**
* @return True if has been split and has daughters.
*/
public boolean isSplit() {
return this.split;
}
/**
* @param split set split status
*/
public void setSplit(boolean split) {
this.split = split;
}
/**
* @return True if this region is offline.
*/
public boolean isOffline() {
return this.offLine;
}
/**
* @param offLine set online - offline status
*/
public void setOffline(boolean offLine) {
this.offLine = offLine;
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return "REGION => {" + HConstants.NAME + " => '" +
this.regionNameStr +
"', STARTKEY => '" +
Bytes.toString(this.startKey) + "', ENDKEY => '" +
Bytes.toString(this.endKey) +
"', ENCODED => " + getEncodedName() + "," +
(isOffline()? " OFFLINE => true,": "") + (isSplit()? " SPLIT => true,": "") +
" TABLE => {" + this.tableDesc.toString() + "}";
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object o) {
return this.compareTo(o) == 0;
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
return this.hashCode;
}
//
// Writable
//
/**
* {@inheritDoc}
*/
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, endKey);
out.writeBoolean(offLine);
out.writeLong(regionId);
Bytes.writeByteArray(out, regionName);
out.writeBoolean(split);
Bytes.writeByteArray(out, startKey);
tableDesc.write(out);
out.writeInt(hashCode);
}
/**
* {@inheritDoc}
*/
public void readFields(DataInput in) throws IOException {
this.endKey = Bytes.readByteArray(in);
this.offLine = in.readBoolean();
this.regionId = in.readLong();
this.regionName = Bytes.readByteArray(in);
this.regionNameStr = Bytes.toString(this.regionName);
this.split = in.readBoolean();
this.startKey = Bytes.readByteArray(in);
this.tableDesc.readFields(in);
this.hashCode = in.readInt();
}
//
// Comparable
//
/**
* {@inheritDoc}
*/
public int compareTo(Object o) {
HRegionInfo other = (HRegionInfo) o;
if (other == null) {
return 1;
}
// Are regions of same table?
int result = this.tableDesc.compareTo(other.tableDesc);
if (result != 0) {
return result;
}
// Compare start keys.
result = Bytes.compareTo(this.startKey, other.startKey);
if (result != 0) {
return result;
}
// Compare end keys.
return Bytes.compareTo(this.endKey, other.endKey);
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,269 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Scanner scans both the memcache and the HStore
*/
class HStoreScanner implements InternalScanner {
static final Log LOG = LogFactory.getLog(HStoreScanner.class);
private InternalScanner[] scanners;
private TreeMap<byte [], Cell>[] resultSets;
private HStoreKey[] keys;
private boolean wildcardMatch = false;
private boolean multipleMatchers = false;
private RowFilterInterface dataFilter;
private HStore store;
/** Create a Scanner with a handle on the memcache and HStore files. */
@SuppressWarnings("unchecked")
HStoreScanner(HStore store, byte [][] targetCols, byte [] firstRow,
long timestamp, RowFilterInterface filter)
throws IOException {
this.store = store;
this.dataFilter = filter;
if (null != dataFilter) {
dataFilter.reset();
}
this.scanners = new InternalScanner[2];
this.resultSets = new TreeMap[scanners.length];
this.keys = new HStoreKey[scanners.length];
try {
scanners[0] = store.memcache.getScanner(timestamp, targetCols, firstRow);
scanners[1] = new StoreFileScanner(store, timestamp, targetCols, firstRow);
for (int i = 0; i < scanners.length; i++) {
if (scanners[i].isWildcardScanner()) {
this.wildcardMatch = true;
}
if (scanners[i].isMultipleMatchScanner()) {
this.multipleMatchers = true;
}
}
} catch(IOException e) {
for (int i = 0; i < this.scanners.length; i++) {
if(scanners[i] != null) {
closeScanner(i);
}
}
throw e;
}
// Advance to the first key in each scanner.
// All results will match the required column-set and scanTime.
for (int i = 0; i < scanners.length; i++) {
keys[i] = new HStoreKey();
resultSets[i] = new TreeMap<byte [], Cell>(Bytes.BYTES_COMPARATOR);
if(scanners[i] != null && !scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
}
/** @return true if the scanner is a wild card scanner */
public boolean isWildcardScanner() {
return wildcardMatch;
}
/** @return true if the scanner is a multiple match scanner */
public boolean isMultipleMatchScanner() {
return multipleMatchers;
}
/** {@inheritDoc} */
public boolean next(HStoreKey key, SortedMap<byte [], Cell> results)
throws IOException {
// Filtered flag is set by filters. If a cell has been 'filtered out'
// -- i.e. it is not to be returned to the caller -- the flag is 'true'.
boolean filtered = true;
boolean moreToFollow = true;
while (filtered && moreToFollow) {
// Find the lowest-possible key.
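// 'Lowest' here means: the smallest row wins; among keys with the same
// row, the one with the newest (largest) timestamp wins, so the freshest
// cells for a row are merged first.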
byte [] chosenRow = null;
long chosenTimestamp = -1;
for (int i = 0; i < this.keys.length; i++) {
if (scanners[i] != null &&
(chosenRow == null ||
(Bytes.compareTo(keys[i].getRow(), chosenRow) < 0) ||
((Bytes.compareTo(keys[i].getRow(), chosenRow) == 0) &&
(keys[i].getTimestamp() > chosenTimestamp)))) {
chosenRow = keys[i].getRow();
chosenTimestamp = keys[i].getTimestamp();
}
}
// Filter whole row by row key?
filtered = dataFilter != null? dataFilter.filterRowKey(chosenRow) : false;
// Store the key and results for each sub-scanner. Merge them as
// appropriate.
if (chosenTimestamp >= 0 && !filtered) {
// Here we are setting the passed in key with current row+timestamp
key.setRow(chosenRow);
key.setVersion(chosenTimestamp);
key.setColumn(HConstants.EMPTY_BYTE_ARRAY);
// Keep list of deleted cell keys within this row. We need this
// because as we go through scanners, the delete record may be in an
// early scanner and then the same record with a non-delete, non-null
// value in a later. Without history of what we've seen, we'll return
// deleted values. This List should not ever grow too large since we
// are only keeping rows and columns that match those set on the
// scanner and which have delete values. If memory usage becomes a
// problem, could redo as bloom filter.
List<HStoreKey> deletes = new ArrayList<HStoreKey>();
for (int i = 0; i < scanners.length && !filtered; i++) {
while ((scanners[i] != null
&& !filtered
&& moreToFollow)
&& (Bytes.compareTo(keys[i].getRow(), chosenRow) == 0)) {
// If we are doing a wild card match or there are multiple
// matchers per column, we need to scan all the older versions of
// this row to pick up the rest of the family members
if (!wildcardMatch
&& !multipleMatchers
&& (keys[i].getTimestamp() != chosenTimestamp)) {
break;
}
// NOTE: We used to do results.putAll(resultSets[i]);
// but this had the effect of overwriting newer
// values with older ones. So now we only insert
// a result if the map does not contain the key.
HStoreKey hsk = new HStoreKey(key.getRow(), HConstants.EMPTY_BYTE_ARRAY,
key.getTimestamp());
for (Map.Entry<byte [], Cell> e : resultSets[i].entrySet()) {
hsk.setColumn(e.getKey());
if (HLogEdit.isDeleted(e.getValue().getValue())) {
if (!deletes.contains(hsk)) {
// Key changes as we cycle the for loop so add a copy to
// the set of deletes.
deletes.add(new HStoreKey(hsk));
}
} else if (!deletes.contains(hsk) &&
!filtered &&
moreToFollow &&
!results.containsKey(e.getKey())) {
if (dataFilter != null) {
// Filter whole row by column data?
filtered = dataFilter.filterColumn(chosenRow, e.getKey(),
e.getValue().getValue());
if (filtered) {
results.clear();
break;
}
}
results.put(e.getKey(), e.getValue());
}
}
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
}
}
for (int i = 0; i < scanners.length; i++) {
// If the current scanner is non-null AND has a lower-or-equal
// row label, then its timestamp is bad. We need to advance it.
while ((scanners[i] != null) &&
(Bytes.compareTo(keys[i].getRow(), chosenRow) <= 0)) {
resultSets[i].clear();
if (!scanners[i].next(keys[i], resultSets[i])) {
closeScanner(i);
}
}
}
moreToFollow = chosenTimestamp >= 0;
if (dataFilter != null) {
if (dataFilter.filterAllRemaining()) {
moreToFollow = false;
}
}
if (results.size() <= 0 && !filtered) {
// There were no results found for this row. Mark it as
// 'filtered'-out; otherwise we will not move on to the next row.
filtered = true;
}
}
// If we got no results, then there is no more to follow.
if (results == null || results.size() <= 0) {
moreToFollow = false;
}
// Make sure scanners closed if no more results
if (!moreToFollow) {
for (int i = 0; i < scanners.length; i++) {
if (null != scanners[i]) {
closeScanner(i);
}
}
}
return moreToFollow;
}
/** Shut down a single scanner */
void closeScanner(int i) {
try {
try {
scanners[i].close();
} catch (IOException e) {
LOG.warn(store.storeName + " failed closing scanner " + i, e);
}
} finally {
scanners[i] = null;
keys[i] = null;
resultSets[i] = null;
}
}
/** {@inheritDoc} */
public void close() {
for(int i = 0; i < scanners.length; i++) {
if(scanners[i] != null) {
closeScanner(i);
}
}
}
}

View File

@ -0,0 +1,328 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableComparable;
/**
* HTableDescriptor contains the name of an HTable and its
* column families.
*/
public class HTableDescriptor implements WritableComparable {
/** Table descriptor for <code>-ROOT-</code> catalog table */
public static final HTableDescriptor ROOT_TABLEDESC = new HTableDescriptor(
HConstants.ROOT_TABLE_NAME,
new HColumnDescriptor[] { new HColumnDescriptor(HConstants.COLUMN_FAMILY,
1, HColumnDescriptor.CompressionType.NONE, false, false,
Integer.MAX_VALUE, HConstants.FOREVER, false) });
/** Table descriptor for <code>.META.</code> catalog table */
public static final HTableDescriptor META_TABLEDESC = new HTableDescriptor(
HConstants.META_TABLE_NAME, new HColumnDescriptor[] {
new HColumnDescriptor(HConstants.COLUMN_FAMILY, 1,
HColumnDescriptor.CompressionType.NONE, false, false,
Integer.MAX_VALUE, HConstants.FOREVER, false),
new HColumnDescriptor(HConstants.COLUMN_FAMILY_HISTORIAN,
HConstants.ALL_VERSIONS, HColumnDescriptor.CompressionType.NONE,
false, false, Integer.MAX_VALUE, HConstants.FOREVER, false) });
private boolean rootregion = false;
private boolean metaregion = false;
private byte [] name = HConstants.EMPTY_BYTE_ARRAY;
private String nameAsString = "";
public static final String FAMILIES = "FAMILIES";
// Key is hash of the family name.
private final Map<Integer, HColumnDescriptor> families =
new HashMap<Integer, HColumnDescriptor>();
/**
* Private constructor used internally to create table descriptors for the
* catalog tables, e.g. .META. and -ROOT-.
*/
private HTableDescriptor(final byte [] name, HColumnDescriptor[] families) {
this.name = name.clone();
setMetaFlags(name);
for(HColumnDescriptor descriptor : families) {
this.families.put(Bytes.mapKey(descriptor.getName()), descriptor);
}
}
/**
* Constructs an empty object.
* For deserializing an HTableDescriptor instance only.
* @see #HTableDescriptor(byte[])
*/
public HTableDescriptor() {
super();
}
/**
* Constructor.
* @param name Table name.
* @throws IllegalArgumentException if passed a table name
* that is made of other than 'word' characters, underscore or period: i.e.
* <code>[a-zA-Z_0-9.]</code>.
* @see <a href="HADOOP-1581">HADOOP-1581 HBASE: Un-openable tablename bug</a>
*/
public HTableDescriptor(final String name) {
this(Bytes.toBytes(name));
}
/**
* Constructor.
* @param name Table name.
* @throws IllegalArgumentException if passed a table name
* that is made of other than 'word' characters, underscore or period: i.e.
* <code>[a-zA-Z_0-9.]</code>.
* @see <a href="HADOOP-1581">HADOOP-1581 HBASE: Un-openable tablename bug</a>
*/
public HTableDescriptor(final byte [] name) {
setMetaFlags(name);
this.name = this.metaregion? name: isLegalTableName(name);
this.nameAsString = Bytes.toString(this.name);
}
/*
* Set meta flags on this table.
* Called by constructors.
* @param name
*/
private void setMetaFlags(final byte [] name) {
this.rootregion = Bytes.equals(name, HConstants.ROOT_TABLE_NAME);
this.metaregion =
this.rootregion? true: Bytes.equals(name, HConstants.META_TABLE_NAME);
}
/**
* Check passed buffer is legal user-space table name.
* @param b Table name.
* @return Returns passed <code>b</code> param
* @throws NullPointerException If passed <code>b</code> is null
* @throws IllegalArgumentException if passed a table name
* that is made of other than 'word' characters or underscores: i.e.
* <code>[a-zA-Z_0-9]</code>.
*/
public static byte [] isLegalTableName(final byte [] b) {
if (b == null || b.length <= 0) {
throw new IllegalArgumentException("Name is null or empty");
}
for (int i = 0; i < b.length; i++) {
if (Character.isLetterOrDigit(b[i]) || b[i] == '_') {
continue;
}
throw new IllegalArgumentException("Illegal character <" + b[i] + ">. " +
"User-space table names can only contain 'word characters':" +
"i.e. [a-zA-Z_0-9]: " + Bytes.toString(b));
}
return b;
}
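/*
 * For illustration (made-up names): isLegalTableName(Bytes.toBytes("web_2008"))
 * returns the passed bytes, while isLegalTableName(Bytes.toBytes("web-table"))
 * throws IllegalArgumentException because '-' is not a word character.
 */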
/** @return true if this is the root region */
public boolean isRootRegion() {
return rootregion;
}
/** @return true if table is the meta table */
public boolean isMetaTable() {
return metaregion && !rootregion;
}
/** @return true if this is a meta region (part of the root or meta tables) */
public boolean isMetaRegion() {
return metaregion;
}
/** @return name of table */
public byte [] getName() {
return name;
}
/** @return name of table */
public String getNameAsString() {
return this.nameAsString;
}
/**
* Adds a column family.
* @param family HColumnDescriptor of family to add.
*/
public void addFamily(final HColumnDescriptor family) {
if (family.getName() == null || family.getName().length <= 0) {
throw new NullPointerException("Family name cannot be null or empty");
}
this.families.put(Bytes.mapKey(family.getName()), family);
}
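/*
 * Usage sketch (illustrative; "mytable" and "contents:" are made-up names,
 * and the constructor arguments mirror the catalog-table descriptors
 * declared above):
 *
 *   HTableDescriptor desc = new HTableDescriptor("mytable");
 *   desc.addFamily(new HColumnDescriptor(Bytes.toBytes("contents:"), 3,
 *     HColumnDescriptor.CompressionType.NONE, false, false,
 *     Integer.MAX_VALUE, HConstants.FOREVER, false));
 */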
/**
* Checks to see if this table contains the given column family
* @param c Family name or column name.
* @return true if the table contains the specified family name
*/
public boolean hasFamily(final byte [] c) {
return hasFamily(c, HStoreKey.getFamilyDelimiterIndex(c));
}
/**
* Checks to see if this table contains the given column family
* @param c Family name or column name.
* @param index Index to column family delimiter
* @return true if the table contains the specified family name
*/
public boolean hasFamily(final byte [] c, final int index) {
// If index is -1, then presume we were passed a column family name minus
// the colon delimiter.
return families.containsKey(Bytes.mapKey(c, index == -1? c.length: index));
}
/**
* @return Name of this table and then a map of all of the column family
* descriptors.
* @see #getNameAsString()
*/
@Override
public String toString() {
return HConstants.NAME + " => '" + Bytes.toString(this.name) +
"', " + FAMILIES + " => " + this.families.values();
}
/** {@inheritDoc} */
@Override
public boolean equals(Object obj) {
return compareTo(obj) == 0;
}
/** {@inheritDoc} */
@Override
public int hashCode() {
// TODO: Cache.
int result = Bytes.hashCode(this.name);
if (this.families != null && this.families.size() > 0) {
for (HColumnDescriptor e: this.families.values()) {
result ^= e.hashCode();
}
}
return result;
}
// Writable
/** {@inheritDoc} */
public void write(DataOutput out) throws IOException {
out.writeBoolean(rootregion);
out.writeBoolean(metaregion);
Bytes.writeByteArray(out, name);
out.writeInt(families.size());
for(Iterator<HColumnDescriptor> it = families.values().iterator();
it.hasNext(); ) {
it.next().write(out);
}
}
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
this.rootregion = in.readBoolean();
this.metaregion = in.readBoolean();
this.name = Bytes.readByteArray(in);
this.nameAsString = Bytes.toString(this.name);
int numCols = in.readInt();
this.families.clear();
for (int i = 0; i < numCols; i++) {
HColumnDescriptor c = new HColumnDescriptor();
c.readFields(in);
this.families.put(Bytes.mapKey(c.getName()), c);
}
}
// Comparable
/** {@inheritDoc} */
public int compareTo(Object o) {
HTableDescriptor other = (HTableDescriptor) o;
int result = Bytes.compareTo(this.name, other.name);
if (result == 0) {
result = families.size() - other.families.size();
}
if (result == 0) {
for (Iterator<HColumnDescriptor> it = families.values().iterator(),
it2 = other.families.values().iterator(); it.hasNext(); ) {
result = it.next().compareTo(it2.next());
if (result != 0) {
break;
}
}
}
return result;
}
/**
* @return Unmodifiable collection of the table's column family descriptors.
*/
public Collection<HColumnDescriptor> getFamilies() {
return Collections.unmodifiableCollection(this.families.values());
}
/**
* @param column
* @return Column descriptor for the passed family name or the family on
* passed in column.
*/
public HColumnDescriptor getFamily(final byte [] column) {
return this.families.get(HStoreKey.getFamilyMapKey(column));
}
/**
* @param column
* @return Column descriptor for the passed family name or the family on
* passed in column.
*/
public HColumnDescriptor removeFamily(final byte [] column) {
return this.families.remove(HStoreKey.getFamilyMapKey(column));
}
/**
* @param rootdir qualified path of HBase root directory
* @param tableName name of table
* @return path for table
*/
public static Path getTableDir(Path rootdir, final byte [] tableName) {
return new Path(rootdir, Bytes.toString(tableName));
}
}

View File

@ -0,0 +1,29 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
/**
* Mechanism by which the HLog requests a log roll
*/
public interface LogRollListener {
/** Request that the log be rolled */
public void logRollRequested();
}

View File

@ -0,0 +1,760 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.IOException;
import java.rmi.UnexpectedException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.regionserver.HAbstractScanner;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.util.Bytes;
/**
* The Memcache holds in-memory modifications to the HRegion.
* Keeps a current map. When asked to flush, the current map is moved
* to snapshot and a new, empty map takes its place. We continue to serve
* edits out of the new map
* and backing snapshot until flusher reports in that the flush succeeded. At
* this point we let the snapshot go.
*/
class Memcache {
private final Log LOG = LogFactory.getLog(this.getClass().getName());
private long ttl;
// Note that since these structures are always accessed with a lock held,
// no additional synchronization is required.
// The currently active sorted map of edits.
private volatile SortedMap<HStoreKey, byte[]> memcache =
createSynchronizedSortedMap();
// Snapshot of memcache. Made for flusher.
private volatile SortedMap<HStoreKey, byte[]> snapshot =
createSynchronizedSortedMap();
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
/**
* Default constructor. Used for tests.
*/
public Memcache() {
ttl = HConstants.FOREVER;
}
/**
* Constructor.
* @param ttl The TTL for cache entries, in milliseconds.
*/
public Memcache(long ttl) {
this.ttl = ttl;
}
/*
* Utility method.
* @return synchronized sorted map of HStoreKey to byte arrays.
*/
private static SortedMap<HStoreKey, byte[]> createSynchronizedSortedMap() {
return Collections.synchronizedSortedMap(new TreeMap<HStoreKey, byte []>());
}
/**
* Creates a snapshot of the current Memcache.
* Snapshot must be cleared by call to {@link #clearSnapshot(SortedMap)}
* To get the snapshot made by this method, use
* {@link #getSnapshot}.
*/
void snapshot() {
this.lock.writeLock().lock();
try {
// If snapshot currently has entries, then flusher failed or didn't call
// cleanup. Log it and do nothing.
if (this.snapshot.size() > 0) {
LOG.debug("Snapshot called again without clearing previous. " +
"Doing nothing. Another ongoing flush or did we fail last attempt?");
} else {
// We used to synchronize on the memcache here but we're inside a
// write lock so removed it. Comment is left in case removal was a
// mistake. St.Ack
if (this.memcache.size() != 0) {
this.snapshot = this.memcache;
this.memcache = createSynchronizedSortedMap();
}
}
} finally {
this.lock.writeLock().unlock();
}
}
/**
* Return the current snapshot.
* Called by flusher to get current snapshot made by a previous
* call to {@link #snapshot()}.
* @return Return snapshot.
* @see {@link #snapshot()}
* @see {@link #clearSnapshot(SortedMap)}
*/
SortedMap<HStoreKey, byte[]> getSnapshot() {
return this.snapshot;
}
/**
* The passed snapshot was successfully persisted; it can be let go.
* @param ss The snapshot to clean out.
* @throws UnexpectedException
* @see {@link #snapshot()}
*/
void clearSnapshot(final SortedMap<HStoreKey, byte []> ss)
throws UnexpectedException {
this.lock.writeLock().lock();
try {
if (this.snapshot != ss) {
throw new UnexpectedException("Current snapshot is " +
this.snapshot + ", was passed " + ss);
}
// OK. Passed in snapshot is same as current snapshot. If not-empty,
// create a new snapshot and let the old one go.
if (ss.size() != 0) {
this.snapshot = createSynchronizedSortedMap();
}
} finally {
this.lock.writeLock().unlock();
}
}
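/*
 * Flush-cycle sketch (illustrative; the persistence step is elided):
 *
 *   memcache.snapshot();                              // freeze current edits
 *   SortedMap<HStoreKey, byte[]> ss = memcache.getSnapshot();
 *   // ... persist ss to the store ...
 *   memcache.clearSnapshot(ss);                       // let the snapshot go
 */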
/**
* Write an update
* @param key
* @param value
* @return memcache size delta
*/
long add(final HStoreKey key, final byte[] value) {
this.lock.readLock().lock();
try {
byte[] oldValue = this.memcache.remove(key);
this.memcache.put(key, value);
return key.getSize() + (value == null ? 0 : value.length) -
(oldValue == null ? 0 : oldValue.length);
} finally {
this.lock.readLock().unlock();
}
}
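/*
 * E.g. (illustrative numbers): replacing a 4-byte value with a 10-byte one
 * under the same key returns key.getSize() + 10 - 4, the net growth of the
 * memcache.
 */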
/**
* Look back through all the backlog TreeMaps to find the target.
* @param key
* @param numVersions
* @return An array of byte arrays ordered by timestamp.
*/
List<Cell> get(final HStoreKey key, final int numVersions) {
this.lock.readLock().lock();
try {
List<Cell> results;
// The synchronizations here are because internalGet iterates
synchronized (this.memcache) {
results = internalGet(this.memcache, key, numVersions);
}
synchronized (this.snapshot) {
results.addAll(results.size(),
internalGet(this.snapshot, key, numVersions - results.size()));
}
return results;
} finally {
this.lock.readLock().unlock();
}
}
/**
* @param a
* @param b
* @return Lowest of a or b, or null if both a and b are null
*/
@SuppressWarnings("unchecked")
private byte [] getLowest(final byte [] a,
final byte [] b) {
if (a == null) {
return b;
}
if (b == null) {
return a;
}
return Bytes.compareTo(a, b) <= 0? a: b;
}
/**
* @param row Find the row that comes after this one.
* @return Next row or null if none found
*/
byte [] getNextRow(final byte [] row) {
this.lock.readLock().lock();
try {
return getLowest(getNextRow(row, this.memcache),
getNextRow(row, this.snapshot));
} finally {
this.lock.readLock().unlock();
}
}
/*
* @param row Find row that follows this one.
* @param map Map to look in for a row beyond <code>row</code>.
* This method synchronizes on passed map while iterating it.
* @return Next row or null if none found.
*/
private byte [] getNextRow(final byte [] row,
final SortedMap<HStoreKey, byte []> map) {
byte [] result = null;
// Synchronize on the map to make the tailMap creation 'safe'.
synchronized (map) {
// Make an HSK with maximum timestamp so we get past most of the current
// row's cell entries.
HStoreKey hsk = new HStoreKey(row, HConstants.LATEST_TIMESTAMP);
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(hsk);
// Iterate until we fall into the next row; i.e. move off current row
for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
HStoreKey itKey = es.getKey();
if (Bytes.compareTo(itKey.getRow(), row) <= 0) {
continue;
}
// Note: Not suppressing deletes or expired cells.
result = itKey.getRow();
break;
}
}
return result;
}
/**
* Return all the available columns for the given key. The key indicates a
* row and timestamp, but not a column name.
* @param key
* @param columns Pass null for all columns else the wanted subset.
* @param deletes Map to accumulate deletes found.
* @param results Where to stick row results found.
*/
void getFull(HStoreKey key, Set<byte []> columns, Map<byte [], Long> deletes,
Map<byte [], Cell> results) {
this.lock.readLock().lock();
try {
// The synchronizations here are because internalGet iterates
synchronized (this.memcache) {
internalGetFull(this.memcache, key, columns, deletes, results);
}
synchronized (this.snapshot) {
internalGetFull(this.snapshot, key, columns, deletes, results);
}
} finally {
this.lock.readLock().unlock();
}
}
private void internalGetFull(SortedMap<HStoreKey, byte[]> map, HStoreKey key,
Set<byte []> columns, Map<byte [], Long> deletes,
Map<byte [], Cell> results) {
if (map.isEmpty() || key == null) {
return;
}
List<HStoreKey> victims = new ArrayList<HStoreKey>();
SortedMap<HStoreKey, byte[]> tailMap = map.tailMap(key);
long now = System.currentTimeMillis();
for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
HStoreKey itKey = es.getKey();
byte [] itCol = itKey.getColumn();
if (results.get(itCol) == null && key.matchesWithoutColumn(itKey)) {
if (columns == null || columns.contains(itKey.getColumn())) {
byte [] val = tailMap.get(itKey);
if (HLogEdit.isDeleted(val)) {
if (!deletes.containsKey(itCol)
|| deletes.get(itCol).longValue() < itKey.getTimestamp()) {
deletes.put(itCol, Long.valueOf(itKey.getTimestamp()));
}
} else if (!(deletes.containsKey(itCol)
&& deletes.get(itCol).longValue() >= itKey.getTimestamp())) {
// Skip expired cells
if (ttl == HConstants.FOREVER ||
now < itKey.getTimestamp() + ttl) {
results.put(itCol, new Cell(val, itKey.getTimestamp()));
} else {
victims.add(itKey);
if (LOG.isDebugEnabled()) {
LOG.debug("internalGetFull: " + itKey + ": expired, skipped");
}
}
}
}
} else if (Bytes.compareTo(key.getRow(), itKey.getRow()) < 0) {
break;
}
}
// Remove expired victims from the map.
for (HStoreKey v: victims)
map.remove(v);
}
/**
* @param row Row to look for.
* @param candidateKeys Map of candidate keys (Accumulation over lots of
* lookup over stores and memcaches)
*/
void getRowKeyAtOrBefore(final byte [] row,
SortedMap<HStoreKey, Long> candidateKeys) {
this.lock.readLock().lock();
try {
synchronized (memcache) {
internalGetRowKeyAtOrBefore(memcache, row, candidateKeys);
}
synchronized (snapshot) {
internalGetRowKeyAtOrBefore(snapshot, row, candidateKeys);
}
} finally {
this.lock.readLock().unlock();
}
}
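/*
 * Walk the map for the newest keys whose row sorts at or before the passed
 * key, folding them into candidateKeys and dropping candidates that turn
 * out to be deleted or expired.
 */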
private void internalGetRowKeyAtOrBefore(SortedMap<HStoreKey, byte []> map,
byte [] key, SortedMap<HStoreKey, Long> candidateKeys) {
HStoreKey strippedKey = null;
// we want the earliest possible key to start searching from
HStoreKey search_key = candidateKeys.isEmpty() ?
new HStoreKey(key) : new HStoreKey(candidateKeys.firstKey().getRow());
Iterator<HStoreKey> key_iterator = null;
HStoreKey found_key = null;
ArrayList<HStoreKey> victims = new ArrayList<HStoreKey>();
long now = System.currentTimeMillis();
// get all the entries that come equal or after our search key
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(search_key);
// if there are items in the tail map, there's either a direct match to
// the search key, or a range of values between the first candidate key
// and the ultimate search key (or the end of the cache)
if (!tailMap.isEmpty() &&
Bytes.compareTo(tailMap.firstKey().getRow(), key) <= 0) {
key_iterator = tailMap.keySet().iterator();
// keep looking at cells as long as they are no greater than the
// ultimate search key and there are still records left in the map.
do {
found_key = key_iterator.next();
if (Bytes.compareTo(found_key.getRow(), key) <= 0) {
strippedKey = stripTimestamp(found_key);
if (HLogEdit.isDeleted(tailMap.get(found_key))) {
if (candidateKeys.containsKey(strippedKey)) {
long bestCandidateTs =
candidateKeys.get(strippedKey).longValue();
if (bestCandidateTs <= found_key.getTimestamp()) {
candidateKeys.remove(strippedKey);
}
}
} else {
if (ttl == HConstants.FOREVER ||
now < found_key.getTimestamp() + ttl) {
candidateKeys.put(strippedKey,
new Long(found_key.getTimestamp()));
} else {
victims.add(found_key);
if (LOG.isDebugEnabled()) {
LOG.debug(":" + found_key + ": expired, skipped");
}
}
}
}
} while (Bytes.compareTo(found_key.getRow(), key) <= 0
&& key_iterator.hasNext());
} else {
// the tail didn't contain any keys that matched our criteria, or was
// empty. Examine all the keys that precede our splitting point.
SortedMap<HStoreKey, byte []> headMap = map.headMap(search_key);
// if we tried to create a headMap and got an empty map, then there are
// no keys at or before the search key, so we're done.
if (headMap.isEmpty()) {
return;
}
// if there aren't any candidate keys at this point, we need to search
// backwards until we find at least one candidate or run out of headMap.
if (candidateKeys.isEmpty()) {
HStoreKey[] cells =
headMap.keySet().toArray(new HStoreKey[headMap.keySet().size()]);
byte [] lastRowFound = null;
for(int i = cells.length - 1; i >= 0; i--) {
HStoreKey thisKey = cells[i];
// if the last row we found a candidate key for is different than
// the row of the current candidate, we can stop looking.
if (lastRowFound != null &&
!Bytes.equals(lastRowFound, thisKey.getRow())) {
break;
}
// if this isn't a delete, record it as a candidate key. also
// take note of the row of this candidate so that we'll know when
// we cross the row boundary into the previous row.
if (!HLogEdit.isDeleted(headMap.get(thisKey))) {
if (ttl == HConstants.FOREVER ||
now < thisKey.getTimestamp() + ttl) {
lastRowFound = thisKey.getRow();
candidateKeys.put(stripTimestamp(thisKey),
new Long(thisKey.getTimestamp()));
} else {
victims.add(thisKey);
if (LOG.isDebugEnabled()) {
LOG.debug("internalGetRowKeyAtOrBefore: " + thisKey +
": expired, skipped");
}
}
}
}
} else {
// if there are already some candidate keys, we only need to consider
// the very last row's worth of keys in the headMap, because any
// smaller acceptable candidate keys would have caused us to start
// our search earlier in the list, and we wouldn't be searching here.
SortedMap<HStoreKey, byte[]> thisRowTailMap =
headMap.tailMap(new HStoreKey(headMap.lastKey().getRow()));
key_iterator = thisRowTailMap.keySet().iterator();
do {
found_key = key_iterator.next();
if (HLogEdit.isDeleted(thisRowTailMap.get(found_key))) {
strippedKey = stripTimestamp(found_key);
if (candidateKeys.containsKey(strippedKey)) {
long bestCandidateTs =
candidateKeys.get(strippedKey).longValue();
if (bestCandidateTs <= found_key.getTimestamp()) {
candidateKeys.remove(strippedKey);
}
}
} else {
if (ttl == HConstants.FOREVER ||
now < found_key.getTimestamp() + ttl) {
candidateKeys.put(stripTimestamp(found_key),
Long.valueOf(found_key.getTimestamp()));
} else {
victims.add(found_key);
if (LOG.isDebugEnabled()) {
LOG.debug("internalGetRowKeyAtOrBefore: " + found_key +
": expired, skipped");
}
}
}
} while (key_iterator.hasNext());
}
}
// Remove expired victims from the map.
for (HStoreKey victim: victims)
map.remove(victim);
}
static HStoreKey stripTimestamp(HStoreKey key) {
return new HStoreKey(key.getRow(), key.getColumn());
}
/**
* Examine a single map for the desired key.
*
* TODO - This is kinda slow. We need a data structure that allows for
* proximity-searches, not just precise-matches.
*
* @param map
* @param key
* @param numVersions
* @return Ordered list of items found in passed <code>map</code>. If no
* matching values, returns an empty list (does not return null).
*/
private ArrayList<Cell> internalGet(
final SortedMap<HStoreKey, byte []> map, final HStoreKey key,
final int numVersions) {
ArrayList<Cell> result = new ArrayList<Cell>();
// TODO: If get is of a particular version -- numVersions == 1 -- we
// should be able to avoid all of the tailmap creations and iterations
// below.
long now = System.currentTimeMillis();
List<HStoreKey> victims = new ArrayList<HStoreKey>();
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(key);
for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
HStoreKey itKey = es.getKey();
if (itKey.matchesRowCol(key)) {
if (!HLogEdit.isDeleted(es.getValue())) {
// Filter out expired results
if (ttl == HConstants.FOREVER ||
now < itKey.getTimestamp() + ttl) {
result.add(new Cell(tailMap.get(itKey), itKey.getTimestamp()));
if (numVersions > 0 && result.size() >= numVersions) {
break;
}
} else {
victims.add(itKey);
if (LOG.isDebugEnabled()) {
LOG.debug("internalGet: " + itKey + ": expired, skipped");
}
}
}
} else {
// By L.N. HBASE-684: the map is sorted, so we can't find any more matches.
break;
}
}
// Remove expired victims from the map.
for (HStoreKey v: victims) {
map.remove(v);
}
return result;
}
/**
* Get <code>versions</code> keys matching the origin key's
* row/column/timestamp and those of an older vintage
* Default access so it can be accessed from {@link HRegionServer}.
* @param origin Where to start searching.
* @param versions How many versions to return. Pass
* {@link HConstants#ALL_VERSIONS} to retrieve all.
* @return Ordered list of <code>versions</code> keys going from newest back.
* @throws IOException
*/
List<HStoreKey> getKeys(final HStoreKey origin, final int versions) {
this.lock.readLock().lock();
try {
List<HStoreKey> results;
synchronized (memcache) {
results = internalGetKeys(this.memcache, origin, versions);
}
synchronized (snapshot) {
results.addAll(results.size(), internalGetKeys(snapshot, origin,
versions == HConstants.ALL_VERSIONS ? versions :
(versions - results.size())));
}
return results;
} finally {
this.lock.readLock().unlock();
}
}
/*
* @param origin Where to start searching.
* @param versions How many versions to return. Pass
* {@link HConstants#ALL_VERSIONS} to retrieve all.
* @return List of all keys that are of the same row and column and of
* equal or older timestamp. If no keys, returns an empty List. Does not
* return null.
*/
private List<HStoreKey> internalGetKeys(
final SortedMap<HStoreKey, byte []> map, final HStoreKey origin,
final int versions) {
long now = System.currentTimeMillis();
List<HStoreKey> result = new ArrayList<HStoreKey>();
List<HStoreKey> victims = new ArrayList<HStoreKey>();
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin);
for (Map.Entry<HStoreKey, byte []> es: tailMap.entrySet()) {
HStoreKey key = es.getKey();
// if there's no column name, then compare rows and timestamps
if (origin.getColumn() != null && origin.getColumn().length == 0) {
// if the current and origin row don't match, then we can jump
// out of the loop entirely.
if (!Bytes.equals(key.getRow(), origin.getRow())) {
break;
}
// if the rows match but the timestamp is newer, skip it so we can
// get to the ones we actually want.
if (key.getTimestamp() > origin.getTimestamp()) {
continue;
}
}
else { // compare rows and columns
// if the key doesn't match the row and column, then we're done, since
// all the cells are ordered.
if (!key.matchesRowCol(origin)) {
break;
}
}
if (!HLogEdit.isDeleted(es.getValue())) {
if (ttl == HConstants.FOREVER || now < key.getTimestamp() + ttl) {
result.add(key);
} else {
victims.add(key);
if (LOG.isDebugEnabled()) {
LOG.debug("internalGetKeys: " + key + ": expired, skipped");
}
}
if (result.size() >= versions) {
// We have enough results. Return.
break;
}
}
}
// Clean expired victims from the map.
for (HStoreKey v: victims)
map.remove(v);
return result;
}
/**
* @param key
* @return True if an entry for <code>key</code> exists and its content is
* {@link HLogEdit#deleteBytes}. Use when checking values in the store; on
* occasion only the memcache knows that the cell has been deleted.
*/
boolean isDeleted(final HStoreKey key) {
return HLogEdit.isDeleted(this.memcache.get(key));
}
/**
* @return a scanner over the keys in the Memcache
*/
InternalScanner getScanner(long timestamp,
final byte [][] targetCols, final byte [] firstRow)
throws IOException {
this.lock.readLock().lock();
try {
return new MemcacheScanner(timestamp, targetCols, firstRow);
} finally {
this.lock.readLock().unlock();
}
}
//////////////////////////////////////////////////////////////////////////////
// MemcacheScanner implements the InternalScanner.
// It lets the caller scan the contents of the Memcache.
//////////////////////////////////////////////////////////////////////////////
private class MemcacheScanner extends HAbstractScanner {
private byte [] currentRow;
private Set<byte []> columns = null;
MemcacheScanner(final long timestamp, final byte [][] targetCols,
final byte [] firstRow)
throws IOException {
// Call to super will create ColumnMatchers and whether this is a regex
// scanner or not. Will also save away timestamp. Also sorts rows.
super(timestamp, targetCols);
this.currentRow = firstRow;
// If we're being asked to scan explicit columns rather than all in
// a family or columns that match regexes, cache the sorted array of
// columns.
this.columns = null;
if (!isWildcardScanner()) {
this.columns = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
for (int i = 0; i < targetCols.length; i++) {
this.columns.add(targetCols[i]);
}
}
}
/** {@inheritDoc} */
@Override
public boolean next(HStoreKey key, SortedMap<byte [], Cell> results)
throws IOException {
if (this.scannerClosed) {
return false;
}
// This is a treemap rather than a Hashmap because then I can have a
// byte array as key -- because I can independently specify a comparator.
Map<byte [], Long> deletes =
new TreeMap<byte [], Long>(Bytes.BYTES_COMPARATOR);
// Catch all row results in here. These results are then filtered to
// ensure they match column name regexes; if there are no regexes, they
// are added to results directly.
Map<byte [], Cell> rowResults =
new TreeMap<byte [], Cell>(Bytes.BYTES_COMPARATOR);
if (results.size() > 0) {
results.clear();
}
long latestTimestamp = -1;
while (results.size() <= 0 && this.currentRow != null) {
if (deletes.size() > 0) {
deletes.clear();
}
if (rowResults.size() > 0) {
rowResults.clear();
}
key.setRow(this.currentRow);
key.setVersion(this.timestamp);
getFull(key, isWildcardScanner() ? null : this.columns, deletes,
rowResults);
for (Map.Entry<byte [], Long> e: deletes.entrySet()) {
rowResults.put(e.getKey(),
new Cell(HLogEdit.deleteBytes.get(), e.getValue().longValue()));
}
for (Map.Entry<byte [], Cell> e: rowResults.entrySet()) {
byte [] column = e.getKey();
Cell c = e.getValue();
if (isWildcardScanner()) {
// Check the results match. We only check columns, not timestamps.
// We presume that timestamps have been handled properly when we
// called getFull.
if (!columnMatch(column)) {
continue;
}
}
// We should never return HConstants.LATEST_TIMESTAMP as the time for
// the row. As a compromise, we return the largest timestamp for the
// entries that we find that match.
if (c.getTimestamp() != HConstants.LATEST_TIMESTAMP &&
c.getTimestamp() > latestTimestamp) {
latestTimestamp = c.getTimestamp();
}
results.put(column, c);
}
this.currentRow = getNextRow(this.currentRow);
}
// Set the timestamp to the largest one for the row if we would otherwise
// return HConstants.LATEST_TIMESTAMP
if (key.getTimestamp() == HConstants.LATEST_TIMESTAMP) {
key.setVersion(latestTimestamp);
}
return results.size() > 0;
}
/** {@inheritDoc} */
public void close() {
if (!scannerClosed) {
scannerClosed = true;
}
}
}
}

View File

@ -0,0 +1,452 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Writables;
/**
* Contains utility methods for manipulating HBase meta tables.
* Be sure to call {@link #shutdown()} when done with this class so it closes
* resources opened during meta processing (ROOT, META, etc.). Use this
* class carefully; if used during migrations, take care when checking
* whether a migration is actually needed.
*/
public class MetaUtils {
private static final Log LOG = LogFactory.getLog(MetaUtils.class);
private final HBaseConfiguration conf;
private FileSystem fs;
private Path rootdir;
private HLog log;
private HRegion rootRegion;
private Map<byte [], HRegion> metaRegions = Collections.synchronizedSortedMap(
new TreeMap<byte [], HRegion>(Bytes.BYTES_COMPARATOR));
/** Default constructor
* @throws IOException */
public MetaUtils() throws IOException {
this(new HBaseConfiguration());
}
/** @param conf HBaseConfiguration
* @throws IOException */
public MetaUtils(HBaseConfiguration conf) throws IOException {
this.conf = conf;
conf.setInt("hbase.client.retries.number", 1);
this.rootRegion = null;
initialize();
}
/**
* Verifies that DFS is available and that HBase is off-line.
* @throws IOException
*/
private void initialize() throws IOException {
this.fs = FileSystem.get(this.conf); // get DFS handle
// Get root directory of HBase installation
this.rootdir = fs.makeQualified(new Path(this.conf.get(HConstants.HBASE_DIR)));
if (!fs.exists(rootdir)) {
String message = "HBase root directory " + rootdir.toString() +
" does not exist.";
LOG.error(message);
throw new FileNotFoundException(message);
}
}
/** @return the HLog
* @throws IOException */
public synchronized HLog getLog() throws IOException {
if (this.log == null) {
Path logdir = new Path(this.fs.getHomeDirectory(),
HConstants.HREGION_LOGDIR_NAME + "_" + System.currentTimeMillis());
this.log = new HLog(this.fs, logdir, this.conf, null);
}
return this.log;
}
/**
* @return HRegion for root region
* @throws IOException
*/
public HRegion getRootRegion() throws IOException {
if (this.rootRegion == null) {
openRootRegion();
}
return this.rootRegion;
}
/**
* Open or return cached opened meta region
*
* @param metaInfo HRegionInfo for meta region
* @return meta HRegion
* @throws IOException
*/
public HRegion getMetaRegion(HRegionInfo metaInfo) throws IOException {
HRegion meta = metaRegions.get(metaInfo.getRegionName());
if (meta == null) {
meta = openMetaRegion(metaInfo);
this.metaRegions.put(metaInfo.getRegionName(), meta);
}
return meta;
}
/**
* Closes catalog regions if open. Also closes and deletes the HLog. You
* must call this method if you want to persist changes made during a
* MetaUtils edit session.
*/
public void shutdown() {
if (this.rootRegion != null) {
try {
this.rootRegion.close();
} catch (IOException e) {
LOG.error("closing root region", e);
} finally {
this.rootRegion = null;
}
}
try {
for (HRegion r: metaRegions.values()) {
r.close();
}
} catch (IOException e) {
LOG.error("closing meta region", e);
} finally {
metaRegions.clear();
}
try {
if (this.log != null) {
this.log.rollWriter();
this.log.closeAndDelete();
}
} catch (IOException e) {
LOG.error("closing HLog", e);
} finally {
this.log = null;
}
}
/**
* Used by scanRootRegion and scanMetaRegion to call back the caller so it
* can process the data for a row.
*/
public interface ScannerListener {
/**
* Callback so client of scanner can process row contents
*
* @param info HRegionInfo for row
* @return false to terminate the scan
* @throws IOException
*/
public boolean processRow(HRegionInfo info) throws IOException;
}
/**
* Scans the root region. For every meta region found, calls the listener with
* the HRegionInfo of the meta region.
*
* @param listener method to be called for each meta region found
* @throws IOException
*/
public void scanRootRegion(ScannerListener listener) throws IOException {
// Open root region so we can scan it
if (this.rootRegion == null) {
openRootRegion();
}
InternalScanner rootScanner = rootRegion.getScanner(
HConstants.COL_REGIONINFO_ARRAY, HConstants.EMPTY_START_ROW,
HConstants.LATEST_TIMESTAMP, null);
try {
HStoreKey key = new HStoreKey();
SortedMap<byte [], Cell> results =
new TreeMap<byte [], Cell>(Bytes.BYTES_COMPARATOR);
while (rootScanner.next(key, results)) {
HRegionInfo info = (HRegionInfo)Writables.getWritable(
results.get(HConstants.COL_REGIONINFO).getValue(),
new HRegionInfo());
if (info == null) {
LOG.warn("region info is null for row " + key.getRow() +
" in table " + HConstants.ROOT_TABLE_NAME);
continue;
}
if (!listener.processRow(info)) {
break;
}
results.clear();
}
} finally {
rootScanner.close();
}
}
/**
* Scans a meta region. For every region found, calls the listener with
* the HRegionInfo of the region.
* TODO: Use Visitor rather than Listener pattern. Allow multiple Visitors.
* Use this everywhere we scan meta regions: e.g. in metascanners, in close
* handling, etc. Have it pass in the whole row, not just HRegionInfo.
*
* @param metaRegionInfo HRegionInfo for meta region
* @param listener method to be called for each meta region found
* @throws IOException
*/
public void scanMetaRegion(HRegionInfo metaRegionInfo,
ScannerListener listener)
throws IOException {
// Open meta region so we can scan it
HRegion metaRegion = openMetaRegion(metaRegionInfo);
scanMetaRegion(metaRegion, listener);
}
/**
* Scan the passed in metaregion <code>m</code> invoking the passed
* <code>listener</code> per row found.
* @param m
* @param listener
* @throws IOException
*/
public void scanMetaRegion(final HRegion m, final ScannerListener listener)
throws IOException {
InternalScanner metaScanner = m.getScanner(HConstants.COL_REGIONINFO_ARRAY,
HConstants.EMPTY_START_ROW, HConstants.LATEST_TIMESTAMP, null);
try {
HStoreKey key = new HStoreKey();
SortedMap<byte[], Cell> results =
new TreeMap<byte[], Cell>(Bytes.BYTES_COMPARATOR);
while (metaScanner.next(key, results)) {
HRegionInfo info = (HRegionInfo)Writables.getWritable(
results.get(HConstants.COL_REGIONINFO).getValue(),
new HRegionInfo());
if (info == null) {
LOG.warn("regioninfo null for row " + key.getRow() + " in table " +
Bytes.toString(m.getTableDesc().getName()));
continue;
}
if (!listener.processRow(info)) {
break;
}
results.clear();
}
} finally {
metaScanner.close();
}
}
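/*
 * Listener sketch (illustrative): log every region found in the scanned
 * meta region.
 *
 *   utils.scanMetaRegion(m, new ScannerListener() {
 *     public boolean processRow(HRegionInfo info) {
 *       LOG.info(info.getRegionNameAsString());
 *       return true; // keep scanning
 *     }
 *   });
 */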
private synchronized HRegion openRootRegion() throws IOException {
if (this.rootRegion != null) {
return this.rootRegion;
}
this.rootRegion = HRegion.openHRegion(HRegionInfo.ROOT_REGIONINFO,
this.rootdir, getLog(), this.conf);
this.rootRegion.compactStores();
return this.rootRegion;
}
private HRegion openMetaRegion(HRegionInfo metaInfo) throws IOException {
HRegion meta =
HRegion.openHRegion(metaInfo, this.rootdir, getLog(), this.conf);
meta.compactStores();
return meta;
}
/**
* Set a single region on/offline.
* This is a tool to repair tables that have offlined regions in their midst.
* Can happen on occasion. Use at your own risk. Call from a bit of java
* or jython script. This method is 'expensive' in that it creates a
* {@link HTable} instance per invocation to go against <code>.META.</code>
* @param c A configuration that has its <code>hbase.master</code>
* properly set.
* @param row Row in the catalog .META. table whose HRegionInfo's offline
* status we want to change.
* @param onlineOffline Pass <code>true</code> to OFFLINE the region.
* @throws IOException
*/
public static void changeOnlineStatus (final HBaseConfiguration c,
final byte [] row, final boolean onlineOffline)
throws IOException {
HTable t = new HTable(c, HConstants.META_TABLE_NAME);
Cell cell = t.get(row, HConstants.COL_REGIONINFO);
if (cell == null) {
throw new IOException("no information for row " + row);
}
// Throws exception if null.
HRegionInfo info = (HRegionInfo)Writables.
getWritable(cell.getValue(), new HRegionInfo());
BatchUpdate b = new BatchUpdate(row);
info.setOffline(onlineOffline);
b.put(HConstants.COL_REGIONINFO, Writables.getBytes(info));
b.delete(HConstants.COL_SERVER);
b.delete(HConstants.COL_STARTCODE);
t.commit(b);
}
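/*
 * E.g. (illustrative row key): take a region offline from a bit of script:
 *
 *   MetaUtils.changeOnlineStatus(conf,
 *     Bytes.toBytes("mytable,row1,1212121212"), true);
 */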
/**
* Offline version of the online TableOperation,
* org.apache.hadoop.hbase.master.AddColumn.
* @param tableName
* @param hcd Add this column to <code>tableName</code>
* @throws IOException
*/
public void addColumn(final byte [] tableName,
final HColumnDescriptor hcd)
throws IOException {
List<HRegionInfo> metas = getMETARows(tableName);
for (HRegionInfo hri: metas) {
final HRegion m = getMetaRegion(hri);
scanMetaRegion(m, new ScannerListener() {
private boolean inTable = true;
@SuppressWarnings("synthetic-access")
public boolean processRow(HRegionInfo info) throws IOException {
LOG.debug("Testing " + Bytes.toString(tableName) + " against " +
Bytes.toString(info.getTableDesc().getName()));
if (Bytes.equals(info.getTableDesc().getName(), tableName)) {
this.inTable = false;
info.getTableDesc().addFamily(hcd);
updateMETARegionInfo(m, info);
return true;
}
// If we have not yet encountered the table, inTable is still true and
// we keep scanning. Otherwise we have passed beyond the table, so stop
// the scanner.
return this.inTable;
}});
}
}
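/*
 * Offline-edit sketch (illustrative; HBase must be shut down first, and
 * the table and family names are made up):
 *
 *   MetaUtils utils = new MetaUtils(conf);
 *   try {
 *     utils.addColumn(Bytes.toBytes("mytable"),
 *       new HColumnDescriptor(Bytes.toBytes("anchor:"), 3,
 *         HColumnDescriptor.CompressionType.NONE, false, false,
 *         Integer.MAX_VALUE, HConstants.FOREVER, false));
 *   } finally {
 *     utils.shutdown(); // persists edits and releases ROOT/.META.
 *   }
 */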
/**
* Offline version of the online TableOperation,
* org.apache.hadoop.hbase.master.DeleteColumn.
* @param tableName
* @param columnFamily Name of column family to remove.
* @throws IOException
*/
public void deleteColumn(final byte [] tableName,
final byte [] columnFamily) throws IOException {
List<HRegionInfo> metas = getMETARows(tableName);
final Path tabledir = new Path(rootdir, Bytes.toString(tableName));
for (HRegionInfo hri: metas) {
final HRegion m = getMetaRegion(hri);
scanMetaRegion(m, new ScannerListener() {
private boolean inTable = true;
@SuppressWarnings("synthetic-access")
public boolean processRow(HRegionInfo info) throws IOException {
if (Bytes.equals(info.getTableDesc().getName(), tableName)) {
this.inTable = false;
info.getTableDesc().removeFamily(columnFamily);
updateMETARegionInfo(m, info);
FSUtils.deleteColumnFamily(fs, tabledir, info.getEncodedName(),
HStoreKey.getFamily(columnFamily));
return false;
}
        // If we get here, the row is not in the table. If we have not yet
        // encountered the table, inTable is still true and we keep scanning;
        // once we have passed beyond the table, it is false and the scanner
        // stops.
return this.inTable;
}});
}
}
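  /*
   * Offline schema-edit sketch; the table and family names are illustrative:
   *
   *   byte [] table = Bytes.toBytes("mytable");
   *   addColumn(table, new HColumnDescriptor(Bytes.toBytes("newfamily:")));
   *   deleteColumn(table, Bytes.toBytes("oldfamily:"));
   *
   * Both methods walk the catalog rows returned by getMETARows(byte[])
   * below.
   */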
/**
* Update COL_REGIONINFO in meta region r with HRegionInfo hri
*
* @param r
* @param hri
* @throws IOException
*/
public void updateMETARegionInfo(HRegion r, final HRegionInfo hri)
throws IOException {
if (LOG.isDebugEnabled()) {
HRegionInfo h = (HRegionInfo)Writables.getWritable(
r.get(hri.getRegionName(), HConstants.COL_REGIONINFO).getValue(),
new HRegionInfo());
LOG.debug("Old " + Bytes.toString(HConstants.COL_REGIONINFO) +
" for " + hri.toString() + " in " + r.toString() + " is: " +
h.toString());
}
BatchUpdate b = new BatchUpdate(hri.getRegionName());
b.put(HConstants.COL_REGIONINFO, Writables.getBytes(hri));
r.batchUpdate(b);
if (LOG.isDebugEnabled()) {
HRegionInfo h = (HRegionInfo)Writables.getWritable(
r.get(hri.getRegionName(), HConstants.COL_REGIONINFO).getValue(),
new HRegionInfo());
LOG.debug("New " + Bytes.toString(HConstants.COL_REGIONINFO) +
" for " + hri.toString() + " in " + r.toString() + " is: " +
h.toString());
}
}
/**
* @return List of {@link HRegionInfo} rows found in the ROOT or META
* catalog table.
* @param tableName Name of table to go looking for.
* @throws IOException
* @see #getMetaRegion(HRegionInfo)
*/
public List<HRegionInfo> getMETARows(final byte [] tableName)
throws IOException {
final List<HRegionInfo> result = new ArrayList<HRegionInfo>();
// If passed table name is META, then return the root region.
if (Bytes.equals(HConstants.META_TABLE_NAME, tableName)) {
result.add(openRootRegion().getRegionInfo());
return result;
}
// Return all meta regions that contain the passed tablename.
scanRootRegion(new ScannerListener() {
private final Log SL_LOG = LogFactory.getLog(this.getClass());
@SuppressWarnings("unused")
public boolean processRow(HRegionInfo info) throws IOException {
SL_LOG.debug("Testing " + info);
if (Bytes.equals(info.getTableDesc().getName(),
HConstants.META_TABLE_NAME)) {
result.add(info);
return false;
}
return true;
}});
return result;
}
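  /*
   * Lookup sketch; the table name is illustrative:
   *
   *   List<HRegionInfo> metas = getMETARows(Bytes.toBytes("mytable"));
   *
   * Note that asking for .META. itself returns the root region instead,
   * per the special case above.
   */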
}

View File

@ -0,0 +1,322 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * The RegionHistorian keeps track of every modification a region goes
 * through. Public methods are used to update the information in the
 * <code>.META.</code> table and to retrieve it. This is a Singleton. By
 * default, the Historian is offline; it will not log. It is enabled deep
 * inside the regionserver and master once there is some certainty that
 * .META. has been deployed.
*/
public class RegionHistorian implements HConstants {
private static final Log LOG = LogFactory.getLog(RegionHistorian.class);
private HTable metaTable;
private GregorianCalendar cal = new GregorianCalendar();
/** Singleton reference */
private static RegionHistorian historian;
  /** Date formatter for the timestamp in RegionHistoryInformation */
private static SimpleDateFormat dateFormat = new SimpleDateFormat(
"EEE, d MMM yyyy HH:mm:ss");
public static enum HistorianColumnKey {
REGION_CREATION ( Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR+"creation")),
REGION_OPEN ( Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR+"open")),
REGION_SPLIT ( Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR+"split")),
REGION_COMPACTION ( Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR+"compaction")),
REGION_FLUSH ( Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR+"flush")),
REGION_ASSIGNMENT ( Bytes.toBytes(COLUMN_FAMILY_HISTORIAN_STR+"assignment"));
public byte[] key;
HistorianColumnKey(byte[] key) {
this.key = key;
}
}
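  /*
   * Assuming COLUMN_FAMILY_HISTORIAN_STR is "historian:" (a value inherited
   * from the HConstants interface), the enum above yields column names such
   * as:
   *
   *   Bytes.toString(HistorianColumnKey.REGION_OPEN.key);  // "historian:open"
   *   Bytes.toString(HistorianColumnKey.REGION_SPLIT.key); // "historian:split"
   */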
/**
   * Private constructor. Does not itself initialize the .META. table
   * reference; that happens in {@link #online(HBaseConfiguration)}. Use
   * {@link #getInstance()} to obtain the Singleton instance of this class.
*/
private RegionHistorian() {
super();
}
/**
* Get the RegionHistorian Singleton instance.
* @return The region historian
*/
public static RegionHistorian getInstance() {
if (historian == null) {
historian = new RegionHistorian();
}
return historian;
}
/**
* Returns, for a given region name, an ordered list by timestamp of all
* values in the historian column of the .META. table.
* @param regionName
* Region name as a string
* @return List of RegionHistoryInformation or null if we're offline.
*/
public List<RegionHistoryInformation> getRegionHistory(String regionName) {
if (!isOnline()) {
return null;
}
List<RegionHistoryInformation> informations =
new ArrayList<RegionHistoryInformation>();
try {
/*
       * TODO REGION_HISTORIAN_KEYS is used because there is currently no
       * other way to retrieve all versions and the column key information.
* To be changed when HTable.getRow handles versions.
*/
for (HistorianColumnKey keyEnu : HistorianColumnKey.values()) {
byte[] columnKey = keyEnu.key;
Cell[] cells = this.metaTable.get(Bytes.toBytes(regionName),
columnKey, ALL_VERSIONS);
if (cells != null) {
for (Cell cell : cells) {
informations.add(historian.new RegionHistoryInformation(cell
.getTimestamp(), Bytes.toString(columnKey).split(":")[1], Bytes
.toString(cell.getValue())));
}
}
}
} catch (IOException ioe) {
LOG.warn("Unable to retrieve region history", ioe);
}
Collections.sort(informations);
return informations;
}
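  /*
   * Retrieval sketch; the region name is illustrative and the historian
   * must have been onlined first -- see online(HBaseConfiguration) below:
   *
   *   List<RegionHistoryInformation> history =
   *     RegionHistorian.getInstance().getRegionHistory(
   *       "mytable,,1216275385714");
   *   if (history != null) {
   *     for (RegionHistoryInformation rhi: history) {
   *       System.out.println(rhi.getTimestampAsString() + " " +
   *         rhi.getEvent() + ": " + rhi.getDescription());
   *     }
   *   }
   */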
/**
   * Method to add an assignment event to the row in the .META table
   * @param info
   * @param serverName
*/
public void addRegionAssignment(HRegionInfo info, String serverName) {
add(HistorianColumnKey.REGION_ASSIGNMENT.key, "Region assigned to server "
+ serverName, info);
}
/**
* Method to add a creation event to the row in the .META table
* @param info
*/
public void addRegionCreation(HRegionInfo info) {
add(HistorianColumnKey.REGION_CREATION.key, "Region creation", info);
}
/**
   * Method to add an opening event to the row in the .META table
* @param info
* @param address
*/
public void addRegionOpen(HRegionInfo info, HServerAddress address) {
add(HistorianColumnKey.REGION_OPEN.key, "Region opened on server : "
+ address.getHostname(), info);
}
/**
* Method to add a split event to the rows in the .META table with
* information from oldInfo.
* @param oldInfo
* @param newInfo1
* @param newInfo2
*/
public void addRegionSplit(HRegionInfo oldInfo, HRegionInfo newInfo1,
HRegionInfo newInfo2) {
HRegionInfo[] infos = new HRegionInfo[] { newInfo1, newInfo2 };
for (HRegionInfo info : infos) {
add(HistorianColumnKey.REGION_SPLIT.key, "Region split from : "
+ oldInfo.getRegionNameAsString(), info);
}
}
/**
* Method to add a compaction event to the row in the .META table
* @param info
*/
public void addRegionCompaction(final HRegionInfo info,
final String timeTaken) {
    // While the historian cannot log flushes, because doing so could
    // deadlock the regionserver -- see the note in addRegionFlush -- there
    // is no such danger when compacting; compactions are not allowed while
    // Flusher#flushSomeRegions is run.
if (LOG.isDebugEnabled()) {
add(HistorianColumnKey.REGION_COMPACTION.key,
"Region compaction completed in " + timeTaken, info);
}
}
/**
   * Method to add a flush event to the row in the .META table.
   * Currently a noop; see the note in the method body.
* @param info
*/
public void addRegionFlush(HRegionInfo info,
@SuppressWarnings("unused") String timeTaken) {
// Disabled. Noop. If this regionserver is hosting the .META. AND is
// holding the reclaimMemcacheMemory global lock --
// see Flusher#flushSomeRegions -- we deadlock. For now, just disable
// logging of flushes.
}
/**
* Method to add an event with LATEST_TIMESTAMP.
* @param column
* @param text
* @param info
*/
private void add(byte[] column,
String text, HRegionInfo info) {
add(column, text, info, LATEST_TIMESTAMP);
}
/**
* Method to add an event with provided information.
* @param column
* @param text
* @param info
* @param timestamp
*/
private void add(byte[] column,
String text, HRegionInfo info, long timestamp) {
if (!isOnline()) {
      // It's a noop
return;
}
if (!info.isMetaRegion()) {
BatchUpdate batch = new BatchUpdate(info.getRegionName());
batch.setTimestamp(timestamp);
batch.put(column, Bytes.toBytes(text));
try {
this.metaTable.commit(batch);
} catch (IOException ioe) {
LOG.warn("Unable to '" + text + "'", ioe);
}
}
}
/**
* Inner class that only contains information about an event.
*
*/
public class RegionHistoryInformation implements
Comparable<RegionHistoryInformation> {
private long timestamp;
private String event;
private String description;
public RegionHistoryInformation(long timestamp, String event,
String description) {
this.timestamp = timestamp;
this.event = event;
this.description = description;
}
/**
* Returns the inverse value of Long.compareTo
*/
public int compareTo(RegionHistoryInformation otherInfo) {
return -1 * Long.valueOf(timestamp).compareTo(otherInfo.getTimestamp());
}
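    /*
     * For example, entries stamped 200 and 100 sort as [200, 100] after
     * Collections.sort; negating the natural Long ordering puts the most
     * recent event first.
     */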
public String getEvent() {
return event;
}
public String getDescription() {
return description;
}
public long getTimestamp() {
return timestamp;
}
/**
     * @return The value of the timestamp processed with the date formatter.
*/
public String getTimestampAsString() {
cal.setTimeInMillis(timestamp);
return dateFormat.format(cal.getTime());
}
}
/**
* @return True if the historian is online. When offline, will not add
* updates to the .META. table.
*/
public boolean isOnline() {
return this.metaTable != null;
}
/**
   * Onlines the historian. Invoke after the cluster has spun up.
   * @param c Configuration to use connecting to <code>.META.</code>
*/
public void online(final HBaseConfiguration c) {
try {
this.metaTable = new HTable(c, META_TABLE_NAME);
if (LOG.isDebugEnabled()) {
LOG.debug("Onlined");
}
} catch (IOException ioe) {
LOG.error("Unable to create RegionHistorian", ioe);
}
}
/**
* Offlines the historian.
* @see #online(HBaseConfiguration)
*/
public void offline() {
this.metaTable = null;
if (LOG.isDebugEnabled()) {
LOG.debug("Offlined");
}
}
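  /*
   * Typical lifecycle sketch; the HRegionInfo is assumed to be in scope:
   *
   *   RegionHistorian h = RegionHistorian.getInstance();
   *   h.online(new HBaseConfiguration()); // once .META. is deployed
   *   h.addRegionCreation(info);          // a noop before online() is called
   *   h.offline();                        // e.g. at shutdown
   */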
}

View File

@ -0,0 +1,391 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.migration.v5;
import java.io.IOException;
import java.util.SortedMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.regionserver.ChangedReadersObserver;
import org.apache.hadoop.hbase.regionserver.HAbstractScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.MapFile;
/**
* A scanner that iterates through HStore files
*/
class StoreFileScanner extends HAbstractScanner
implements ChangedReadersObserver {
private final Log LOG = LogFactory.getLog(this.getClass());
// Keys retrieved from the sources
  private volatile HStoreKey [] keys;
// Values that correspond to those keys
private volatile byte [][] vals;
// Readers we go against.
private volatile MapFile.Reader[] readers;
// Store this scanner came out of.
private final HStore store;
// Used around replacement of Readers if they change while we're scanning.
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
/**
* @param store
* @param timestamp
* @param targetCols
* @param firstRow
* @throws IOException
*/
public StoreFileScanner(final HStore store, final long timestamp,
final byte [][] targetCols, final byte [] firstRow)
throws IOException {
super(timestamp, targetCols);
this.store = store;
this.store.addChangedReaderObserver(this);
this.store.lock.readLock().lock();
try {
openReaders(firstRow);
} catch (Exception ex) {
close();
      IOException e = new IOException("StoreFileScanner failed construction");
e.initCause(ex);
throw e;
} finally {
this.store.lock.readLock().unlock();
}
}
/*
* Go open new Reader iterators and cue them at <code>firstRow</code>.
* Closes existing Readers if any.
* @param firstRow
* @throws IOException
*/
private void openReaders(final byte [] firstRow) throws IOException {
if (this.readers != null) {
for (int i = 0; i < this.readers.length; i++) {
if (this.readers[i] != null) {
this.readers[i].close();
}
}
}
// Open our own copies of the Readers here inside in the scanner.
this.readers = new MapFile.Reader[this.store.getStorefiles().size()];
// Most recent map file should be first
int i = readers.length - 1;
for(HStoreFile curHSF: store.getStorefiles().values()) {
readers[i--] = curHSF.getReader(store.fs, false, false);
}
this.keys = new HStoreKey[readers.length];
this.vals = new byte[readers.length][];
// Advance the readers to the first pos.
for (i = 0; i < readers.length; i++) {
keys[i] = new HStoreKey();
if (firstRow != null && firstRow.length != 0) {
if (findFirstRow(i, firstRow)) {
continue;
}
}
while (getNext(i)) {
if (columnMatch(i)) {
break;
}
}
}
}
/**
* For a particular column i, find all the matchers defined for the column.
* Compare the column family and column key using the matchers. The first one
* that matches returns true. If no matchers are successful, return false.
*
* @param i index into the keys array
* @return true if any of the matchers for the column match the column family
* and the column key.
* @throws IOException
*/
boolean columnMatch(int i) throws IOException {
return columnMatch(keys[i].getColumn());
}
/**
* Get the next set of values for this scanner.
*
* @param key The key that matched
* @param results All the results for <code>key</code>
* @return true if a match was found
* @throws IOException
*
* @see org.apache.hadoop.hbase.regionserver.InternalScanner#next(org.apache.hadoop.hbase.HStoreKey, java.util.SortedMap)
*/
@Override
public boolean next(HStoreKey key, SortedMap<byte [], Cell> results)
throws IOException {
if (this.scannerClosed) {
return false;
}
this.lock.readLock().lock();
try {
// Find the next viable row label (and timestamp).
ViableRow viableRow = getNextViableRow();
// Grab all the values that match this row/timestamp
boolean insertedItem = false;
if (viableRow.getRow() != null) {
key.setRow(viableRow.getRow());
key.setVersion(viableRow.getTimestamp());
for (int i = 0; i < keys.length; i++) {
// Fetch the data
while ((keys[i] != null)
&& (Bytes.compareTo(keys[i].getRow(), viableRow.getRow()) == 0)) {
// If we are doing a wild card match or there are multiple matchers
// per column, we need to scan all the older versions of this row
// to pick up the rest of the family members
if(!isWildcardScanner()
&& !isMultipleMatchScanner()
&& (keys[i].getTimestamp() != viableRow.getTimestamp())) {
break;
}
if(columnMatch(i)) {
// We only want the first result for any specific family member
if(!results.containsKey(keys[i].getColumn())) {
results.put(keys[i].getColumn(),
new Cell(vals[i], keys[i].getTimestamp()));
insertedItem = true;
}
}
if (!getNext(i)) {
closeSubScanner(i);
}
}
// Advance the current scanner beyond the chosen row, to
// a valid timestamp, so we're ready next time.
while ((keys[i] != null)
&& ((Bytes.compareTo(keys[i].getRow(), viableRow.getRow()) <= 0)
|| (keys[i].getTimestamp() > this.timestamp)
|| (! columnMatch(i)))) {
getNext(i);
}
}
}
return insertedItem;
} finally {
this.lock.readLock().unlock();
}
}
  // Data structure to hold the next viable row (and timestamp).
class ViableRow {
private final byte [] row;
private final long ts;
ViableRow(final byte [] r, final long t) {
this.row = r;
this.ts = t;
}
byte [] getRow() {
return this.row;
}
long getTimestamp() {
return this.ts;
}
}
/*
* @return An instance of <code>ViableRow</code>
* @throws IOException
*/
private ViableRow getNextViableRow() throws IOException {
// Find the next viable row label (and timestamp).
byte [] viableRow = null;
long viableTimestamp = -1;
long now = System.currentTimeMillis();
long ttl = store.ttl;
for(int i = 0; i < keys.length; i++) {
// The first key that we find that matches may have a timestamp greater
// than the one we're looking for. We have to advance to see if there
// is an older version present, since timestamps are sorted descending
while (keys[i] != null &&
keys[i].getTimestamp() > this.timestamp &&
columnMatch(i) &&
getNext(i)) {
if (columnMatch(i)) {
break;
}
}
if((keys[i] != null)
// If we get here and keys[i] is not null, we already know that the
// column matches and the timestamp of the row is less than or equal
// to this.timestamp, so we do not need to test that here
&& ((viableRow == null)
|| (Bytes.compareTo(keys[i].getRow(), viableRow) < 0)
|| ((Bytes.compareTo(keys[i].getRow(), viableRow) == 0)
&& (keys[i].getTimestamp() > viableTimestamp)))) {
if (ttl == HConstants.FOREVER || now < keys[i].getTimestamp() + ttl) {
viableRow = keys[i].getRow();
viableTimestamp = keys[i].getTimestamp();
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("getNextViableRow :" + keys[i] + ": expired, skipped");
}
}
}
}
return new ViableRow(viableRow, viableTimestamp);
}
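  /*
   * TTL worked example: with store.ttl == 3600000 (one hour) and
   * now == 1216275385714, any key stamped at or before 1216271785714
   * (now - ttl) fails the "now < timestamp + ttl" test above and is
   * skipped as expired.
   */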
/**
* The user didn't want to start scanning at the first row. This method
* seeks to the requested row.
*
* @param i which iterator to advance
* @param firstRow seek to this row
   * @return true if the row was not found or was expired (in which case the
   * sub-scanner is closed), or if the first key found matches the scanner's
   * columns
*/
private boolean findFirstRow(int i, final byte [] firstRow) throws IOException {
ImmutableBytesWritable ibw = new ImmutableBytesWritable();
HStoreKey firstKey
= (HStoreKey)readers[i].getClosest(new HStoreKey(firstRow), ibw);
if (firstKey == null) {
// Didn't find it. Close the scanner and return TRUE
closeSubScanner(i);
return true;
}
long now = System.currentTimeMillis();
long ttl = store.ttl;
if (ttl != HConstants.FOREVER && now >= firstKey.getTimestamp() + ttl) {
      // Found it, but it is expired. Close the scanner and return TRUE
closeSubScanner(i);
return true;
}
this.vals[i] = ibw.get();
keys[i].setRow(firstKey.getRow());
keys[i].setColumn(firstKey.getColumn());
keys[i].setVersion(firstKey.getTimestamp());
return columnMatch(i);
}
/**
* Get the next value from the specified reader.
*
* @param i which reader to fetch next value from
* @return true if there is more data available
*/
private boolean getNext(int i) throws IOException {
boolean result = false;
ImmutableBytesWritable ibw = new ImmutableBytesWritable();
long now = System.currentTimeMillis();
long ttl = store.ttl;
while (true) {
if (!readers[i].next(keys[i], ibw)) {
closeSubScanner(i);
break;
}
if (keys[i].getTimestamp() <= this.timestamp) {
if (ttl == HConstants.FOREVER || now < keys[i].getTimestamp() + ttl) {
vals[i] = ibw.get();
result = true;
break;
}
if (LOG.isDebugEnabled()) {
LOG.debug("getNext: " + keys[i] + ": expired, skipped");
}
}
}
return result;
}
/** Close down the indicated reader. */
private void closeSubScanner(int i) {
try {
if(readers[i] != null) {
try {
readers[i].close();
} catch(IOException e) {
LOG.error(store.storeName + " closing sub-scanner", e);
}
}
} finally {
readers[i] = null;
keys[i] = null;
vals[i] = null;
}
}
/** Shut it down! */
public void close() {
if (!this.scannerClosed) {
this.store.deleteChangedReaderObserver(this);
try {
for(int i = 0; i < readers.length; i++) {
if(readers[i] != null) {
try {
readers[i].close();
} catch(IOException e) {
LOG.error(store.storeName + " closing scanner", e);
}
}
}
} finally {
this.scannerClosed = true;
}
}
}
// Implementation of ChangedReadersObserver
/** {@inheritDoc} */
public void updateReaders() throws IOException {
this.lock.writeLock().lock();
try {
// The keys are currently lined up at the next row to fetch. Pass in
      // the current row as 'first' row and readers will be opened and cued
      // up so future calls to next will start here.
ViableRow viableRow = getNextViableRow();
openReaders(viableRow.getRow());
LOG.debug("Replaced Scanner Readers at row " +
Bytes.toString(viableRow.getRow()));
} finally {
this.lock.writeLock().unlock();
}
}
}

View File

@ -0,0 +1,36 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<head />
<body bgcolor="white">
Package of classes used to instantiate objects written by pre-version-5
versions of HBase.
Under the <code>hbase.rootdir</code>, a file named <code>hbase.version</code>
holds the version number for the data persisted by HBase. The version number
is incremented every time a change is made to HBase's on-filesystem formats.
Version 0.2.0 of HBase shipped with an on-filesystem version of
<code>5</code>. This package holds classes from versions prior to 5, used
during the migration of an HBase instance up to version 5. See
<a href="http://wiki.apache.org/hadoop/Hbase/HowToMigrate">How To Migrate</a>
for more on the migration of HBase across versions and for notes on design
of the HBase migration system.
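A minimal sketch of reading the persisted version follows; it assumes the
version string was written with <code>DataOutput#writeUTF</code>:
<pre>
  FileSystem fs = FileSystem.get(conf);
  FSDataInputStream in = fs.open(new Path(rootdir, "hbase.version"));
  String version = in.readUTF();   // e.g. "5"
  in.close();
</pre>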
</body>
</html>

View File

@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
/** Tests table creation restrictions */
public class TestTable extends HBaseClusterTestCase {
@ -123,4 +124,26 @@ public class TestTable extends HBaseClusterTestCase {
@SuppressWarnings("unused")
HTable table = new HTable(conf, getName());
}
/**
* Test read only tables
*/
public void testReadOnlyTable() throws Exception {
HBaseAdmin admin = new HBaseAdmin(conf);
HTableDescriptor desc = new HTableDescriptor(getName());
byte[] colName = "test:".getBytes();
desc.addFamily(new HColumnDescriptor(colName));
desc.setReadOnly(true);
admin.createTable(desc);
HTable table = new HTable(conf, getName());
try {
byte[] value = "somedata".getBytes();
      BatchUpdate update = new BatchUpdate("somerow");
update.put(colName, value);
table.commit(update);
fail("BatchUpdate on read only table succeeded");
} catch (Exception e) {
// expected
}
}
}

View File

@ -21,6 +21,9 @@ package org.apache.hadoop.hbase.client;
import java.io.IOException;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseClusterTestCase;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
@ -31,11 +34,14 @@ import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
/**
* Tests HTable
*/
public class TestHTable extends HBaseClusterTestCase implements HConstants {
private static final Log LOG = LogFactory.getLog(TestHTable.class);
private static final HColumnDescriptor column =
new HColumnDescriptor(COLUMN_FAMILY);
@ -45,6 +51,9 @@ public class TestHTable extends HBaseClusterTestCase implements HConstants {
private static final byte [] row = Bytes.toBytes("row");
private static final byte [] attrName = Bytes.toBytes("TESTATTR");
private static final byte [] attrValue = Bytes.toBytes("somevalue");
/**
* the test
* @throws IOException
@ -123,7 +132,57 @@ public class TestHTable extends HBaseClusterTestCase implements HConstants {
// We can still access A through newA because it has the table information
// cached. And if it needs to recalibrate, that will cause the information
// to be reloaded.
// Test user metadata
try {
// make a modifiable descriptor
HTableDescriptor desc = new HTableDescriptor(a.getMetadata());
// offline the table
admin.disableTable(tableAname);
// add a user attribute to HTD
desc.setValue(attrName, attrValue);
// add a user attribute to HCD
for (HColumnDescriptor c: desc.getFamilies())
c.setValue(attrName, attrValue);
// update metadata for all regions of this table
admin.modifyTableMeta(tableAname, desc);
// enable the table
admin.enableTable(tableAname);
// Use a metascanner to avoid client API caching (HConnection has a
// metadata cache)
MetaScanner.MetaScannerVisitor visitor = new MetaScanner.MetaScannerVisitor() {
public boolean processRow(
@SuppressWarnings("unused") RowResult rowResult,
HRegionLocation regionLocation,
HRegionInfo info) {
LOG.info("visiting " + regionLocation.toString());
HTableDescriptor desc = info.getTableDesc();
if (Bytes.compareTo(desc.getName(), tableAname) == 0) {
// check HTD attribute
byte[] value = desc.getValue(attrName);
if (value == null)
fail("missing HTD attribute value");
if (Bytes.compareTo(value, attrValue) != 0)
fail("HTD attribute value is incorrect");
// check HCD attribute
for (HColumnDescriptor c: desc.getFamilies()) {
value = c.getValue(attrName);
if (value == null)
fail("missing HCD attribute value");
if (Bytes.compareTo(value, attrValue) != 0)
fail("HCD attribute value is incorrect");
}
}
return true;
}
};
MetaScanner.metaScan(conf, visitor);
} catch (Exception e) {
e.printStackTrace();
fail();
}
}
/**