HBASE-855 compaction can return less versions then we should in some cases
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@690630 13f79535-47bb-0310-9956-ffa450edef68
commit 15406ddda7
parent bc577ede4a
CHANGES.txt
@@ -49,6 +49,8 @@ Release 0.18.0 - Unreleased
                (Toby White via Stack)
    HBASE-854   hbase-841 broke build on hudson? - makes sure that proxies are
                closed. (Andrew Purtell via Jim Kellerman)
+   HBASE-855   compaction can return less versions then we should in some cases
+               (Billy Pearson via Stack)
 
    IMPROVEMENTS
    HBASE-801   When a table haven't disable, shell could response in a "user
HRegion.java
@@ -858,7 +858,7 @@ public class HRegion implements HConstants {
     return compactStores(false);
   }
 
-  /**
+  /*
    * Called by compaction thread and after region is opened to compact the
    * HStores if necessary.
    *
@@ -873,7 +873,8 @@ public class HRegion implements HConstants {
    * @return mid key if split is needed
    * @throws IOException
    */
-  private byte [] compactStores(final boolean majorCompaction) throws IOException {
+  byte [] compactStores(final boolean majorCompaction)
+  throws IOException {
     splitsAndClosesLock.readLock().lock();
     try {
       byte [] midKey = null;
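Note on the HRegion hunks: dropping the "private" modifier widens
compactStores(boolean) to package-private. Judging from the test changes
later in this commit (TestCompaction sits in the same package and now calls
r.compactStores(true)), a plausible reading is that the widening exists so
the test can force a major compaction directly. A fragment mirroring that
usage (r and COLUMN_FAMILY_TEXT come from the test harness):

    r.compactStores();      // no-arg overload from the first hunk; delegates to compactStores(false)
    r.compactStores(true);  // package-private overload: force a major compaction
    assertEquals(1, r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size());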
HStore.java
@@ -821,7 +821,8 @@ public class HStore implements HConstants {
       // size of the second, skip the largest, and continue to next...,
       // until we meet the compactionThreshold limit.
       for (point = 0; point < compactionThreshold - 1; point++) {
-        if (fileSizes[point] < fileSizes[point + 1] * 2 && maxFilesToCompact < (countOfFiles - point)) {
+        if (fileSizes[point] < fileSizes[point + 1] * 2 &&
+            maxFilesToCompact < (countOfFiles - point)) {
           break;
         }
         skipped += fileSizes[point];
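The hunk above only re-wraps a long condition; behavior is unchanged. Since
the surrounding comments describe the skip heuristic in prose, here is a
mechanical trace of the loop under invented inputs (all values are assumed
for illustration; only the loop body comes from the diff):

    // Trace of the store-file skip loop with assumed inputs.
    public class SkipLoopTrace {
      public static void main(String[] args) {
        long[] fileSizes = {1000L, 400L, 300L, 200L}; // sorted largest first
        int countOfFiles = fileSizes.length;          // 4
        int compactionThreshold = 3;                  // assumed value
        int maxFilesToCompact = 2;                    // assumed value
        long skipped = 0;
        int point;
        for (point = 0; point < compactionThreshold - 1; point++) {
          if (fileSizes[point] < fileSizes[point + 1] * 2 &&
              maxFilesToCompact < (countOfFiles - point)) {
            break;
          }
          skipped += fileSizes[point];
        }
        // point == 1, skipped == 1000: the dominant 1000-unit file is
        // skipped (it dwarfs its successor), and compaction would start
        // from index 1.
        System.out.println("point=" + point + ", skipped=" + skipped);
      }
    }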
@@ -917,10 +918,10 @@ public class HStore implements HConstants {
   private void compact(final MapFile.Writer compactedOut,
       final List<MapFile.Reader> pReaders, final boolean majorCompaction)
   throws IOException {
-    // Reverse order so we newest is first.
+    // Reverse order so newest is first.
     List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
     Collections.reverse(copy);
-    MapFile.Reader[] rdrs = pReaders.toArray(new MapFile.Reader[copy.size()]);
+    MapFile.Reader[] rdrs = copy.toArray(new MapFile.Reader[0]);
     try {
       HStoreKey[] keys = new HStoreKey[rdrs.length];
       ImmutableBytesWritable[] vals = new ImmutableBytesWritable[rdrs.length];
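The substantive change in this hunk is the second one: copy is reversed so
that the newest store file comes first, but the merge array was being built
from the un-reversed pReaders, so the "newest wins" assumption elsewhere in
the merge was silently violated. A minimal sketch of the fixed construction
(MapFile.Reader is Hadoop's; the rest is scaffolding):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import org.apache.hadoop.io.MapFile;

    class ReaderOrderSketch {
      // Reverse a copy so the newest reader sits at index 0, then build
      // the merge array from that copy -- not from the original list, as
      // the old code did.
      static MapFile.Reader[] newestFirst(List<MapFile.Reader> pReaders) {
        List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
        Collections.reverse(copy);
        return copy.toArray(new MapFile.Reader[0]);
      }
    }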
@@ -947,11 +948,6 @@ public class HStore implements HConstants {
       byte [] lastRow = null;
       byte [] lastColumn = null;
       while (numDone < done.length) {
-        // Find the reader with the smallest key. If two files have same key
-        // but different values -- i.e. one is delete and other is non-delete
-        // value -- we will find the first, the one that was written later and
-        // therefore the one whose value should make it out to the compacted
-        // store file.
         int smallestKey = -1;
         for (int i = 0; i < rdrs.length; i++) {
           if (done[i]) {
@@ -970,7 +966,7 @@ public class HStore implements HConstants {
             && Bytes.equals(lastColumn, sk.getColumn())) {
           timesSeen++;
         } else {
-          timesSeen = 0;
+          timesSeen = 1;
         }
 
         // Added majorCompaction here to make sure all versions make it to
@@ -980,10 +976,13 @@ public class HStore implements HConstants {
           // Keep old versions until we have maxVersions worth.
           // Then just skip them.
           if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
-            // Only write out objects which have a non-zero length key and
-            // value
+            // Only write out objects with non-zero length key and value
             if (!isExpired(sk, ttl, now)) {
               compactedOut.append(sk, vals[smallestKey]);
+            } else {
+              // HBASE-855 remove one from timesSeen because it did not make it
+              // past expired check -- don't count against max versions.
+              timesSeen--;
             }
           }
         }
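Taken together, the two hunks above are the heart of HBASE-855: timesSeen
now means "versions of the current row/column seen so far, including this
one" (hence it starts at 1, not 0), and an expired cell that never reaches
the compacted file gives its slot back via timesSeen--. The maxVersions
comparison itself sits outside this diff, so the check below is an assumed
stand-in, and all inputs are invented for the trace:

    // Toy replay of the fixed version counting during the compaction
    // merge. Keys are sorted newest-first, standing in for HStoreKeys.
    public class Hbase855Sketch {
      public static void main(String[] args) {
        String[] keys     = {"r1:c1", "r1:c1", "r1:c1", "r1:c1"};
        boolean[] expired = {false,   true,    false,   false};
        int maxVersions = 3;
        String lastKey = null;
        int timesSeen = 0;
        int written = 0;
        for (int i = 0; i < keys.length; i++) {
          if (keys[i].equals(lastKey)) {
            timesSeen++;
          } else {
            timesSeen = 1;  // fix #1: the current key is already version 1
            lastKey = keys[i];
          }
          if (timesSeen <= maxVersions) {  // assumed downstream check
            if (!expired[i]) {
              written++;                   // survives into the output file
            } else {
              // fix #2: an expired cell is skipped, so it must not use up
              // a version slot that an older, live version still needs.
              timesSeen--;
            }
          }
        }
        // Prints written=3. Without the decrement the expired cell eats a
        // slot and only 2 of the 3 live versions survive -- compaction
        // returns fewer versions than it should.
        System.out.println("written=" + written);
      }
    }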
TestCompaction.java
@@ -96,8 +96,7 @@ public class TestCompaction extends HBaseTestCase {
     }
     // Add more content. Now there are about 5 versions of each column.
     // Default is that there only 3 (MAXVERSIONS) versions allowed per column.
-    // Assert > 3 and then after compaction, assert that only 3 versions
-    // available.
+    // Assert == 3 when we ask for versions.
     addContent(new HRegionIncommon(r), Bytes.toString(COLUMN_FAMILY));
     Cell[] cellValues =
       r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
@@ -105,49 +104,33 @@ public class TestCompaction extends HBaseTestCase {
     assertTrue(cellValues.length == 3);
     r.flushcache();
     r.compactStores();
-    // Now assert that there are 4 versions of a record only: thats the
-    // 3 versions that should be in the compacted store and then the one more
-    // we added when we flushed. But could be 3 only if the flush happened
-    // before the compaction started though we tried to have the threads run
-    // concurrently (On hudson this happens).
+    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
+    // Always 3 version if that is what max versions is.
     byte [] secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
     // Increment the least significant character so we get to next row.
     secondRowBytes[START_KEY_BYTES.length - 1]++;
     cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100/*Too many*/);
     LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " + cellValues.length);
-    // Commented out because fails on an hp+ubuntu single-processor w/ 1G and
-    // "Intel(R) Pentium(R) 4 CPU 3.20GHz" though passes on all local
-    // machines and even on hudson. On said machine, its reporting in the
-    // LOG line above that there are 3 items in row so it should pass the
-    // below test.
-    assertTrue(cellValues.length == 3 || cellValues.length == 4);
+    assertTrue(cellValues.length == 3);
 
-    // Now add deletes to memcache and then flush it. That will put us over
-    // the compaction threshold of 3 store files. Compacting these store files
-    // should result in a compacted store file that has no references to the
-    // deleted row.
-    r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis(),null);
-    // Assert all delted.
+    // Now, before compacting, remove all instances of the first row so can
+    // verify that it is removed as we compact.
+    r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis(), null);
+    // Assert deleted.
     assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
     r.flushcache();
+    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 2);
     assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
-    // Add a bit of data and flush it so we for sure have the compaction limit
-    // for store files. Usually by this time we will have but if compaction
-    // included the flush that ran 'concurrently', there may be just the
-    // compacted store and the flush above when we added deletes. Add more
-    // content to be certain.
+    // Add a bit of data and flush. Start adding at 'bbb'.
     createSmallerStoreFile(this.r);
     r.flushcache();
+    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 3);
     r.compactStores();
+    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 2);
     // Assert that the first row is still deleted.
-    cellValues = r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
-    assertNull(cellValues);
+    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
+    // Force major compaction.
+    r.compactStores(true);
+    assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
+    assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
     // Make sure the store files do have some 'aaa' keys in them.
     boolean containsStartRow = false;
     for (MapFile.Reader reader: this.r.stores.