HBASE-855 compaction can return fewer versions than we should in some cases

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@690630 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2008-08-31 03:35:33 +00:00
parent bc577ede4a
commit 15406ddda7
4 changed files with 26 additions and 41 deletions

CHANGES.txt

@ -49,6 +49,8 @@ Release 0.18.0 - Unreleased
(Toby White via Stack)
HBASE-854 hbase-841 broke build on hudson? - makes sure that proxies are
closed. (Andrew Purtell via Jim Kellerman)
HBASE-855 compaction can return fewer versions than we should in some cases
(Billy Pearson via Stack)
IMPROVEMENTS
HBASE-801 When a table haven't disable, shell could response in a "user

HRegion.java

@ -858,7 +858,7 @@ public class HRegion implements HConstants {
return compactStores(false);
}
/**
/*
* Called by compaction thread and after region is opened to compact the
* HStores if necessary.
*
@ -873,7 +873,8 @@ public class HRegion implements HConstants {
* @return mid key if split is needed
* @throws IOException
*/
private byte [] compactStores(final boolean majorCompaction) throws IOException {
byte [] compactStores(final boolean majorCompaction)
throws IOException {
splitsAndClosesLock.readLock().lock();
try {
byte [] midKey = null;

HStore.java

@ -821,7 +821,8 @@ public class HStore implements HConstants {
// size of the second, skip the largest, and continue to next...,
// until we meet the compactionThreshold limit.
for (point = 0; point < compactionThreshold - 1; point++) {
if (fileSizes[point] < fileSizes[point + 1] * 2 && maxFilesToCompact < (countOfFiles - point)) {
if (fileSizes[point] < fileSizes[point + 1] * 2 &&
maxFilesToCompact < (countOfFiles - point)) {
break;
}
skipped += fileSizes[point];
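
The skipping rule in the hunk above can be read as a small standalone helper that decides where in the list of candidate store files the compaction should start. This is a sketch only, not HBase API: it assumes fileSizes holds at least compactionThreshold entries, ordered largest/oldest first, and the method name and signature are illustrative.

  // Illustrative sketch of the selection loop above; not HBase code.
  // fileSizes: candidate store file sizes, largest/oldest first.
  // Returns the index of the first file to include in the compaction;
  // files before that index are left for a later round.
  static int selectCompactionStart(long[] fileSizes, int compactionThreshold,
      int maxFilesToCompact) {
    int countOfFiles = fileSizes.length;   // assumed >= compactionThreshold
    int point;
    for (point = 0; point < compactionThreshold - 1; point++) {
      if (fileSizes[point] < fileSizes[point + 1] * 2 &&
          maxFilesToCompact < (countOfFiles - point)) {
        break;   // current file is no longer twice the next one; start here
      }
      // Otherwise skip this larger file and consider starting at the next one.
    }
    return point;                          // compact files [point, countOfFiles)
  }
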
@ -917,10 +918,10 @@ public class HStore implements HConstants {
private void compact(final MapFile.Writer compactedOut,
final List<MapFile.Reader> pReaders, final boolean majorCompaction)
throws IOException {
// Reverse order so we newest is first.
// Reverse order so newest is first.
List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
Collections.reverse(copy);
MapFile.Reader[] rdrs = pReaders.toArray(new MapFile.Reader[copy.size()]);
MapFile.Reader[] rdrs = copy.toArray(new MapFile.Reader[0]);
try {
HStoreKey[] keys = new HStoreKey[rdrs.length];
ImmutableBytesWritable[] vals = new ImmutableBytesWritable[rdrs.length];
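
The hunk above reverses the readers so the newest store file comes first; the loop in the hunks that follow then repeatedly picks the reader whose current key sorts lowest and appends that entry to the compacted file. A minimal sketch of that selection step is below; the helper name and generic signature are illustrative (in HStore the key type is HStoreKey), not HBase API.

  // Illustrative helper, not HBase API: pick the index of the reader whose
  // current key sorts lowest. keys[i] is the current key from reader i and
  // done[i] is true once reader i is exhausted. Because the readers were
  // reversed to newest-first, the strict "<" keeps the lower index (newer
  // file) on ties, so the newer value is the one written to the compacted file.
  static <K extends Comparable<K>> int pickSmallestKey(K[] keys, boolean[] done) {
    int smallestKey = -1;
    for (int i = 0; i < keys.length; i++) {
      if (done[i]) {
        continue;                // this reader has no more entries
      }
      if (smallestKey < 0 || keys[i].compareTo(keys[smallestKey]) < 0) {
        smallestKey = i;
      }
    }
    return smallestKey;          // -1 once every reader is done
  }
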
@ -947,11 +948,6 @@ public class HStore implements HConstants {
byte [] lastRow = null;
byte [] lastColumn = null;
while (numDone < done.length) {
// Find the reader with the smallest key. If two files have same key
// but different values -- i.e. one is delete and other is non-delete
// value -- we will find the first, the one that was written later and
// therefore the one whose value should make it out to the compacted
// store file.
int smallestKey = -1;
for (int i = 0; i < rdrs.length; i++) {
if (done[i]) {
@ -970,7 +966,7 @@ public class HStore implements HConstants {
&& Bytes.equals(lastColumn, sk.getColumn())) {
timesSeen++;
} else {
timesSeen = 0;
timesSeen = 1;
}
// Added majorCompaction here to make sure all versions make it to
@ -980,10 +976,13 @@ public class HStore implements HConstants {
// Keep old versions until we have maxVersions worth.
// Then just skip them.
if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
// Only write out objects which have a non-zero length key and
// value
// Only write out objects with non-zero length key and value
if (!isExpired(sk, ttl, now)) {
compactedOut.append(sk, vals[smallestKey]);
} else {
// HBASE-855 remove one from timesSeen because it did not make it
// past expired check -- don't count against max versions.
timesSeen--;
}
}
}
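
Taken together, the two changes in the hunks above (seeding timesSeen at 1 for the first version of a row/column, and subtracting one when an expired cell is skipped) are what keep a compaction from handing back fewer live versions than maxVersions allows. Below is a simplified, self-contained sketch of that bookkeeping under stated assumptions: cells arrive sorted by row, column and descending timestamp, as the merge produces them, and Cell, keepVersions and isExpired are stand-ins rather than HBase types; the real HStore loop additionally handles deletes, zero-length keys, and the majorCompaction flag.

  import java.util.ArrayList;
  import java.util.List;

  // Simplified sketch of the HBASE-855 version bookkeeping; not HBase code.
  class VersionKeepSketch {
    record Cell(String row, String column, long timestamp) {}

    // A cell is expired when its age exceeds the column family's TTL.
    static boolean isExpired(Cell c, long ttl, long now) {
      return ttl != Long.MAX_VALUE && now - c.timestamp() > ttl;
    }

    // cells must be sorted by (row, column, newest timestamp first), the
    // order in which the compaction merge emits them.
    static List<Cell> keepVersions(List<Cell> cells, int maxVersions,
        long ttl, long now) {
      List<Cell> kept = new ArrayList<>();
      String lastRow = null;
      String lastColumn = null;
      int timesSeen = 0;
      for (Cell c : cells) {
        if (c.row().equals(lastRow) && c.column().equals(lastColumn)) {
          timesSeen++;                 // another version of the same cell
        } else {
          timesSeen = 1;               // the fix: the first version counts as one
        }
        if (timesSeen <= maxVersions) {
          if (!isExpired(c, ttl, now)) {
            kept.add(c);               // within maxVersions and not expired
          } else {
            timesSeen--;               // the fix: expired cells don't use up a version
          }
        }
        lastRow = c.row();
        lastColumn = c.column();
      }
      return kept;
    }
  }

With maxVersions = 3, a row/column that has expired versions interleaved with live ones still comes out of keepVersions with three live versions instead of two, which is the behavior the issue title describes.
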

TestCompaction.java

@ -96,8 +96,7 @@ public class TestCompaction extends HBaseTestCase {
}
// Add more content. Now there are about 5 versions of each column.
// Default is that there only 3 (MAXVERSIONS) versions allowed per column.
// Assert > 3 and then after compaction, assert that only 3 versions
// available.
// Assert == 3 when we ask for versions.
addContent(new HRegionIncommon(r), Bytes.toString(COLUMN_FAMILY));
Cell[] cellValues =
r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
@ -105,49 +104,33 @@ public class TestCompaction extends HBaseTestCase {
assertTrue(cellValues.length == 3);
r.flushcache();
r.compactStores();
assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
// Now assert that there are 4 versions of a record only: thats the
// 3 versions that should be in the compacted store and then the one more
// we added when we flushed. But could be 3 only if the flush happened
// before the compaction started though we tried to have the threads run
// concurrently (On hudson this happens).
// Always 3 versions if that is what max versions is.
byte [] secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
// Increment the least significant character so we get to next row.
secondRowBytes[START_KEY_BYTES.length - 1]++;
cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100/*Too many*/);
LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " + cellValues.length);
// Commented out because fails on an hp+ubuntu single-processor w/ 1G and
// "Intel(R) Pentium(R) 4 CPU 3.20GHz" though passes on all local
// machines and even on hudson. On said machine, its reporting in the
// LOG line above that there are 3 items in row so it should pass the
// below test.
assertTrue(cellValues.length == 3 || cellValues.length == 4);
assertTrue(cellValues.length == 3);
// Now add deletes to memcache and then flush it. That will put us over
// the compaction threshold of 3 store files. Compacting these store files
// should result in a compacted store file that has no references to the
// deleted row.
r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis(),null);
// Now, before compacting, remove all instances of the first row so can
// verify that it is removed as we compact.
// Assert all delted.
r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis(), null);
// Assert deleted.
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
r.flushcache();
assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 2);
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Add a bit of data and flush it so we for sure have the compaction limit
// for store files. Usually by this time we will have but if compaction
// included the flush that ran 'concurrently', there may be just the
// compacted store and the flush above when we added deletes. Add more
// content to be certain.
// Add a bit of data and flush. Start adding at 'bbb'.
createSmallerStoreFile(this.r);
r.flushcache();
assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 3);
r.compactStores();
assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 2);
// Assert that the first row is still deleted.
cellValues = r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
assertNull(cellValues);
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Force major compaction.
r.compactStores(true);
assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Make sure the store files do have some 'aaa' keys in them.
boolean containsStartRow = false;
for (MapFile.Reader reader: this.r.stores.