HBASE-947 [Optimization] Major compaction should remove deletes as well as the deleted cell

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@718430 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2008-11-17 23:08:56 +00:00
parent 6aec3c3e37
commit 9b4ca73d59
3 changed files with 116 additions and 60 deletions

CHANGES.txt

@@ -124,6 +124,8 @@ Release 0.19.0 - Unreleased
HBASE-999 Up versions on historian and keep history of deleted regions for a
while rather than delete immediately
HBASE-938 Major compaction period is not checked periodically
HBASE-947 [Optimization] Major compaction should remove deletes as well as
the deleted cell
NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
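For context on the HBASE-947 entry above: with this change a major compaction writes out neither delete records nor the cells they mask, where previously both were carried into the compacted file and filtered only at read time. A minimal standalone sketch of the rule, assuming (as in the patch below) that the merge emits the newest store file first, so a delete record precedes the identical key it masks; Cell and majorCompact are illustrative names, not HBase types:

    import java.util.ArrayList;
    import java.util.List;

    // Sketch only -- not the HStore implementation.
    public class MajorCompactDeletesSketch {
      static class Cell {
        final String row, column; final long ts; final boolean delete;
        Cell(String row, String column, long ts, boolean delete) {
          this.row = row; this.column = column; this.ts = ts; this.delete = delete;
        }
      }

      // Drop delete records, and drop any value whose key exactly matches the
      // last delete seen (same row, column, and timestamp), mirroring the
      // lastDelete bookkeeping in the patched compact().
      static List<Cell> majorCompact(List<Cell> in) {
        List<Cell> out = new ArrayList<Cell>();
        Cell lastDelete = null;
        for (Cell c : in) {
          if (c.delete) {
            lastDelete = c;  // remember it, but do not write the delete out
            continue;
          }
          boolean masked = lastDelete != null && lastDelete.row.equals(c.row)
              && lastDelete.column.equals(c.column) && lastDelete.ts == c.ts;
          if (!masked) {
            out.add(c);
          }
        }
        return out;
      }

      public static void main(String[] args) {
        List<Cell> in = new ArrayList<Cell>();
        in.add(new Cell("row1", "col:a", 2000, true));   // delete record (newest file)
        in.add(new Cell("row1", "col:a", 2000, false));  // masked by the delete
        in.add(new Cell("row1", "col:a", 1000, false));  // older version, survives
        for (Cell c : majorCompact(in)) {
          System.out.println(c.row + "/" + c.column + "@" + c.ts); // row1/col:a@1000
        }
      }
    }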

HStore.java

@@ -435,7 +435,6 @@ public class HStore implements HConstants {
curfile = new HStoreFile(conf, fs, basedir, this.info,
family.getName(), fid, reference);
long storeSeqId = -1;
boolean majorCompaction = false;
try {
storeSeqId = curfile.loadInfo(fs);
if (storeSeqId > this.maxSeqId) {
@@ -1043,12 +1042,46 @@ public class HStore implements HConstants {
return nrows;
}
/*
* @param r List to reverse
* @return A reversed array of the content of <code>r</code>
*/
private MapFile.Reader [] reverse(final List<MapFile.Reader> r) {
List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(r);
Collections.reverse(copy);
return copy.toArray(new MapFile.Reader[0]);
}
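A side note on reverse() above: it copies before reversing, so the caller's oldest-to-newest list is left untouched. The same pattern, demonstrated standalone with strings standing in for MapFile.Reader instances:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class ReverseDemo {
      public static void main(String[] args) {
        List<String> files = Arrays.asList("oldest", "middle", "newest");
        List<String> copy = new ArrayList<String>(files); // copy first: caller's list unchanged
        Collections.reverse(copy);
        String[] rdrs = copy.toArray(new String[0]);
        System.out.println(rdrs[0]);  // prints "newest"
        System.out.println(files);    // still [oldest, middle, newest]
      }
    }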
/*
* @param rdrs List of readers
* @param keys Current keys
* @param done Which readers are done
* @return The lowest current key in passed <code>rdrs</code>
*/
private int getLowestKey(final MapFile.Reader [] rdrs,
final HStoreKey [] keys, final boolean [] done) {
int lowestKey = -1;
for (int i = 0; i < rdrs.length; i++) {
if (done[i]) {
continue;
}
if (lowestKey < 0) {
lowestKey = i;
} else {
if (keys[i].compareTo(keys[lowestKey]) < 0) {
lowestKey = i;
}
}
}
return lowestKey;
}
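getLowestKey() is the selection step of a k-way merge across the open store files. The same logic, demonstrated standalone with ints standing in for HStoreKeys:

    public class LowestKeyDemo {
      // Index of the smallest current key among sources not yet exhausted, or -1.
      static int getLowestKey(int[] keys, boolean[] done) {
        int lowestKey = -1;
        for (int i = 0; i < keys.length; i++) {
          if (done[i]) {
            continue;
          }
          if (lowestKey < 0 || keys[i] < keys[lowestKey]) {
            lowestKey = i;
          }
        }
        return lowestKey;
      }

      public static void main(String[] args) {
        int[] keys = {7, 3, 9};
        boolean[] done = {false, false, false};
        System.out.println(getLowestKey(keys, done)); // 1 -- key 3 is lowest
        done[1] = true;                               // reader 1 is exhausted
        System.out.println(getLowestKey(keys, done)); // 0 -- key 7 is next
      }
    }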
/*
* Compact a list of MapFile.Readers into a MapFile.Writer.
*
* We work by iterating through the readers in parallel. We always increment
* the lowest-ranked one. Updates to a single row/column will appear ranked
* by timestamp.
* We work by iterating through the readers in parallel looking at newest
* store file first. We always increment the lowest-ranked one. Updates to a
* single row/column will appear ranked by timestamp.
* @param compactedOut Where to write compaction.
* @param pReaders List of readers sorted oldest to newest.
* @param majorCompaction True to force a major compaction regardless of
@@ -1058,14 +1091,12 @@
private void compact(final MapFile.Writer compactedOut,
final List<MapFile.Reader> pReaders, final boolean majorCompaction)
throws IOException {
// Reverse order so newest is first.
List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
Collections.reverse(copy);
MapFile.Reader[] rdrs = copy.toArray(new MapFile.Reader[0]);
// Reverse order so newest store file is first.
MapFile.Reader[] rdrs = reverse(pReaders);
try {
HStoreKey[] keys = new HStoreKey[rdrs.length];
ImmutableBytesWritable[] vals = new ImmutableBytesWritable[rdrs.length];
boolean[] done = new boolean[rdrs.length];
HStoreKey [] keys = new HStoreKey[rdrs.length];
ImmutableBytesWritable [] vals = new ImmutableBytesWritable[rdrs.length];
boolean [] done = new boolean[rdrs.length];
for(int i = 0; i < rdrs.length; i++) {
keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY, this.info);
vals[i] = new ImmutableBytesWritable();
@@ -1085,56 +1116,67 @@
long now = System.currentTimeMillis();
int timesSeen = 0;
byte [] lastRow = null;
byte [] lastColumn = null;
HStoreKey lastSeen = new HStoreKey();
HStoreKey lastDelete = null;
while (numDone < done.length) {
int smallestKey = -1;
for (int i = 0; i < rdrs.length; i++) {
if (done[i]) {
continue;
}
if (smallestKey < 0) {
smallestKey = i;
} else {
if (keys[i].compareTo(keys[smallestKey]) < 0) {
smallestKey = i;
}
}
}
HStoreKey sk = keys[smallestKey];
if (HStoreKey.equalsTwoRowKeys(info,lastRow, sk.getRow())
&& Bytes.equals(lastColumn, sk.getColumn())) {
// Get lowest key in all store files.
int lowestKey = getLowestKey(rdrs, keys, done);
HStoreKey sk = keys[lowestKey];
// If it's the same row and column as the last key, increment times seen.
if (HStoreKey.equalsTwoRowKeys(info, lastSeen.getRow(), sk.getRow())
&& Bytes.equals(lastSeen.getColumn(), sk.getColumn())) {
timesSeen++;
// Reset last delete if not exact timestamp -- lastDelete only stops
// exactly the same key making it out to the compacted store file.
if (lastDelete != null &&
lastDelete.getTimestamp() != sk.getTimestamp()) {
lastDelete = null;
}
} else {
timesSeen = 1;
lastDelete = null;
}
// Don't write empty rows or columns. Only remove cells on major
// compaction. Remove if expired or > VERSIONS
if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
boolean expired = false;
if (!majorCompaction ||
(timesSeen <= family.getMaxVersions() &&
!(expired = isExpired(sk, ttl, now)))) {
compactedOut.append(sk, vals[smallestKey]);
}
if (expired) {
// HBASE-855 remove one from timesSeen because it did not make it
// past expired check -- don't count against max versions.
timesSeen--;
ImmutableBytesWritable value = vals[lowestKey];
if (!majorCompaction) {
// Write out all values if not a major compaction.
compactedOut.append(sk, value);
} else {
boolean expired = false;
boolean deleted = false;
if (timesSeen <= family.getMaxVersions() &&
!(expired = isExpired(sk, ttl, now))) {
// If this value's key exactly matches the last delete's key, skip it
if (lastDelete != null && sk.equals(lastDelete)) {
deleted = true;
} else if (HLogEdit.isDeleted(value.get())) {
// If a deleted value, skip
deleted = true;
lastDelete = new HStoreKey(sk);
} else {
compactedOut.append(sk, vals[lowestKey]);
}
}
if (expired || deleted) {
// HBASE-855 remove one from timesSeen because it did not make it
// past the expired/deleted check -- don't count against max versions.
timesSeen--;
}
}
}
// Update last-seen items
lastRow = sk.getRow();
lastColumn = sk.getColumn();
lastSeen = new HStoreKey(sk);
// Advance the smallest key. If that reader's all finished, then
// mark it as done.
if (!rdrs[smallestKey].next(keys[smallestKey], vals[smallestKey])) {
done[smallestKey] = true;
rdrs[smallestKey].close();
rdrs[smallestKey] = null;
if (!rdrs[lowestKey].next(keys[lowestKey], vals[lowestKey])) {
done[lowestKey] = true;
rdrs[lowestKey].close();
rdrs[lowestKey] = null;
numDone++;
}
}
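One subtlety in the loop above: timesSeen counts versions per row/column, and a cell skipped as expired or deleted is refunded (timesSeen--) so it does not use up one of the family's max-versions slots, per the HBASE-855 comment. A minimal standalone sketch of that accounting, with plain longs for timestamps and the delete simulated by a known timestamp rather than read from a delete record:

    import java.util.ArrayList;
    import java.util.List;

    public class TimesSeenSketch {
      public static void main(String[] args) {
        int maxVersions = 2;
        long ttlMs = 10_000, now = 100_000, deletedTs = 98_000;
        // Timestamps for one row/column, newest first, as the merge produces them.
        long[] versions = {99_000, 98_000, 97_000, 80_000};
        int timesSeen = 0;
        List<Long> kept = new ArrayList<Long>();
        for (long ts : versions) {
          timesSeen++;
          if (timesSeen > maxVersions) {
            continue;                 // over the versions budget: skip, no refund
          }
          boolean expired = now - ts > ttlMs;
          boolean deleted = ts == deletedTs;
          if (expired || deleted) {
            timesSeen--;              // refund: it never made it to the output
            continue;
          }
          kept.add(ts);
        }
        System.out.println(kept);     // [99000, 97000] -- the delete freed a slot
      }
    }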

TestCompaction.java

@@ -103,34 +103,39 @@ public class TestCompaction extends HBaseTestCase {
assertTrue(cellValues.length == 3);
r.flushcache();
r.compactStores();
// Always 3 version if that is what max versions is.
// Always 3 versions if that is what max versions is.
byte [] secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
// Increment the least significant character so we get to the next row.
secondRowBytes[START_KEY_BYTES.length - 1]++;
cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100/*Too many*/);
LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " + cellValues.length);
LOG.info("Count of " + Bytes.toString(secondRowBytes) + ": " +
cellValues.length);
assertTrue(cellValues.length == 3);
// Now add deletes to memcache and then flush it. That will put us over
// the compaction threshold of 3 store files. Compacting these store files
// should result in a compacted store file that has no references to the
// deleted row.
r.deleteAll(STARTROW, COLUMN_FAMILY_TEXT, System.currentTimeMillis(), null);
r.deleteAll(secondRowBytes, COLUMN_FAMILY_TEXT, System.currentTimeMillis(),
null);
// Assert deleted.
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
r.flushcache();
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Add a bit of data and flush. Start adding at 'bbb'.
createSmallerStoreFile(this.r);
r.flushcache();
// Assert that the first row is still deleted.
cellValues = r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Assert that the second row is still deleted.
cellValues = r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/);
assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Force major compaction.
r.compactStores(true);
assertEquals(r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size(), 1);
assertNull(r.get(STARTROW, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Make sure the store files do have some 'aaa' keys in them.
assertNull(r.get(secondRowBytes, COLUMN_FAMILY_TEXT, -1, 100 /*Too many*/));
// Make sure the store files do have some 'aaa' keys in them -- exactly 3.
// Also check that the compacted store files do not have any secondRowBytes
// entries, because that row was deleted.
int count = 0;
boolean containsStartRow = false;
for (MapFile.Reader reader: this.r.stores.
get(Bytes.mapKey(COLUMN_FAMILY_TEXT_MINUS_COLON)).getReaders()) {
@@ -140,14 +145,16 @@ public class TestCompaction extends HBaseTestCase {
while(reader.next(key, val)) {
if (Bytes.equals(key.getRow(), STARTROW)) {
containsStartRow = true;
break;
count++;
} else {
// After major compaction, should be none of these rows in compacted
// file.
assertFalse(Bytes.equals(key.getRow(), secondRowBytes));
}
}
if (containsStartRow) {
break;
}
}
assertTrue(containsStartRow);
assertTrue(count == 3);
// Do a simple TTL test.
final int ttlInSeconds = 1;
for (HStore store: this.r.stores.values()) {
@@ -155,6 +162,11 @@ }
}
Thread.sleep(ttlInSeconds * 1000);
r.compactStores(true);
count = count();
assertTrue(count == 0);
}
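The closing TTL check depends on compaction discarding expired cells via HStore's expiry test (the isExpired call in the diff above; its body is not shown here). A minimal sketch of such a rule, under the assumption that ttl is in milliseconds and a non-positive ttl means the cell never expires:

    public class TtlSketch {
      // Assumed semantics, not the actual HStore.isExpired implementation.
      static boolean isExpired(long cellTs, long ttlMs, long now) {
        return ttlMs > 0 && now - cellTs > ttlMs;
      }

      public static void main(String[] args) {
        long now = System.currentTimeMillis();
        System.out.println(isExpired(now - 2_000, 1_000, now)); // true: past the 1s TTL
        System.out.println(isExpired(now - 500, 1_000, now));   // false: still fresh
      }
    }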
private int count() throws IOException {
int count = 0;
for (MapFile.Reader reader: this.r.stores.
get(Bytes.mapKey(COLUMN_FAMILY_TEXT_MINUS_COLON)).getReaders()) {
@@ -165,7 +177,7 @@ public class TestCompaction extends HBaseTestCase {
count++;
}
}
assertTrue(count == 0);
return count;
}
private void createStoreFile(final HRegion region) throws IOException {