diff --git a/CHANGES.txt b/CHANGES.txt
index c147c17365a..70b1bbe4061 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1128,6 +1128,8 @@ Release 0.90.0 - Unreleased
               TTL to allow major even if single file
    HBASE-3194  HBase should run on both secure and vanilla versions
               of Hadoop 0.20 (Gary Helmling via Stack)
+   HBASE-3209  New Compaction Algorithm
+              (Nicolas Spiegelberg via Stack)

   NEW FEATURES

diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
index a5d3c36b87b..ef2a27dc222 100644
--- a/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
+++ b/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
@@ -90,7 +90,11 @@ public class Store implements HeapSize {
   // ttl in milliseconds.
   protected long ttl;
   private long majorCompactionTime;
-  private int maxFilesToCompact;
+  private final int maxFilesToCompact;
+  private final long minCompactSize;
+  // compactRatio: double on purpose!  Float.MAX < Long.MAX < Double.MAX
+  // With float, java will downcast your long to float for comparisons (bad)
+  private double compactRatio;
   private long lastCompactSize = 0;
   /* how many bytes to write between status checks */
   static int closeCheckInterval = 0;
@@ -174,8 +178,8 @@ public class Store implements HeapSize {

     // By default, we compact if an HStore has more than
     // MIN_COMMITS_FOR_COMPACTION map files
-    this.compactionThreshold =
-      conf.getInt("hbase.hstore.compactionThreshold", 3);
+    this.compactionThreshold = Math.max(2,
+      conf.getInt("hbase.hstore.compactionThreshold", 3));

     // Check if this is in-memory store
     this.inMemory = family.isInMemory();
@@ -193,6 +197,9 @@ public class Store implements HeapSize {
     this.majorCompactionTime = getNextMajorCompactTime();

     this.maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10);
+    this.minCompactSize = this.region.memstoreFlushSize * 3 / 2; // +50% pad
+    this.compactRatio = conf.getFloat("hbase.hstore.compaction.ratio", 1.2F);
+
     if (Store.closeCheckInterval == 0) {
       Store.closeCheckInterval = conf.getInt(
           "hbase.hstore.close.check.interval", 10*1000*1000 /* 10 MB */);
@@ -595,13 +602,13 @@ public class Store implements HeapSize {
   * We don't want to hold the structureLock for the whole time, as a compact()
* can be lengthy and we want to allow cache-flushes during this period.
*
- * @param mc True to force a major compaction regardless of thresholds
+ * @param forceMajor True to force a major compaction regardless of thresholds
* @return row to split around if a split is needed, null otherwise
* @throws IOException
*/
- StoreSize compact(final boolean mc) throws IOException {
+ StoreSize compact(final boolean forceMajor) throws IOException {
boolean forceSplit = this.region.shouldSplit(false);
- boolean majorcompaction = mc;
+ boolean majorcompaction = forceMajor;
synchronized (compactLock) {
this.lastCompactSize = 0;
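
The "double on purpose" comment on the new compactRatio field deserves a concrete illustration: in a mixed long/float comparison, Java promotes the long operand to float, which is lossy above 2^24, so a store-file size can silently fail a strictly-greater test. A minimal sketch of the pitfall (class and variable names are illustrative, not part of the patch):

    public class RatioPromotionDemo {
      public static void main(String[] args) {
        long storeSize = (1L << 24) + 1; // 16,777,217: exact as a long
        float f = 1L << 24;              // 16,777,216 as a float
        double d = 1L << 24;             // 16,777,216 as a double

        // storeSize is promoted to float: 16,777,217 rounds to 16,777,216,
        // so the strictly-greater comparison silently becomes false
        System.out.println(storeSize > f); // false

        // long -> double is exact for all values below 2^53
        System.out.println(storeSize > d); // true
      }
    }

Declaring compactRatio as a double keeps the promotion of the long operand exact for any realistic file size.
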
@@ -612,9 +619,6 @@ public class Store implements HeapSize {
return null;
}
- // Max-sequenceID is the last key of the storefiles TreeMap
- long maxId = StoreFile.getMaxSequenceIdInList(storefiles);
-
// Check to see if we need to do a major compaction on this region.
// If so, change doMajorCompaction to true to skip the incremental
// compacting below. Only check if doMajorCompaction is not true.
@@ -628,73 +632,104 @@ public class Store implements HeapSize {
return checkSplit(forceSplit);
}
- // HBASE-745, preparing all store file sizes for incremental compacting
- // selection.
+      /* get store file sizes for incremental compacting selection.
+       * normal skew:
+       *
+       *         older ----> newer
+       *     _
+       *    | |   _
+       *    | |  | |   _
+       *  --|-|- |-|- |-|---_-------_-------  minCompactSize
+       *    | |  | |  | |  | |  _  | |
+       *    | |  | |  | |  | | | | | |
+       *    | |  | |  | |  | | | | | |
+       */
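+      /* e.g. with the default 64 MB hbase.hregion.memstore.flush.size,
+       * minCompactSize = 64 MB * 3 / 2 = 96 MB: files at or below the
+       * dashed line are always compaction candidates, regardless of the
+       * ratio test below.
+       */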
int countOfFiles = filesToCompact.size();
- long totalSize = 0;
long [] fileSizes = new long[countOfFiles];
- long skipped = 0;
- int point = 0;
- for (int i = 0; i < countOfFiles; i++) {
+ long [] sumSize = new long[countOfFiles];
+ for (int i = countOfFiles-1; i >= 0; --i) {
StoreFile file = filesToCompact.get(i);
Path path = file.getPath();
if (path == null) {
- LOG.warn("Path is null for " + file);
+ LOG.error("Path is null for " + file);
return null;
}
StoreFile.Reader r = file.getReader();
if (r == null) {
- LOG.warn("StoreFile " + file + " has a null Reader");
+ LOG.error("StoreFile " + file + " has a null Reader");
return null;
}
- long len = file.getReader().length();
- fileSizes[i] = len;
- totalSize += len;
+ fileSizes[i] = file.getReader().length();
+ // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
+ int tooFar = i + this.maxFilesToCompact - 1;
+ sumSize[i] = fileSizes[i]
+ + ((i+1 < countOfFiles) ? sumSize[i+1] : 0)
+ - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
}
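+      /* after this loop, sumSize[i] is the total size of the window
+       * fileSizes[i, i+maxFilesToCompact-1), maintained right-to-left in
+       * O(n): each step adds the entering file and subtracts the file just
+       * past the window, rather than re-summing the window for every i.
+       */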
+ long totalSize = 0;
if (!majorcompaction && !references) {
- // Here we select files for incremental compaction.
- // The rule is: if the largest(oldest) one is more than twice the
- // size of the second, skip the largest, and continue to next...,
- // until we meet the compactionThreshold limit.
-
- // A problem with the above heuristic is that we could go through all of
- // filesToCompact and the above condition could hold for all files and
- // we'd end up with nothing to compact. To protect against this, we'll
- // compact the tail -- up to the last 4 files -- of filesToCompact
- // regardless.
- int tail = Math.min(countOfFiles, 4);
- for (point = 0; point < (countOfFiles - tail); point++) {
- if (((fileSizes[point] < fileSizes[point + 1] * 2) &&
- (countOfFiles - point) <= maxFilesToCompact)) {
- break;
- }
- skipped += fileSizes[point];
+ // we're doing a minor compaction; let's see which files are applicable
+ int start = 0;
+ double r = this.compactRatio;
+
+ /* Start at the oldest file and stop when you find the first file that
+ * meets compaction criteria:
+ * (1) a recently-flushed, small file (i.e. <= minCompactSize)
+ * OR
+ * (2) within the compactRatio of sum(newer_files)
+ * Given normal skew, any newer files will also meet this criterion.
+ *
+ * Additional Note:
+ * If fileSizes.size() >> maxFilesToCompact, we will recurse on
+ * compact(). Consider the oldest files first to avoid a situation
+ * where we always compact [end-threshold,end); otherwise the last
+ * file would become an aggregate of all previous compactions.
+ */
+ while(countOfFiles - start >= this.compactionThreshold &&
+ fileSizes[start] >
+ Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
+ ++start;
}
- filesToCompact = new ArrayList
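
To make the new selection rule concrete, here is a standalone sketch of the walk. The sizes (in MB) are hypothetical; compactionThreshold = 3, compactRatio = 1.2, and maxFilesToCompact = 10 are the defaults from the hunks above, and minCompactSize = 96 assumes the default 64 MB memstore flush size, which this patch does not set:

    import java.util.Arrays;

    // Standalone sketch of the selection walk, not Store.java itself.
    public class CompactSelectionDemo {
      public static void main(String[] args) {
        long[] fileSizes = {900, 300, 96, 20, 16, 12}; // MB, index 0 = oldest
        int compactionThreshold = 3;      // hbase.hstore.compactionThreshold
        int maxFilesToCompact = 10;       // hbase.hstore.compaction.max
        double r = 1.2;                   // hbase.hstore.compaction.ratio
        long minCompactSize = 64 * 3 / 2; // 96, assuming a 64 MB flush size

        // rolling window sums, as in the patch (extra slot avoids the ternaries)
        int n = fileSizes.length;
        long[] sumSize = new long[n + 1];
        for (int i = n - 1; i >= 0; --i) {
          int tooFar = i + maxFilesToCompact - 1;
          sumSize[i] = fileSizes[i] + sumSize[i + 1]
                     - (tooFar < n ? fileSizes[tooFar] : 0);
        }

        // skip files too large relative to the newer files behind them
        int start = 0;
        while (n - start >= compactionThreshold &&
               fileSizes[start] >
                 Math.max(minCompactSize, (long) (sumSize[start + 1] * r))) {
          ++start;
        }

        // 900 > max(96, 1.2 * 444) and 300 > max(96, 1.2 * 144) are skipped;
        // 96 <= minCompactSize, so the walk stops there.
        System.out.println("compact: " + Arrays.toString(
            Arrays.copyOfRange(fileSizes, start, n))); // [96, 20, 16, 12]
      }
    }

With ratio 1.2 the two large old files are left alone and the newest run of small files is compacted together; raising the ratio makes the walk stop earlier and pull older, larger files into the compaction.
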