diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java index 2af46b08b29..7230ed0e1e6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/ExploringCompactionPolicy.java @@ -97,9 +97,7 @@ public class ExploringCompactionPolicy extends RatioBasedCompactionPolicy { } ++optsInRatio; - // Keep if this gets rid of more files. Or the same number of files for less io. - if (potentialMatchFiles.size() > bestSelection.size() - || (potentialMatchFiles.size() == bestSelection.size() && size < bestSize)) { + if (isBetterSelection(bestSelection, bestSize, potentialMatchFiles, size, mightBeStuck)) { bestSelection = potentialMatchFiles; bestSize = size; bestStart = start; @@ -117,6 +115,22 @@ public class ExploringCompactionPolicy extends RatioBasedCompactionPolicy { return new ArrayList(bestSelection); } + private boolean isBetterSelection(List bestSelection, + long bestSize, List selection, long size, boolean mightBeStuck) { + if (mightBeStuck && bestSize > 0 && size > 0) { + // Keep the selection that removes most files for least size. That penalizes adding + // large files to compaction, but not small files, so we don't become totally inefficient + // (might want to tweak that in future). Also, given the current order of looking at + // permutations, prefer earlier files and smaller selection if the difference is small. + final double REPLACE_IF_BETTER_BY = 1.05; + double thresholdQuality = ((double)bestSelection.size() / bestSize) * REPLACE_IF_BETTER_BY; + return thresholdQuality < ((double)selection.size() / size); + } + // Keep if this gets rid of more files. Or the same number of files for less io.
+ return selection.size() > bestSelection.size() + || (selection.size() == bestSelection.size() && size < bestSize); + } + /** * Find the total size of a list of store files. * @param potentialMatchFiles StoreFile list. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultCompactSelection.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultCompactSelection.java index b647ff8a838..c009682111f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultCompactSelection.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDefaultCompactSelection.java @@ -345,4 +345,18 @@ public class TestDefaultCompactSelection extends TestCase { compactEquals(sfCreate(999, 50, 12, 12, 1), false, true, 50, 12, 12, 1); compactEquals(sfCreate(999, 50, 12, 12, 1), 12, 12, 1); } + + public void testStuckStoreCompaction() throws IOException { + // Select the smallest compaction if the store is stuck. + compactEquals(sfCreate(99,99,99,99,99,99, 30,30,30,30), 30, 30, 30); + // If not stuck, standard policy applies. + compactEquals(sfCreate(99,99,99,99,99, 30,30,30,30), 99, 30, 30, 30, 30); + + // Add sufficiently small files to compaction, though + compactEquals(sfCreate(99,99,99,99,99,99, 30,30,30,15), 30, 30, 30, 15); + // Prefer earlier compaction to later if the benefit is not significant + compactEquals(sfCreate(99,99,99,99, 30,26,26,29,25,25), 30, 26, 26); + // Prefer later compaction if the benefit is significant. + compactEquals(sfCreate(99,99,99,99, 27,27,27,20,20,20), 20, 20, 20); + } }