From 3e63cd38ef0e5c70c2644322935e61e46b22263f Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Wed, 5 Feb 2020 09:51:31 -0500 Subject: [PATCH] LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests --- lucene/CHANGES.txt | 3 ++ .../org/apache/lucene/index/MergePolicy.java | 2 +- .../lucene/index/TestTieredMergePolicy.java | 33 +++++++++++++------ .../lucene/index/BaseMergePolicyTestCase.java | 9 ++++- 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5569d683a44..6a28efee6e5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -81,6 +81,9 @@ Bug Fixes * LUCENE-9135: Make UniformSplit FieldMetadata counters long. (Bruno Roustant) +* LUCENE-9200: Fix TieredMergePolicy to use double (not float) math to make its merging decisions, fixing + a corner-case bug uncovered by fun randomized tests (Robert Muir, Mike McCandless) + Other --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java index dc8a1d3d885..be535a7ee34 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java @@ -558,7 +558,7 @@ public abstract class MergePolicy { long byteSize = info.sizeInBytes(); int delCount = mergeContext.numDeletesToMerge(info); assert assertDelCount(delCount, info); - double delRatio = info.info.maxDoc() <= 0 ? 0.0f : (float) delCount / (float) info.info.maxDoc(); + double delRatio = info.info.maxDoc() <= 0 ? 0d : (double) delCount / (double) info.info.maxDoc(); assert delRatio <= 1.0; return (info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio))); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java index 0285f658f8d..2109cd5ca64 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; @@ -57,7 +58,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase { totalMaxDoc += sci.info.maxDoc(); long byteSize = sci.sizeInBytes(); double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc(); - long weightedByteSize = Math.round(liveRatio * byteSize); + long weightedByteSize = (long) (liveRatio * byteSize); totalBytes += weightedByteSize; minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize); } @@ -66,26 +67,38 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase { assertTrue("Percentage of deleted docs " + delPercentage + " is larger than the target: " + tmp.getDeletesPctAllowed(), delPercentage <= tmp.getDeletesPctAllowed()); - long levelSize = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024)); + long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024)); long bytesLeft = totalBytes; double allowedSegCount = 0; // below we make the assumption that segments that reached the max segment // size divided by 2 don't need merging anymore int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce()); while (true) { - final double segCountLevel = bytesLeft / (double) levelSize; - if (segCountLevel < tmp.getSegmentsPerTier() || levelSize >= maxMergedSegmentBytes / 2) { + final double segCountLevel = bytesLeft / (double) levelSizeBytes; + if (segCountLevel < tmp.getSegmentsPerTier() || levelSizeBytes >= maxMergedSegmentBytes / 2) { allowedSegCount += Math.ceil(segCountLevel); break; } allowedSegCount += tmp.getSegmentsPerTier(); - bytesLeft -= tmp.getSegmentsPerTier() * levelSize; - levelSize = Math.min(levelSize * mergeFactor, maxMergedSegmentBytes / 2); + bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes; + levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2); } allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier()); int numSegments = infos.asList().size(); - assertTrue("numSegments=" + numSegments + ", allowed=" + allowedSegCount, numSegments <= allowedSegCount); + assertTrue(String.format(Locale.ROOT, + "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g", + mergeFactor, + minSegmentBytes, + maxMergedSegmentBytes, + tmp.getSegmentsPerTier(), + tmp.getMaxMergeAtOnce(), + numSegments, + allowedSegCount, + totalBytes, + delPercentage, + tmp.getDeletesPctAllowed()), + numSegments <= allowedSegCount); } @Override @@ -710,13 +723,13 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase { doTestSimulateAppendOnly(mergePolicy, 100_000_000, 10_000); } - @Override @Nightly - // TODO: this test has bugs that prevent you from lowering the number of docs in the test! + @Override public void testSimulateUpdates() throws IOException { TieredMergePolicy mergePolicy = mergePolicy(); // Avoid low values of the max merged segment size which prevent this merge policy from scaling well mergePolicy.setMaxMergedSegmentMB(TestUtil.nextInt(random(), 1024, 10 * 1024)); - doTestSimulateUpdates(mergePolicy, 10_000_000, 2500); + int numDocs = TEST_NIGHTLY ? atLeast(10_000_000) : atLeast(1_000_000); + doTestSimulateUpdates(mergePolicy, numDocs, 2500); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java index 7fc3adc567b..9928c840571 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java @@ -404,7 +404,14 @@ public abstract class BaseMergePolicyTestCase extends LuceneTestCase { SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major); final double avgDocSizeMB = 5. / 1024; // 5kB for (int numDocs = 0; numDocs < totalDocs; ) { - int flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush); + final int flushDocCount; + if (usually()) { + // reasonable value + flushDocCount = TestUtil.nextInt(random(), maxDocsPerFlush/2, maxDocsPerFlush); + } else { + // crazy value + flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush); + } // how many of these documents are actually updates int delCount = (int) (flushDocCount * 0.9 * numDocs / totalDocs); numDocs += flushDocCount - delCount;