LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests

Mike McCandless 2020-02-05 09:51:31 -05:00
parent 37d4121770
commit 3e63cd38ef
4 changed files with 35 additions and 12 deletions


@@ -81,6 +81,9 @@ Bug Fixes
* LUCENE-9135: Make UniformSplit FieldMetadata counters long. (Bruno Roustant)
+ * LUCENE-9200: Fix TieredMergePolicy to use double (not float) math to make its merging decisions, fixing
+   a corner-case bug uncovered by fun randomized tests (Robert Muir, Mike McCandless)
Other
---------------------


@@ -558,7 +558,7 @@ public abstract class MergePolicy {
long byteSize = info.sizeInBytes();
int delCount = mergeContext.numDeletesToMerge(info);
assert assertDelCount(delCount, info);
- double delRatio = info.info.maxDoc() <= 0 ? 0.0f : (float) delCount / (float) info.info.maxDoc();
+ double delRatio = info.info.maxDoc() <= 0 ? 0d : (double) delCount / (double) info.info.maxDoc();
assert delRatio <= 1.0;
return (info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
}
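A standalone sketch (not part of the patch; the class name and all numbers below are made up) of why the float vs. double division above can matter: a float ratio carries only about 7 significant decimal digits, so for a large segment the truncated live size derived from it can land a few bytes away from the double-based value, and that is enough to flip a borderline merge decision in randomized tests.

public class DelRatioPrecisionSketch {
  public static void main(String[] args) {
    // Hypothetical counts and size, chosen only to illustrate the precision gap.
    int delCount = 1_234_567;
    int maxDoc = 22_222_223;
    long byteSize = 5_000_000_000L; // roughly a 5 GB segment

    double floatRatio = (float) delCount / (float) maxDoc;    // ~7 significant digits
    double doubleRatio = (double) delCount / (double) maxDoc; // ~15-16 significant digits

    // Same shape as size() above: estimated live bytes after discounting deletes.
    long sizeFromFloat = (long) (byteSize * (1.0 - floatRatio));
    long sizeFromDouble = (long) (byteSize * (1.0 - doubleRatio));

    // The two estimates can differ by a handful of bytes; comparisons of such
    // sizes against thresholds may then go either way.
    System.out.println(sizeFromFloat + " vs " + sizeFromDouble);
  }
}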


@@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+ import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -57,7 +58,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
totalMaxDoc += sci.info.maxDoc();
long byteSize = sci.sizeInBytes();
double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
- long weightedByteSize = Math.round(liveRatio * byteSize);
+ long weightedByteSize = (long) (liveRatio * byteSize);
totalBytes += weightedByteSize;
minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
}
@@ -66,26 +67,38 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
assertTrue("Percentage of deleted docs " + delPercentage + " is larger than the target: " + tmp.getDeletesPctAllowed(),
delPercentage <= tmp.getDeletesPctAllowed());
- long levelSize = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
+ long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
long bytesLeft = totalBytes;
double allowedSegCount = 0;
// below we make the assumption that segments that reached the max segment
// size divided by 2 don't need merging anymore
int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
while (true) {
- final double segCountLevel = bytesLeft / (double) levelSize;
- if (segCountLevel < tmp.getSegmentsPerTier() || levelSize >= maxMergedSegmentBytes / 2) {
+ final double segCountLevel = bytesLeft / (double) levelSizeBytes;
+ if (segCountLevel < tmp.getSegmentsPerTier() || levelSizeBytes >= maxMergedSegmentBytes / 2) {
allowedSegCount += Math.ceil(segCountLevel);
break;
}
allowedSegCount += tmp.getSegmentsPerTier();
- bytesLeft -= tmp.getSegmentsPerTier() * levelSize;
- levelSize = Math.min(levelSize * mergeFactor, maxMergedSegmentBytes / 2);
+ bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
+ levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
}
allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
int numSegments = infos.asList().size();
assertTrue("numSegments=" + numSegments + ", allowed=" + allowedSegCount, numSegments <= allowedSegCount);
assertTrue(String.format(Locale.ROOT,
"mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g",
mergeFactor,
minSegmentBytes,
maxMergedSegmentBytes,
tmp.getSegmentsPerTier(),
tmp.getMaxMergeAtOnce(),
numSegments,
allowedSegCount,
totalBytes,
delPercentage,
tmp.getDeletesPctAllowed()),
numSegments <= allowedSegCount);
}
@Override
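As a rough standalone walk-through of the allowed-segment-count loop in the hunk above (the class name and every input value here are illustrative assumptions, not taken from the test):

public class TierMathSketch {
  public static void main(String[] args) {
    long totalBytes = 100L * 1024 * 1024;           // 100 MB of live data
    long minSegmentBytes = 2L * 1024 * 1024;        // smallest flushed segment: 2 MB
    long floorSegmentBytes = 2L * 1024 * 1024;      // floorSegmentMB = 2
    long maxMergedSegmentBytes = 64L * 1024 * 1024; // maxMergedSegmentMB = 64
    double segmentsPerTier = 10;
    int maxMergeAtOnce = 10;

    int mergeFactor = (int) Math.min(segmentsPerTier, maxMergeAtOnce);
    long levelSizeBytes = Math.max(minSegmentBytes, floorSegmentBytes);
    long bytesLeft = totalBytes;
    double allowedSegCount = 0;
    while (true) {
      double segCountLevel = bytesLeft / (double) levelSizeBytes;
      if (segCountLevel < segmentsPerTier || levelSizeBytes >= maxMergedSegmentBytes / 2) {
        allowedSegCount += Math.ceil(segCountLevel);
        break;
      }
      allowedSegCount += segmentsPerTier;
      bytesLeft -= segmentsPerTier * levelSizeBytes;
      levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
    }
    allowedSegCount = Math.max(allowedSegCount, segmentsPerTier);

    // Tier 1: 10 segments of 2 MB consume 20 MB, leaving 80 MB; the level grows to 20 MB.
    // Tier 2: 80 MB / 20 MB = 4 < segmentsPerTier, so ceil(4) = 4 more segments are allowed.
    // Result: max(10 + 4, 10) = 14 allowed segments.
    System.out.println("allowedSegCount = " + allowedSegCount);
  }
}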
@@ -710,13 +723,13 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
doTestSimulateAppendOnly(mergePolicy, 100_000_000, 10_000);
}
- @Override @Nightly
+ // TODO: this test has bugs that prevent you from lowering the number of docs in the test!
+ @Override
public void testSimulateUpdates() throws IOException {
TieredMergePolicy mergePolicy = mergePolicy();
// Avoid low values of the max merged segment size which prevent this merge policy from scaling well
mergePolicy.setMaxMergedSegmentMB(TestUtil.nextInt(random(), 1024, 10 * 1024));
- doTestSimulateUpdates(mergePolicy, 10_000_000, 2500);
+ int numDocs = TEST_NIGHTLY ? atLeast(10_000_000) : atLeast(1_000_000);
+ doTestSimulateUpdates(mergePolicy, numDocs, 2500);
}
}


@@ -404,7 +404,14 @@ public abstract class BaseMergePolicyTestCase extends LuceneTestCase {
SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
final double avgDocSizeMB = 5. / 1024; // 5kB
for (int numDocs = 0; numDocs < totalDocs; ) {
- int flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+ final int flushDocCount;
+ if (usually()) {
+ // reasonable value
+ flushDocCount = TestUtil.nextInt(random(), maxDocsPerFlush/2, maxDocsPerFlush);
+ } else {
+ // crazy value
+ flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+ }
// how many of these documents are actually updates
int delCount = (int) (flushDocCount * 0.9 * numDocs / totalDocs);
numDocs += flushDocCount - delCount;