mirror of https://github.com/apache/lucene.git
LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests
parent 37d4121770
commit 3e63cd38ef

@@ -81,6 +81,9 @@ Bug Fixes
 
 * LUCENE-9135: Make UniformSplit FieldMetadata counters long. (Bruno Roustant)
 
+* LUCENE-9200: Fix TieredMergePolicy to use double (not float) math to make its merging decisions, fixing
+  a corner-case bug uncovered by fun randomized tests (Robert Muir, Mike McCandless)
+
 Other
 ---------------------

@@ -558,7 +558,7 @@ public abstract class MergePolicy {
     long byteSize = info.sizeInBytes();
     int delCount = mergeContext.numDeletesToMerge(info);
     assert assertDelCount(delCount, info);
-    double delRatio = info.info.maxDoc() <= 0 ? 0.0f : (float) delCount / (float) info.info.maxDoc();
+    double delRatio = info.info.maxDoc() <= 0 ? 0d : (double) delCount / (double) info.info.maxDoc();
     assert delRatio <= 1.0;
     return (info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
   }
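
The old line computed the ratio in 32-bit float before widening to double. A float's 24-bit mantissa cannot represent every int above 2^24, so on very large segments the float ratio can silently disagree with the double math used elsewhere in the policy. A standalone sketch of the divergence (illustration only, not part of the commit):

    // Illustration only: float division loses precision once counts exceed 2^24.
    public class FloatVsDoubleRatio {
      public static void main(String[] args) {
        int delCount = 16_777_217;  // 2^24 + 1; rounds down to 2^24 as a float
        int maxDoc = 33_554_433;
        double oldRatio = (float) delCount / (float) maxDoc;    // 0.5 exactly
        double newRatio = (double) delCount / (double) maxDoc;  // ~0.5000000149
        System.out.println(oldRatio == newRatio);               // prints false
      }
    }
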
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.analysis.MockAnalyzer;

@@ -57,7 +58,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
       totalMaxDoc += sci.info.maxDoc();
       long byteSize = sci.sizeInBytes();
       double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
-      long weightedByteSize = Math.round(liveRatio * byteSize);
+      long weightedByteSize = (long) (liveRatio * byteSize);
       totalBytes += weightedByteSize;
       minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
     }
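
The test previously rounded to the nearest long while MergePolicy.size() truncates via a (long) cast, so the two could disagree by a byte per segment. A quick sketch of the difference (illustration only, not part of the commit):

    // Illustration only: Math.round goes to nearest, the cast truncates toward zero.
    public class RoundVsTruncate {
      public static void main(String[] args) {
        double liveBytes = 1024.9;
        System.out.println(Math.round(liveBytes)); // 1025
        System.out.println((long) liveBytes);      // 1024, matches MergePolicy.size()
      }
    }
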
@@ -66,26 +67,38 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
     assertTrue("Percentage of deleted docs " + delPercentage + " is larger than the target: " + tmp.getDeletesPctAllowed(),
         delPercentage <= tmp.getDeletesPctAllowed());
 
-    long levelSize = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
+    long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
     long bytesLeft = totalBytes;
     double allowedSegCount = 0;
     // below we make the assumption that segments that reached the max segment
     // size divided by 2 don't need merging anymore
     int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
     while (true) {
-      final double segCountLevel = bytesLeft / (double) levelSize;
-      if (segCountLevel < tmp.getSegmentsPerTier() || levelSize >= maxMergedSegmentBytes / 2) {
+      final double segCountLevel = bytesLeft / (double) levelSizeBytes;
+      if (segCountLevel < tmp.getSegmentsPerTier() || levelSizeBytes >= maxMergedSegmentBytes / 2) {
         allowedSegCount += Math.ceil(segCountLevel);
         break;
       }
       allowedSegCount += tmp.getSegmentsPerTier();
-      bytesLeft -= tmp.getSegmentsPerTier() * levelSize;
-      levelSize = Math.min(levelSize * mergeFactor, maxMergedSegmentBytes / 2);
+      bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
+      levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
     }
     allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
 
     int numSegments = infos.asList().size();
-    assertTrue("numSegments=" + numSegments + ", allowed=" + allowedSegCount, numSegments <= allowedSegCount);
+    assertTrue(String.format(Locale.ROOT,
+        "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g",
+        mergeFactor,
+        minSegmentBytes,
+        maxMergedSegmentBytes,
+        tmp.getSegmentsPerTier(),
+        tmp.getMaxMergeAtOnce(),
+        numSegments,
+        allowedSegCount,
+        totalBytes,
+        delPercentage,
+        tmp.getDeletesPctAllowed()),
+        numSegments <= allowedSegCount);
   }
 
   @Override
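
The loop above mirrors how TieredMergePolicy budgets segments: each tier may hold segmentsPerTier segments, each level is mergeFactor times larger than the previous one, and level sizes stop growing at half the max merged segment size, past which segments are assumed not to need merging. A standalone sketch of that budget (illustration only; the method name is made up):

    // Illustration only: how many segments a tiered policy tolerates for a
    // given total size. Mirrors the test's loop, not Lucene's internals.
    public class TierBudget {
      static double allowedSegCount(long totalBytes, long floorBytes,
                                    double segmentsPerTier, int mergeFactor,
                                    long maxMergedSegmentBytes) {
        long levelSizeBytes = floorBytes;
        long bytesLeft = totalBytes;
        double allowed = 0;
        while (true) {
          double segCountLevel = bytesLeft / (double) levelSizeBytes;
          if (segCountLevel < segmentsPerTier || levelSizeBytes >= maxMergedSegmentBytes / 2) {
            allowed += Math.ceil(segCountLevel); // partial top level
            break;
          }
          allowed += segmentsPerTier;            // one full tier at this level
          bytesLeft -= segmentsPerTier * levelSizeBytes;
          levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
        }
        return Math.max(allowed, segmentsPerTier);
      }

      public static void main(String[] args) {
        // e.g. 10 GiB index, 2 MiB floor, 10 segments per tier, mergeFactor 10, 5 GiB cap
        System.out.println(allowedSegCount(10L << 30, 2L << 20, 10, 10, 5L << 30));
      }
    }

The new String.format failure message prints every input to this computation, which is what makes corner cases found by randomized runs reproducible and debuggable.
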
@@ -710,13 +723,13 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
     doTestSimulateAppendOnly(mergePolicy, 100_000_000, 10_000);
   }
 
-  @Override @Nightly
+  // TODO: this test has bugs that prevent you from lowering the number of docs in the test!
+  @Override
   public void testSimulateUpdates() throws IOException {
     TieredMergePolicy mergePolicy = mergePolicy();
     // Avoid low values of the max merged segment size which prevent this merge policy from scaling well
     mergePolicy.setMaxMergedSegmentMB(TestUtil.nextInt(random(), 1024, 10 * 1024));
-    doTestSimulateUpdates(mergePolicy, 10_000_000, 2500);
+    int numDocs = TEST_NIGHTLY ? atLeast(10_000_000) : atLeast(1_000_000);
+    doTestSimulateUpdates(mergePolicy, numDocs, 2500);
   }
 }
@@ -404,7 +404,14 @@ public abstract class BaseMergePolicyTestCase extends LuceneTestCase {
     SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
     final double avgDocSizeMB = 5. / 1024; // 5kB
     for (int numDocs = 0; numDocs < totalDocs; ) {
-      int flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+      final int flushDocCount;
+      if (usually()) {
+        // reasonable value
+        flushDocCount = TestUtil.nextInt(random(), maxDocsPerFlush/2, maxDocsPerFlush);
+      } else {
+        // crazy value
+        flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+      }
       // how many of these documents are actually updates
       int delCount = (int) (flushDocCount * 0.9 * numDocs / totalDocs);
       numDocs += flushDocCount - delCount;
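
Flush sizes are now usually "reasonable" (at least half of maxDocsPerFlush) with occasional crazy small flushes, while the update ratio still ramps linearly from 0% of each flush on an empty index up to 90% as numDocs approaches totalDocs. A sketch of that ramp (illustration only, not part of the commit):

    // Illustration only: early flushes are mostly fresh inserts, late flushes
    // are mostly updates (deletes of already-indexed documents).
    public class UpdateRampDemo {
      public static void main(String[] args) {
        int totalDocs = 1_000_000;
        int flushDocCount = 2_500;
        for (int numDocs : new int[] {0, 250_000, 500_000, 999_000}) {
          int delCount = (int) (flushDocCount * 0.9 * numDocs / totalDocs);
          System.out.printf("numDocs=%,d -> updates in this flush=%,d%n", numDocs, delCount);
        }
      }
    }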