mirror of https://github.com/apache/lucene.git
Fix flakiness issues with TestTieredMergePolicy. (#13881)
The two seeds at #13818 had different root causes:
- The test allows the number of segments to go above the limit only if none of the merges are legal. But there are multiple reasons why a merge may be illegal: because it exceeds the max doc count, or because it is too imbalanced. However, these two things were checked independently, so you could run into cases where the test would think that there are legal merges from the doc count perspective and from the balance perspective, even though all legal merges from the doc count perspective are illegal from the balance perspective and vice versa. The test now checks that there are merges that satisfy both criteria at once.
- `TieredMergePolicy` allows at least `targetSearchConcurrency` segments in an index. There was a bug in `TieredMergePolicy` where this condition was applied after "too big" segments had been removed, so it effectively allowed more segments than necessary in the index.
Closes #13818
This commit is contained in:
parent
b6512a4680
commit
a2a751cbac
|
@ -438,7 +438,10 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
}
|
}
|
||||||
// allowedSegCount may occasionally be less than segsPerTier
|
// allowedSegCount may occasionally be less than segsPerTier
|
||||||
// if segment sizes are below the floor size
|
// if segment sizes are below the floor size
|
||||||
allowedSegCount = Math.max(allowedSegCount, Math.max(segsPerTier, targetSearchConcurrency));
|
allowedSegCount = Math.max(allowedSegCount, segsPerTier);
|
||||||
|
// No need to merge if the total number of segments (including too big segments) is less than or
|
||||||
|
// equal to the target search concurrency.
|
||||||
|
allowedSegCount = Math.max(allowedSegCount, targetSearchConcurrency - tooBigCount);
|
||||||
int allowedDocCount = getMaxAllowedDocs(totalMaxDoc, totalDelDocs);
|
int allowedDocCount = getMaxAllowedDocs(totalMaxDoc, totalDelDocs);
|
||||||
|
|
||||||
if (verbose(mergeContext) && tooBigCount > 0) {
|
if (verbose(mergeContext) && tooBigCount > 0) {
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -39,6 +40,8 @@ import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
|
|
||||||
|
private record DocCountAndSizeInBytes(int docCount, long sizeInBytes) {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TieredMergePolicy mergePolicy() {
|
public TieredMergePolicy mergePolicy() {
|
||||||
return newTieredMergePolicy();
|
return newTieredMergePolicy();
|
||||||
|
@ -54,7 +57,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
int totalDelCount = 0;
|
int totalDelCount = 0;
|
||||||
int totalMaxDoc = 0;
|
int totalMaxDoc = 0;
|
||||||
long totalBytes = 0;
|
long totalBytes = 0;
|
||||||
List<Long> segmentSizes = new ArrayList<>();
|
List<DocCountAndSizeInBytes> segmentSizes = new ArrayList<>();
|
||||||
for (SegmentCommitInfo sci : infos) {
|
for (SegmentCommitInfo sci : infos) {
|
||||||
totalDelCount += sci.getDelCount();
|
totalDelCount += sci.getDelCount();
|
||||||
totalMaxDoc += sci.info.maxDoc();
|
totalMaxDoc += sci.info.maxDoc();
|
||||||
|
@ -62,10 +65,11 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
|
double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
|
||||||
long weightedByteSize = (long) (liveRatio * byteSize);
|
long weightedByteSize = (long) (liveRatio * byteSize);
|
||||||
totalBytes += weightedByteSize;
|
totalBytes += weightedByteSize;
|
||||||
segmentSizes.add(weightedByteSize);
|
segmentSizes.add(
|
||||||
|
new DocCountAndSizeInBytes(sci.info.maxDoc() - sci.getDelCount(), weightedByteSize));
|
||||||
minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
|
minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
|
||||||
}
|
}
|
||||||
Collections.sort(segmentSizes);
|
Collections.sort(segmentSizes, Comparator.comparingLong(DocCountAndSizeInBytes::sizeInBytes));
|
||||||
|
|
||||||
final double delPercentage = 100.0 * totalDelCount / totalMaxDoc;
|
final double delPercentage = 100.0 * totalDelCount / totalMaxDoc;
|
||||||
assertTrue(
|
assertTrue(
|
||||||
|
@ -78,7 +82,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
|
long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
|
||||||
long bytesLeft = totalBytes;
|
long bytesLeft = totalBytes;
|
||||||
double allowedSegCount = 0;
|
double allowedSegCount = 0;
|
||||||
List<Long> biggestSegments = segmentSizes;
|
List<DocCountAndSizeInBytes> biggestSegments = segmentSizes;
|
||||||
if (biggestSegments.size() > tmp.getTargetSearchConcurrency() - 1) {
|
if (biggestSegments.size() > tmp.getTargetSearchConcurrency() - 1) {
|
||||||
biggestSegments =
|
biggestSegments =
|
||||||
biggestSegments.subList(
|
biggestSegments.subList(
|
||||||
|
@ -86,11 +90,18 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
biggestSegments.size());
|
biggestSegments.size());
|
||||||
}
|
}
|
||||||
// Allow whole segments for the targetSearchConcurrency-1 biggest segments
|
// Allow whole segments for the targetSearchConcurrency-1 biggest segments
|
||||||
for (long size : biggestSegments) {
|
for (DocCountAndSizeInBytes size : biggestSegments) {
|
||||||
bytesLeft -= size;
|
bytesLeft -= size.sizeInBytes();
|
||||||
allowedSegCount++;
|
allowedSegCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int tooBigCount = 0;
|
||||||
|
for (DocCountAndSizeInBytes size : segmentSizes) {
|
||||||
|
if (size.sizeInBytes() >= maxMergedSegmentBytes / 2) {
|
||||||
|
tooBigCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// below we make the assumption that segments that reached the max segment
|
// below we make the assumption that segments that reached the max segment
|
||||||
// size divided by 2 don't need merging anymore
|
// size divided by 2 don't need merging anymore
|
||||||
int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
|
int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
|
||||||
|
@ -105,38 +116,30 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
|
bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
|
||||||
levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
|
levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
|
||||||
}
|
}
|
||||||
allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
|
// Allow at least a full tier in addition to the too-big segments.
|
||||||
|
allowedSegCount = Math.max(allowedSegCount, tooBigCount + tmp.getSegmentsPerTier());
|
||||||
|
// Allow at least `targetSearchConcurrency` segments.
|
||||||
|
allowedSegCount = Math.max(allowedSegCount, tmp.getTargetSearchConcurrency());
|
||||||
|
|
||||||
// It's ok to be over the allowed segment count if none of the most balanced merges are balanced
|
// It's ok to be over the allowed segment count if none of the merges are legal, because they
|
||||||
// enough
|
// are either not balanced or because they exceed the max merged segment doc count.
|
||||||
boolean hasBalancedMerges = false;
|
// We only check pairwise merges instead of every possible merge to keep things simple. If none
|
||||||
for (int i = 0; i < segmentSizes.size() - mergeFactor; ++i) {
|
// of the pairwise merges are legal, chances are high that no merge is legal.
|
||||||
long maxMergeSegmentSize = segmentSizes.get(i + mergeFactor - 1);
|
|
||||||
if (maxMergeSegmentSize >= maxMergedSegmentBytes / 2) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
long totalMergeSize = 0;
|
|
||||||
for (int j = 0; j < i + mergeFactor; ++j) {
|
|
||||||
totalMergeSize += segmentSizes.get(j);
|
|
||||||
}
|
|
||||||
if (maxMergedSegmentBytes * 1.5 <= totalMergeSize) {
|
|
||||||
hasBalancedMerges = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// There can be more segments if we can't merge docs because they are balanced between segments.
|
|
||||||
// At least the
|
|
||||||
// 2 smallest segments should be mergeable.
|
|
||||||
// should be 2 segments to merge
|
|
||||||
int maxDocsPerSegment = tmp.getMaxAllowedDocs(infos.totalMaxDoc(), totalDelCount);
|
int maxDocsPerSegment = tmp.getMaxAllowedDocs(infos.totalMaxDoc(), totalDelCount);
|
||||||
List<Integer> segmentDocs =
|
boolean hasLegalMerges = false;
|
||||||
infos.asList().stream()
|
for (int i = 0; i < segmentSizes.size() - 1; ++i) {
|
||||||
.map(info -> info.info.maxDoc() - info.getDelCount())
|
DocCountAndSizeInBytes size1 = segmentSizes.get(i);
|
||||||
.sorted()
|
DocCountAndSizeInBytes size2 = segmentSizes.get(i + 1);
|
||||||
.toList();
|
long mergedSegmentSizeInBytes = size1.sizeInBytes() + size2.sizeInBytes();
|
||||||
boolean eligibleDocsMerge =
|
int mergedSegmentDocCount = size1.docCount() + size2.docCount();
|
||||||
segmentDocs.size() >= 2 && segmentDocs.get(0) + segmentDocs.get(1) < maxDocsPerSegment;
|
|
||||||
|
if (mergedSegmentSizeInBytes <= maxMergedSegmentBytes
|
||||||
|
&& size2.sizeInBytes() * 1.5 <= mergedSegmentSizeInBytes
|
||||||
|
&& mergedSegmentDocCount <= maxDocsPerSegment) {
|
||||||
|
hasLegalMerges = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int numSegments = infos.asList().size();
|
int numSegments = infos.asList().size();
|
||||||
assertTrue(
|
assertTrue(
|
||||||
|
@ -154,7 +157,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
|
||||||
delPercentage,
|
delPercentage,
|
||||||
tmp.getDeletesPctAllowed(),
|
tmp.getDeletesPctAllowed(),
|
||||||
tmp.getTargetSearchConcurrency()),
|
tmp.getTargetSearchConcurrency()),
|
||||||
numSegments <= allowedSegCount || hasBalancedMerges == false || eligibleDocsMerge == false);
|
numSegments <= allowedSegCount || hasLegalMerges == false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue