Fix flakiness issues with TestTieredMergePolicy. (#13881)

The two seeds at #13818 had different root causes:
 - The test allows the number of segments to go above the limit, only if none
   of the merges are legal. But there are multiple reasons why a merge may be
   illegal: because it exceeds the max doc count or because it is too imbalanced.
   However these two things were checked independently, so you could run into
   cases when the test would think that there are legal merges from the doc count
   perspective and from the balance perspective, but all legal merges from the doc
   count perspective are illegal from the balance perspective and vice-versa. The
   test now checks that there are merges that are good wrt these two criteria
   at once.
 - `TieredMergePolicy` allows at least `targetSearchConcurrency` segments in an
   index. There was a bug in `TieredMergePolicy` where this condition is
   applied after "too big" segments have been removed, so it effectively allowed
   more segments than necessary in the index.

Closes #13818
This commit is contained in:
Adrien Grand 2024-10-09 17:28:43 +02:00
parent b6512a4680
commit a2a751cbac
2 changed files with 42 additions and 36 deletions

View File

@ -438,7 +438,10 @@ public class TieredMergePolicy extends MergePolicy {
}
// allowedSegCount may occasionally be less than segsPerTier
// if segment sizes are below the floor size
allowedSegCount = Math.max(allowedSegCount, Math.max(segsPerTier, targetSearchConcurrency));
allowedSegCount = Math.max(allowedSegCount, segsPerTier);
// No need to merge if the total number of segments (including too big segments) is less than or
// equal to the target search concurrency.
allowedSegCount = Math.max(allowedSegCount, targetSearchConcurrency - tooBigCount);
int allowedDocCount = getMaxAllowedDocs(totalMaxDoc, totalDelDocs);
if (verbose(mergeContext) && tooBigCount > 0) {

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@ -39,6 +40,8 @@ import org.apache.lucene.util.Version;
public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
private record DocCountAndSizeInBytes(int docCount, long sizeInBytes) {}
@Override
public TieredMergePolicy mergePolicy() {
return newTieredMergePolicy();
@ -54,7 +57,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
int totalDelCount = 0;
int totalMaxDoc = 0;
long totalBytes = 0;
List<Long> segmentSizes = new ArrayList<>();
List<DocCountAndSizeInBytes> segmentSizes = new ArrayList<>();
for (SegmentCommitInfo sci : infos) {
totalDelCount += sci.getDelCount();
totalMaxDoc += sci.info.maxDoc();
@ -62,10 +65,11 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
long weightedByteSize = (long) (liveRatio * byteSize);
totalBytes += weightedByteSize;
segmentSizes.add(weightedByteSize);
segmentSizes.add(
new DocCountAndSizeInBytes(sci.info.maxDoc() - sci.getDelCount(), weightedByteSize));
minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
}
Collections.sort(segmentSizes);
Collections.sort(segmentSizes, Comparator.comparingLong(DocCountAndSizeInBytes::sizeInBytes));
final double delPercentage = 100.0 * totalDelCount / totalMaxDoc;
assertTrue(
@ -78,7 +82,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
long bytesLeft = totalBytes;
double allowedSegCount = 0;
List<Long> biggestSegments = segmentSizes;
List<DocCountAndSizeInBytes> biggestSegments = segmentSizes;
if (biggestSegments.size() > tmp.getTargetSearchConcurrency() - 1) {
biggestSegments =
biggestSegments.subList(
@ -86,11 +90,18 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
biggestSegments.size());
}
// Allow whole segments for the targetSearchConcurrency-1 biggest segments
for (long size : biggestSegments) {
bytesLeft -= size;
for (DocCountAndSizeInBytes size : biggestSegments) {
bytesLeft -= size.sizeInBytes();
allowedSegCount++;
}
int tooBigCount = 0;
for (DocCountAndSizeInBytes size : segmentSizes) {
if (size.sizeInBytes() >= maxMergedSegmentBytes / 2) {
tooBigCount++;
}
}
// below we make the assumption that segments that reached the max segment
// size divided by 2 don't need merging anymore
int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
@ -105,38 +116,30 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
}
allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
// Allow at least a full tier in addition of the too big segments.
allowedSegCount = Math.max(allowedSegCount, tooBigCount + tmp.getSegmentsPerTier());
// Allow at least `targetSearchConcurrency` segments.
allowedSegCount = Math.max(allowedSegCount, tmp.getTargetSearchConcurrency());
// It's ok to be over the allowed segment count if none of the most balanced merges are balanced
// enough
boolean hasBalancedMerges = false;
for (int i = 0; i < segmentSizes.size() - mergeFactor; ++i) {
long maxMergeSegmentSize = segmentSizes.get(i + mergeFactor - 1);
if (maxMergeSegmentSize >= maxMergedSegmentBytes / 2) {
break;
}
long totalMergeSize = 0;
for (int j = 0; j < i + mergeFactor; ++j) {
totalMergeSize += segmentSizes.get(j);
}
if (maxMergedSegmentBytes * 1.5 <= totalMergeSize) {
hasBalancedMerges = true;
break;
}
}
// There can be more segments if we can't merge docs because they are balanced between segments.
// At least the
// 2 smallest segments should be mergeable.
// should be 2 segments to merge
// It's ok to be over the allowed segment count if none of the merges are legal, because they
// are either not balanced or because they exceed the max merged segment doc count.
// We only check pairwise merges instead of every possible merge to keep things simple. If none
// of the pairwise merges are legal, chances are high that no merge is legal.
int maxDocsPerSegment = tmp.getMaxAllowedDocs(infos.totalMaxDoc(), totalDelCount);
List<Integer> segmentDocs =
infos.asList().stream()
.map(info -> info.info.maxDoc() - info.getDelCount())
.sorted()
.toList();
boolean eligibleDocsMerge =
segmentDocs.size() >= 2 && segmentDocs.get(0) + segmentDocs.get(1) < maxDocsPerSegment;
boolean hasLegalMerges = false;
for (int i = 0; i < segmentSizes.size() - 1; ++i) {
DocCountAndSizeInBytes size1 = segmentSizes.get(i);
DocCountAndSizeInBytes size2 = segmentSizes.get(i + 1);
long mergedSegmentSizeInBytes = size1.sizeInBytes() + size2.sizeInBytes();
int mergedSegmentDocCount = size1.docCount() + size2.docCount();
if (mergedSegmentSizeInBytes <= maxMergedSegmentBytes
&& size2.sizeInBytes() * 1.5 <= mergedSegmentSizeInBytes
&& mergedSegmentDocCount <= maxDocsPerSegment) {
hasLegalMerges = true;
break;
}
}
int numSegments = infos.asList().size();
assertTrue(
@ -154,7 +157,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
delPercentage,
tmp.getDeletesPctAllowed(),
tmp.getTargetSearchConcurrency()),
numSegments <= allowedSegCount || hasBalancedMerges == false || eligibleDocsMerge == false);
numSegments <= allowedSegCount || hasLegalMerges == false);
}
@Override