Merge branch 'apache:main' into bpv21_main

commit 7312d91394
Authored by expani1729 on 2024-10-16 15:17:20 +05:30, committed by GitHub
9 changed files with 96 additions and 64 deletions

CHANGES.txt

@@ -47,8 +47,16 @@ Improvements
 Optimizations
 ---------------------
 
 * GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
 
+* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
+  minimum competitive allows for a more favorable partitioning. (Adrien Grand)
+
+* GITHUB#13904: BlockMaxConjunctionBulkScorer can now early exit when the
+  leading clause has a single impact block (e.g. ConstantScoreQuery).
+  (Adrien Grand)
+
 Bug Fixes
 ---------------------
 * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

BackwardsCompatibilityTestBase.java

@@ -115,9 +115,6 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
    */
   protected BackwardsCompatibilityTestBase(
       @Name("version") Version version, @Name("pattern") String indexPattern) {
-    // TODO: add 10.0.0 bw indices after 10.0.0 has been released, see
-    // https://github.com/apache/lucene/issues/13847
-    assumeTrue("Can only test with 10.0.0 has been released", version.major < 10);
     this.version = version;
     this.indexPattern = indexPattern;
   }

TestBasicBackwardsCompatibility.java

@@ -832,7 +832,7 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestBase {
     expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader));
     assertEquals(
         e.getMessage(),
-        "Cannot merge a segment that has been created with major version 9 into this index which has been created by major version 10");
+        "Cannot merge a segment that has been created with major version 10 into this index which has been created by major version 11");
     w.close();
     targetDir2.close();

BlockMaxConjunctionBulkScorer.java

@@ -85,9 +85,20 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
     int windowMin = Math.max(lead1.docID(), min);
     while (windowMin < max) {
-      // Use impacts of the least costly scorer to compute windows
-      // NOTE: windowMax is inclusive
-      int windowMax = Math.min(scorers[0].advanceShallow(windowMin), max - 1);
+      // Use impacts of the least costly scorer to compute windows to keep the per-block overhead
+      // under control.
+      // NOTE: windowMax is inclusive.
+      int windowMax = scorer1.advanceShallow(windowMin);
+      if (windowMax == DocIdSetIterator.NO_MORE_DOCS) {
+        // If the query doesn't have impacts anymore, or has a single block for the whole doc ID
+        // space (e.g. ConstantScoreQuery), then we try to create a block that has ~128 docs of the
+        // leading clause. This gives us higher chances to exit early based on the maximum scores of
+        // other clauses.
+        long windowSize = 128L * maxDoc / Math.max(1, lead1.cost());
+        windowSize = Math.max(windowSize, 128L);
+        windowMax = (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
+      }
+      windowMax = Math.min(windowMax, max - 1);
 
       float maxWindowScore = Float.POSITIVE_INFINITY;
       if (0 < scorable.minCompetitiveScore) {
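
The fallback window scales inversely with the density of the leading clause: roughly 128 lead matches' worth of doc IDs, floored at 128 docs and clamped to the int range. A standalone sketch of that arithmetic, using hypothetical maxDoc and cost values (illustrative only, not Lucene code):

    class WindowSizeDemo {
      static int fallbackWindowMax(int windowMin, int maxDoc, long leadCost) {
        // Aim for ~128 matches of the leading clause per window: scale 128 docs
        // by the lead's inverse density (maxDoc / cost), floored at 128 docs.
        long windowSize = 128L * maxDoc / Math.max(1, leadCost);
        windowSize = Math.max(windowSize, 128L);
        return (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
      }

      public static void main(String[] args) {
        // Lead matches 1 doc in 10: windows of ~1280 docs, ~128 lead matches each.
        System.out.println(fallbackWindowMax(0, 1_000_000, 100_000)); // 1280
        // Dense lead (cost == maxDoc): the 128-doc floor kicks in.
        System.out.println(fallbackWindowMax(0, 1_000_000, 1_000_000)); // 128
      }
    }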

MaxScoreBulkScorer.java

@@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
   // Index of the first scorer that is required, this scorer and all following scorers are required
   // for a document to match.
   int firstRequiredScorer;
+  // The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
+  float nextMinCompetitiveScore;
   private final long cost;
   float minCompetitiveScore;
   private final Score scorable = new Score();

@@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
       while (top.doc < outerWindowMax) {
         scoreInnerWindow(collector, acceptDocs, outerWindowMax);
         top = essentialQueue.top();
+        if (minCompetitiveScore >= nextMinCompetitiveScore) {
+          // The minimum competitive score increased substantially, so we can now partition scorers
+          // in a more favorable way.
+          break;
+        }
       }
-      outerWindowMin = outerWindowMax;
+      outerWindowMin = Math.min(top.doc, outerWindowMax);
     }
     return nextCandidate(max);

@@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
         });
     double maxScoreSum = 0;
     firstEssentialScorer = 0;
+    nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
     for (int i = 0; i < allScorers.length; ++i) {
       final DisiWrapper w = scratch[i];
       double newMaxScoreSum = maxScoreSum + w.maxWindowScore;

@@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
         firstEssentialScorer++;
       } else {
         allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
+        nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
       }
     }
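
For context: MaxScoreBulkScorer sorts scorers by max score and treats the longest prefix whose summed max scores stay below minCompetitiveScore as non-essential, since those clauses alone cannot produce a competitive hit. The new nextMinCompetitiveScore field records the smallest threshold at which one more scorer could join that prefix, and the early break above triggers a re-partition as soon as the threshold crosses it. A simplified, self-contained sketch of the partitioning arithmetic (illustrative names and scores, not Lucene's internals):

    // Sketch: split scorers sorted by ascending max score into a non-essential
    // prefix and an essential suffix, and compute the next threshold that
    // would allow a longer (more favorable) non-essential prefix.
    class PartitionDemo {
      public static void main(String[] args) {
        double[] maxScores = {0.25, 0.5, 1.0, 2.0}; // hypothetical, sorted
        double minCompetitiveScore = 0.6;

        double maxScoreSum = 0;
        int firstEssentialScorer = 0;
        double nextMinCompetitiveScore = Double.POSITIVE_INFINITY;
        for (double maxScore : maxScores) {
          double newSum = maxScoreSum + maxScore;
          if (newSum < minCompetitiveScore) {
            maxScoreSum = newSum; // still non-essential
            firstEssentialScorer++;
          } else {
            // Once minCompetitiveScore reaches newSum, this scorer could become
            // non-essential too, so remember the smallest such value.
            nextMinCompetitiveScore = Math.min(nextMinCompetitiveScore, newSum);
          }
        }
        System.out.println(firstEssentialScorer); // 1
        System.out.println(nextMinCompetitiveScore); // 0.75
      }
    }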

PointInSetQuery.java

@@ -181,7 +181,7 @@ public abstract class PointInSetQuery extends Query implements Accountable {
         @Override
         public Scorer get(long leadCost) throws IOException {
           DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
-          values.intersect(new MergePointVisitor(sortedPackedPoints, result));
+          values.intersect(new MergePointVisitor(sortedPackedPoints.iterator(), result));
           DocIdSetIterator iterator = result.build().iterator();
           return new ConstantScoreScorer(score(), scoreMode, iterator);
         }

@@ -192,7 +192,9 @@ public abstract class PointInSetQuery extends Query implements Accountable {
           if (cost == -1) {
             // Computing the cost may be expensive, so only do it if necessary
             DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
-            cost = values.estimateDocCount(new MergePointVisitor(sortedPackedPoints, result));
+            cost =
+                values.estimateDocCount(
+                    new MergePointVisitor(sortedPackedPoints.iterator(), result));
             assert cost >= 0;
           }
           return cost;

@@ -260,18 +262,15 @@ public abstract class PointInSetQuery extends Query implements Accountable {
   private class MergePointVisitor implements IntersectVisitor {
 
     private final DocIdSetBuilder result;
-    private TermIterator iterator;
+    private final TermIterator iterator;
     private BytesRef nextQueryPoint;
     private final ByteArrayComparator comparator;
-    private final PrefixCodedTerms sortedPackedPoints;
     private DocIdSetBuilder.BulkAdder adder;
 
-    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result)
-        throws IOException {
+    public MergePointVisitor(TermIterator iterator, DocIdSetBuilder result) throws IOException {
       this.result = result;
-      this.sortedPackedPoints = sortedPackedPoints;
       this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
-      this.iterator = this.sortedPackedPoints.iterator();
+      this.iterator = iterator;
       nextQueryPoint = iterator.next();
     }
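
The refactor narrows MergePointVisitor's dependency from the whole PrefixCodedTerms to the TermIterator it actually consumes, with each caller supplying a fresh iterator per traversal. A minimal sketch of the same move using only standard-library types (the IteratorConsumer class is hypothetical, not from the patch):

    import java.util.Iterator;
    import java.util.List;

    // Takes the iterator it drains instead of the collection plus the job of
    // creating iterators; each traversal gets its own fresh iterator.
    class IteratorConsumer {
      private final Iterator<String> it;

      IteratorConsumer(Iterator<String> it) {
        this.it = it;
      }

      void drain() {
        it.forEachRemaining(System.out::println);
      }

      public static void main(String[] args) {
        List<String> terms = List.of("a", "b", "c");
        new IteratorConsumer(terms.iterator()).drain(); // fresh iterator per use
      }
    }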

Util.java

@@ -775,10 +775,11 @@ public final class Util {
   /** Just takes unsigned byte values from the BytesRef and converts into an IntsRef. */
   public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
-    scratch.clear();
+    scratch.growNoCopy(input.length);
     for (int i = 0; i < input.length; i++) {
-      scratch.append(input.bytes[i + input.offset] & 0xFF);
+      scratch.setIntAt(i, input.bytes[i + input.offset] & 0xFF);
     }
+    scratch.setLength(input.length);
     return scratch.get();
   }
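
The rewrite replaces per-element append(), which re-checks and grows capacity on every call, with a single up-front growNoCopy() plus indexed writes and one final setLength(). The same pre-size-then-fill pattern in plain Java (hypothetical helper, standard library only):

    // Pre-size once, then fill by index: one allocation, no amortized growth.
    static int[] toUnsignedInts(byte[] input) {
      int[] out = new int[input.length];
      for (int i = 0; i < input.length; i++) {
        out[i] = input[i] & 0xFF; // unsigned widening, as in toIntsRef
      }
      return out;
    }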

MemorySegmentIndexInput.java

@@ -530,7 +530,29 @@ abstract class MemorySegmentIndexInput extends IndexInput
   @Override
   public final MemorySegmentIndexInput clone() {
-    final MemorySegmentIndexInput clone = buildSlice((String) null, 0L, this.length);
+    ensureOpen();
+    ensureAccessible();
+    final MemorySegmentIndexInput clone;
+    if (segments.length == 1) {
+      clone =
+          new SingleSegmentImpl(
+              toString(),
+              null, // clones don't have an Arena, as they can't close)
+              segments[0],
+              length,
+              chunkSizePower,
+              confined);
+    } else {
+      clone =
+          new MultiSegmentImpl(
+              toString(),
+              null, // clones don't have an Arena, as they can't close)
+              segments,
+              ((MultiSegmentImpl) this).offset,
+              length,
+              chunkSizePower,
+              confined);
+    }
     try {
       clone.seek(getFilePointer());
     } catch (IOException ioe) {
@@ -570,6 +592,13 @@ abstract class MemorySegmentIndexInput extends IndexInput
     if (NATIVE_ACCESS.isPresent() && advice != ReadAdvice.NORMAL) {
       // No need to madvise with a normal advice, since it's the OS' default.
       final NativeAccess nativeAccess = NATIVE_ACCESS.get();
+      if (length >= nativeAccess.getPageSize()) {
+        // Only set the read advice if the inner file is large enough. Otherwise the cons are likely
+        // outweighing the pros as we're:
+        // - potentially overriding the advice of other files that share the same pages,
+        // - paying the cost of a madvise system call for little value.
+        // We could align inner files with the page size to avoid the first issue, but again the
+        // pros don't clearly overweigh the cons.
       slice.advise(
           0,
           slice.length,

@@ -577,6 +606,7 @@ abstract class MemorySegmentIndexInput extends IndexInput
             nativeAccess.madvise(segment, advice);
           });
     }
+    }
     return slice;
   }
@@ -584,26 +614,30 @@ abstract class MemorySegmentIndexInput extends IndexInput
   MemorySegmentIndexInput buildSlice(String sliceDescription, long offset, long length) {
     ensureOpen();
     ensureAccessible();
+    final MemorySegment[] slices;
+    final boolean isClone = offset == 0 && length == this.length;
+    if (isClone) {
+      slices = segments;
+    } else {
       final long sliceEnd = offset + length;
       final int startIndex = (int) (offset >>> chunkSizePower);
       final int endIndex = (int) (sliceEnd >>> chunkSizePower);
       // we always allocate one more slice, the last one may be a 0 byte one after truncating with
       // asSlice():
-      final MemorySegment slices[] = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
+      slices = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
       // set the last segment's limit for the sliced view.
       slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
       offset = offset & chunkSizeMask;
+    }
     final String newResourceDescription = getFullSliceDescription(sliceDescription);
     if (slices.length == 1) {
       return new SingleSegmentImpl(
           newResourceDescription,
           null, // clones don't have an Arena, as they can't close)
-          slices[0].asSlice(offset, length),
+          isClone ? slices[0] : slices[0].asSlice(offset, length),
           length,
           chunkSizePower,
           confined);
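
Both hunks special-case the whole-file view: clone() now constructs the new instance directly over the existing segments, and buildSlice() skips the copy-and-truncate path when offset == 0 && length == this.length, sharing the segment array instead. A toy sketch of that fast path over a plain array (stand-in types, not MemorySegment):

    import java.util.Arrays;

    class SliceDemo {
      // A "slice" spanning the whole input shares the backing data untouched;
      // a real sub-slice copies and truncates the covered range.
      static int[] slice(int[] data, int offset, int length) {
        if (offset == 0 && length == data.length) {
          return data; // clone case: no copy, no re-slicing
        }
        return Arrays.copyOfRange(data, offset, offset + length);
      }

      public static void main(String[] args) {
        int[] file = {1, 2, 3, 4};
        System.out.println(slice(file, 0, 4) == file); // true: shared
        System.out.println(Arrays.toString(slice(file, 1, 2))); // [2, 3]
      }
    }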

TestMaxScoreBulkScorer.java

@@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
 // These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
 public class TestMaxScoreBulkScorer extends LuceneTestCase {
 
-  private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
-
-    public CapMaxScoreWindowAt2048Scorer(Scorer in) {
-      super(in);
-    }
-
-    @Override
-    public int advanceShallow(int target) throws IOException {
-      return Math.min(target | 0x7FF, in.advanceShallow(target));
-    }
-
-    @Override
-    public float getMaxScore(int upTo) throws IOException {
-      return in.getMaxScore(upTo);
-    }
-  }
-
   private void writeDocuments(Directory dir) throws IOException {
     try (IndexWriter w =
         new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
       Scorer scorer3 =
           searcher
               .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(
@@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
           searcher
               .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
       Scorer scorer2 =
           searcher
               .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
       Scorer scorer3 =
           searcher
               .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
               .scorer(context);
-      scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
 
       BulkScorer scorer =
           new MaxScoreBulkScorer(