Merge branch 'apache:main' into bpv21_main

expani1729 2024-10-16 15:17:20 +05:30 committed by GitHub
commit 7312d91394
9 changed files with 96 additions and 64 deletions

View File

@@ -47,8 +47,16 @@ Improvements
Optimizations
---------------------
* GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)
+ * GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
+ minimum competitive score allows for a more favorable partitioning. (Adrien Grand)
+ * GITHUB#13904: BlockMaxConjunctionBulkScorer can now early exit when the
+ leading clause has a single impact block (e.g. ConstantScoreQuery).
+ (Adrien Grand)
Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

View File

@@ -115,9 +115,6 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
*/
protected BackwardsCompatibilityTestBase(
@Name("version") Version version, @Name("pattern") String indexPattern) {
- // TODO: add 10.0.0 bw indices after 10.0.0 has been released, see
- // https://github.com/apache/lucene/issues/13847
- assumeTrue("Can only test with 10.0.0 has been released", version.major < 10);
this.version = version;
this.indexPattern = indexPattern;
}

View File

@@ -832,7 +832,7 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestBase {
expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader));
assertEquals(
e.getMessage(),
"Cannot merge a segment that has been created with major version 9 into this index which has been created by major version 10");
"Cannot merge a segment that has been created with major version 10 into this index which has been created by major version 11");
w.close();
targetDir2.close();

View File

@@ -85,9 +85,20 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
int windowMin = Math.max(lead1.docID(), min);
while (windowMin < max) {
- // Use impacts of the least costly scorer to compute windows
- // NOTE: windowMax is inclusive
- int windowMax = Math.min(scorers[0].advanceShallow(windowMin), max - 1);
+ // Use impacts of the least costly scorer to compute windows to keep the per-block overhead
+ // under control.
+ // NOTE: windowMax is inclusive.
+ int windowMax = scorer1.advanceShallow(windowMin);
+ if (windowMax == DocIdSetIterator.NO_MORE_DOCS) {
+ // If the query doesn't have impacts anymore, or has a single block for the whole doc ID
+ // space (e.g. ConstantScoreQuery), then we try to create a block that has ~128 docs of the
+ // leading clause. This gives us higher chances to exit early based on the maximum scores of
+ // other clauses.
+ long windowSize = 128L * maxDoc / Math.max(1, lead1.cost());
+ windowSize = Math.max(windowSize, 128L);
+ windowMax = (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
+ }
+ windowMax = Math.min(windowMax, max - 1);
float maxWindowScore = Float.POSITIVE_INFINITY;
if (0 < scorable.minCompetitiveScore) {

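Note (illustrative sketch, not part of the patch; the method name and parameters below are made up): the fallback window added above is sized to span roughly 128 matches of the leading clause. If that clause matches leadCost docs out of maxDoc, its matches are about maxDoc / leadCost doc IDs apart on average, so 128 of them cover about 128 * maxDoc / leadCost doc IDs:

  static int fallbackWindowMax(int windowMin, int maxDoc, long leadCost) {
    // ~128 matches of the leading clause, assuming they are spread evenly over the doc ID space
    long windowSize = 128L * maxDoc / Math.max(1, leadCost);
    // never use a window smaller than 128 doc IDs
    windowSize = Math.max(windowSize, 128L);
    // windowMax is inclusive and must not overflow int
    return (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
  }

For example, with maxDoc = 1,000,000 and a leading clause that matches 10,000 docs, the fallback window covers 12,800 doc IDs.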
View File

@@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
// Index of the first scorer that is required, this scorer and all following scorers are required
// for a document to match.
int firstRequiredScorer;
+ // The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
+ float nextMinCompetitiveScore;
private final long cost;
float minCompetitiveScore;
private final Score scorable = new Score();
@@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
while (top.doc < outerWindowMax) {
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
top = essentialQueue.top();
+ if (minCompetitiveScore >= nextMinCompetitiveScore) {
+ // The minimum competitive score increased substantially, so we can now partition scorers
+ // in a more favorable way.
+ break;
+ }
}
- outerWindowMin = outerWindowMax;
+ outerWindowMin = Math.min(top.doc, outerWindowMax);
}
return nextCandidate(max);
@@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
});
double maxScoreSum = 0;
firstEssentialScorer = 0;
+ nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
for (int i = 0; i < allScorers.length; ++i) {
final DisiWrapper w = scratch[i];
double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
@@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
firstEssentialScorer++;
} else {
allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
+ nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
}
}

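Note (standalone sketch with simplified names, not the actual partitioning code, which also reorders the scorer array and adjusts for float rounding): nextMinCompetitiveScore records the smallest max-score sum at which an essential clause could become non-essential, so the outer loop above can break out and repartition as soon as minCompetitiveScore reaches it:

  static float nextRepartitionThreshold(float[] maxScoresAscending, float minCompetitiveScore) {
    double nonEssentialSum = 0;
    float next = Float.POSITIVE_INFINITY;
    for (float maxScore : maxScoresAscending) {
      float sumWithClause = (float) (nonEssentialSum + maxScore);
      if (sumWithClause < minCompetitiveScore) {
        nonEssentialSum += maxScore; // clause can stay non-essential
      } else {
        // clause is essential; it could flip to non-essential once minCompetitiveScore reaches this sum
        next = Math.min(next, sumWithClause);
      }
    }
    return next;
  }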
View File

@@ -181,7 +181,7 @@ public abstract class PointInSetQuery extends Query implements Accountable {
@Override
public Scorer get(long leadCost) throws IOException {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
- values.intersect(new MergePointVisitor(sortedPackedPoints, result));
+ values.intersect(new MergePointVisitor(sortedPackedPoints.iterator(), result));
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(score(), scoreMode, iterator);
}
@@ -192,7 +192,9 @@ public abstract class PointInSetQuery extends Query implements Accountable {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
- cost = values.estimateDocCount(new MergePointVisitor(sortedPackedPoints, result));
+ cost =
+ values.estimateDocCount(
+ new MergePointVisitor(sortedPackedPoints.iterator(), result));
assert cost >= 0;
}
return cost;
@@ -260,18 +262,15 @@ public abstract class PointInSetQuery extends Query implements Accountable {
private class MergePointVisitor implements IntersectVisitor {
private final DocIdSetBuilder result;
- private TermIterator iterator;
+ private final TermIterator iterator;
private BytesRef nextQueryPoint;
private final ByteArrayComparator comparator;
- private final PrefixCodedTerms sortedPackedPoints;
private DocIdSetBuilder.BulkAdder adder;
- public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result)
- throws IOException {
+ public MergePointVisitor(TermIterator iterator, DocIdSetBuilder result) throws IOException {
this.result = result;
- this.sortedPackedPoints = sortedPackedPoints;
this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
- this.iterator = this.sortedPackedPoints.iterator();
+ this.iterator = iterator;
nextQueryPoint = iterator.next();
}

View File

@@ -775,10 +775,11 @@ public final class Util {
/** Just takes unsigned byte values from the BytesRef and converts into an IntsRef. */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
scratch.clear();
+ scratch.growNoCopy(input.length);
for (int i = 0; i < input.length; i++) {
- scratch.append(input.bytes[i + input.offset] & 0xFF);
+ scratch.setIntAt(i, input.bytes[i + input.offset] & 0xFF);
}
+ scratch.setLength(input.length);
return scratch.get();
}

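Note (illustration only, using plain arrays instead of BytesRef/IntsRefBuilder; the helper below is not from the patch): the loop widens each byte to an unsigned int, and sizing the buffer once up front avoids the grow-and-copy checks that append() performs per element:

  static int[] toUnsignedInts(byte[] input) {
    int[] out = new int[input.length]; // size once, like growNoCopy(input.length)
    for (int i = 0; i < input.length; i++) {
      out[i] = input[i] & 0xFF; // write in place, like setIntAt(i, ...)
    }
    return out; // the builder then does setLength(input.length) and get()
  }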
View File

@@ -530,7 +530,29 @@ abstract class MemorySegmentIndexInput extends IndexInput
@Override
public final MemorySegmentIndexInput clone() {
- final MemorySegmentIndexInput clone = buildSlice((String) null, 0L, this.length);
+ ensureOpen();
+ ensureAccessible();
+ final MemorySegmentIndexInput clone;
+ if (segments.length == 1) {
+ clone =
+ new SingleSegmentImpl(
+ toString(),
+ null, // clones don't have an Arena, as they can't close)
+ segments[0],
+ length,
+ chunkSizePower,
+ confined);
+ } else {
+ clone =
+ new MultiSegmentImpl(
+ toString(),
+ null, // clones don't have an Arena, as they can't close)
+ segments,
+ ((MultiSegmentImpl) this).offset,
+ length,
+ chunkSizePower,
+ confined);
+ }
try {
clone.seek(getFilePointer());
} catch (IOException ioe) {
@@ -570,12 +592,20 @@ abstract class MemorySegmentIndexInput extends IndexInput
if (NATIVE_ACCESS.isPresent() && advice != ReadAdvice.NORMAL) {
// No need to madvise with a normal advice, since it's the OS' default.
final NativeAccess nativeAccess = NATIVE_ACCESS.get();
- slice.advise(
- 0,
- slice.length,
- segment -> {
- nativeAccess.madvise(segment, advice);
- });
+ if (length >= nativeAccess.getPageSize()) {
+ // Only set the read advice if the inner file is large enough. Otherwise the cons are likely
+ // outweighing the pros as we're:
+ // - potentially overriding the advice of other files that share the same pages,
+ // - paying the cost of a madvise system call for little value.
+ // We could align inner files with the page size to avoid the first issue, but again the
+ // pros don't clearly overweigh the cons.
+ slice.advise(
+ 0,
+ slice.length,
+ segment -> {
+ nativeAccess.madvise(segment, advice);
+ });
+ }
}
return slice;
}
@@ -584,26 +614,30 @@ abstract class MemorySegmentIndexInput extends IndexInput
MemorySegmentIndexInput buildSlice(String sliceDescription, long offset, long length) {
ensureOpen();
ensureAccessible();
- final long sliceEnd = offset + length;
- final int startIndex = (int) (offset >>> chunkSizePower);
- final int endIndex = (int) (sliceEnd >>> chunkSizePower);
- // we always allocate one more slice, the last one may be a 0 byte one after truncating with
- // asSlice():
- final MemorySegment slices[] = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
- // set the last segment's limit for the sliced view.
- slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
- offset = offset & chunkSizeMask;
+ final MemorySegment[] slices;
+ final boolean isClone = offset == 0 && length == this.length;
+ if (isClone) {
+ slices = segments;
+ } else {
+ final long sliceEnd = offset + length;
+ final int startIndex = (int) (offset >>> chunkSizePower);
+ final int endIndex = (int) (sliceEnd >>> chunkSizePower);
+ // we always allocate one more slice, the last one may be a 0 byte one after truncating with
+ // asSlice():
+ slices = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
+ // set the last segment's limit for the sliced view.
+ slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
+ offset = offset & chunkSizeMask;
+ }
final String newResourceDescription = getFullSliceDescription(sliceDescription);
if (slices.length == 1) {
return new SingleSegmentImpl(
newResourceDescription,
null, // clones don't have an Arena, as they can't close)
- slices[0].asSlice(offset, length),
+ isClone ? slices[0] : slices[0].asSlice(offset, length),
length,
chunkSizePower,
confined);

View File

@@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
public class TestMaxScoreBulkScorer extends LuceneTestCase {
- private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
- public CapMaxScoreWindowAt2048Scorer(Scorer in) {
- super(in);
- }
- @Override
- public int advanceShallow(int target) throws IOException {
- return Math.min(target | 0x7FF, in.advanceShallow(target));
- }
- @Override
- public float getMaxScore(int upTo) throws IOException {
- return in.getMaxScore(upTo);
- }
- }
private void writeDocuments(Directory dir) throws IOException {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
new MaxScoreBulkScorer(
@@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
new MaxScoreBulkScorer(