mirror of https://github.com/apache/lucene.git
commit 7312d91394: Merge branch 'apache:main' into bpv21_main
@@ -47,8 +47,16 @@ Improvements

 Optimizations
 ---------------------

 * GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)

+* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
+  minimum competitive score allows for a more favorable partitioning. (Adrien Grand)
+
+* GITHUB#13904: BlockMaxConjunctionBulkScorer can now early exit when the
+  leading clause has a single impact block (e.g. ConstantScoreQuery).
+  (Adrien Grand)
+
 Bug Fixes
 ---------------------

 * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
@@ -115,9 +115,6 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
    */
   protected BackwardsCompatibilityTestBase(
       @Name("version") Version version, @Name("pattern") String indexPattern) {
-    // TODO: add 10.0.0 bw indices after 10.0.0 has been released, see
-    // https://github.com/apache/lucene/issues/13847
-    assumeTrue("Can only test with 10.0.0 has been released", version.major < 10);
     this.version = version;
     this.indexPattern = indexPattern;
   }
@@ -832,7 +832,7 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestBase {
     expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader));
     assertEquals(
         e.getMessage(),
-        "Cannot merge a segment that has been created with major version 9 into this index which has been created by major version 10");
+        "Cannot merge a segment that has been created with major version 10 into this index which has been created by major version 11");
     w.close();
     targetDir2.close();
@@ -85,9 +85,20 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {

     int windowMin = Math.max(lead1.docID(), min);
     while (windowMin < max) {
-      // Use impacts of the least costly scorer to compute windows
-      // NOTE: windowMax is inclusive
-      int windowMax = Math.min(scorers[0].advanceShallow(windowMin), max - 1);
+      // Use impacts of the least costly scorer to compute windows to keep the per-block overhead
+      // under control.
+      // NOTE: windowMax is inclusive.
+      int windowMax = scorer1.advanceShallow(windowMin);
+      if (windowMax == DocIdSetIterator.NO_MORE_DOCS) {
+        // If the query doesn't have impacts anymore, or has a single block for the whole doc ID
+        // space (e.g. ConstantScoreQuery), then we try to create a block that has ~128 docs of the
+        // leading clause. This gives us higher chances to exit early based on the maximum scores of
+        // other clauses.
+        long windowSize = 128L * maxDoc / Math.max(1, lead1.cost());
+        windowSize = Math.max(windowSize, 128L);
+        windowMax = (int) Math.min(Integer.MAX_VALUE, windowMin + windowSize);
+      }
+      windowMax = Math.min(windowMax, max - 1);

       float maxWindowScore = Float.POSITIVE_INFINITY;
       if (0 < scorable.minCompetitiveScore) {
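The fallback above can be read on its own: when the leading scorer reports no more impact blocks, the window is sized so that it covers roughly 128 matches of the leading clause. A standalone sketch of that computation, with the scorer fields and locals replaced by plain parameters (an illustration, not the Lucene code path):

static int fallbackWindowMax(int maxDoc, int windowMin, long leadCost, int max) {
  // If the leading clause matches leadCost docs out of maxDoc, its matches are
  // spaced ~maxDoc/leadCost doc IDs apart, so this window holds ~128 of them.
  long windowSize = 128L * maxDoc / Math.max(1, leadCost);
  windowSize = Math.max(windowSize, 128L); // never shrink below 128 doc IDs
  long windowMax = Math.min(Integer.MAX_VALUE, windowMin + windowSize);
  return Math.min((int) windowMax, max - 1); // windowMax is inclusive
}

Smaller windows give more opportunities to compare the window's maximum score against the minimum competitive score and skip the whole window when it cannot be competitive.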
@@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
   // Index of the first scorer that is required, this scorer and all following scorers are required
   // for a document to match.
   int firstRequiredScorer;
+  // The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
+  float nextMinCompetitiveScore;
   private final long cost;
   float minCompetitiveScore;
   private final Score scorable = new Score();
@@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
       while (top.doc < outerWindowMax) {
         scoreInnerWindow(collector, acceptDocs, outerWindowMax);
         top = essentialQueue.top();
+        if (minCompetitiveScore >= nextMinCompetitiveScore) {
+          // The minimum competitive score increased substantially, so we can now partition scorers
+          // in a more favorable way.
+          break;
+        }
       }

-      outerWindowMin = outerWindowMax;
+      outerWindowMin = Math.min(top.doc, outerWindowMax);
     }

     return nextCandidate(max);
@@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
         });
     double maxScoreSum = 0;
     firstEssentialScorer = 0;
+    nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
     for (int i = 0; i < allScorers.length; ++i) {
      final DisiWrapper w = scratch[i];
      double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
@@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
         firstEssentialScorer++;
       } else {
         allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
+        nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
       }
     }
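Taken together, these three hunks maintain a repartitioning threshold: while scorers are partitioned by ascending max score, every scorer that lands on the essential side records the score sum at which it would flip to non-essential, and the smallest such sum becomes nextMinCompetitiveScore. A simplified, self-contained sketch of that loop (per-scorer max scores sorted ascending; the real code works on DisiWrapper objects and additionally guards against float rounding):

static float nextMinCompetitiveScore(float[] sortedMaxWindowScores, float minCompetitiveScore) {
  float next = Float.POSITIVE_INFINITY;
  double maxScoreSum = 0;
  for (float maxWindowScore : sortedMaxWindowScores) {
    float maxScoreSumFloat = (float) (maxScoreSum + maxWindowScore);
    if (maxScoreSumFloat < minCompetitiveScore) {
      maxScoreSum += maxWindowScore; // stays non-essential: cannot be competitive on its own
    } else {
      // Once minCompetitiveScore reaches this sum, this scorer would flip to
      // non-essential and the partition becomes more favorable.
      next = Math.min(maxScoreSumFloat, next);
    }
  }
  return next;
}

The inner-window loop above then only has to compare minCompetitiveScore against this precomputed threshold to decide when a repartition pays off.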
@@ -181,7 +181,7 @@ public abstract class PointInSetQuery extends Query implements Accountable {
         @Override
         public Scorer get(long leadCost) throws IOException {
           DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
-          values.intersect(new MergePointVisitor(sortedPackedPoints, result));
+          values.intersect(new MergePointVisitor(sortedPackedPoints.iterator(), result));
           DocIdSetIterator iterator = result.build().iterator();
           return new ConstantScoreScorer(score(), scoreMode, iterator);
         }
@@ -192,7 +192,9 @@ public abstract class PointInSetQuery extends Query implements Accountable {
           if (cost == -1) {
             // Computing the cost may be expensive, so only do it if necessary
             DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
-            cost = values.estimateDocCount(new MergePointVisitor(sortedPackedPoints, result));
+            cost =
+                values.estimateDocCount(
+                    new MergePointVisitor(sortedPackedPoints.iterator(), result));
             assert cost >= 0;
           }
           return cost;
@@ -260,18 +262,15 @@ public abstract class PointInSetQuery extends Query implements Accountable {
   private class MergePointVisitor implements IntersectVisitor {

     private final DocIdSetBuilder result;
-    private TermIterator iterator;
+    private final TermIterator iterator;
     private BytesRef nextQueryPoint;
     private final ByteArrayComparator comparator;
-    private final PrefixCodedTerms sortedPackedPoints;
     private DocIdSetBuilder.BulkAdder adder;

-    public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result)
-        throws IOException {
+    public MergePointVisitor(TermIterator iterator, DocIdSetBuilder result) throws IOException {
       this.result = result;
-      this.sortedPackedPoints = sortedPackedPoints;
       this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
-      this.iterator = this.sortedPackedPoints.iterator();
+      this.iterator = iterator;
       nextQueryPoint = iterator.next();
     }
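The visitor now depends only on the iterator it actually consumes, which makes the single-pass contract explicit at the call sites and lets the field be final. For reference, the merge it performs relies on both sides arriving in unsigned-byte order; a generic sketch of that pattern, using plain byte[][] arrays in place of TermIterator and PointValues (illustration only, not the Lucene API):

import java.util.Arrays;

class SortedMergeSketch {
  // Advance a single forward cursor over the sorted query points while
  // visiting sorted values, counting the values that match a query point.
  static int countMatches(byte[][] sortedQueryPoints, byte[][] sortedValues) {
    int matches = 0;
    int q = 0; // position of the "query point iterator"
    for (byte[] value : sortedValues) {
      while (q < sortedQueryPoints.length
          && Arrays.compareUnsigned(sortedQueryPoints[q], value) < 0) {
        q++; // skip query points that sort before the current value
      }
      if (q < sortedQueryPoints.length
          && Arrays.compareUnsigned(sortedQueryPoints[q], value) == 0) {
        matches++;
      }
    }
    return matches;
  }
}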
@@ -775,10 +775,11 @@ public final class Util {

   /** Just takes unsigned byte values from the BytesRef and converts into an IntsRef. */
   public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
-    scratch.clear();
+    scratch.growNoCopy(input.length);
     for (int i = 0; i < input.length; i++) {
-      scratch.append(input.bytes[i + input.offset] & 0xFF);
+      scratch.setIntAt(i, input.bytes[i + input.offset] & 0xFF);
     }
+    scratch.setLength(input.length);
     return scratch.get();
   }
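The rewrite trades per-element append calls, each of which re-checks capacity, for a single upfront grow followed by direct indexed writes and one final length update. The same pattern on a plain array, for illustration (a hypothetical helper, not a Lucene API):

static int[] unsignedBytesToInts(byte[] bytes) {
  int[] ints = new int[bytes.length]; // grow once: a single allocation up front
  for (int i = 0; i < bytes.length; i++) {
    ints[i] = bytes[i] & 0xFF; // unsigned byte value, matching toIntsRef's semantics
  }
  return ints;
}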
@@ -530,7 +530,29 @@ abstract class MemorySegmentIndexInput extends IndexInput

   @Override
   public final MemorySegmentIndexInput clone() {
-    final MemorySegmentIndexInput clone = buildSlice((String) null, 0L, this.length);
+    ensureOpen();
+    ensureAccessible();
+    final MemorySegmentIndexInput clone;
+    if (segments.length == 1) {
+      clone =
+          new SingleSegmentImpl(
+              toString(),
+              null, // clones don't have an Arena, as they can't close)
+              segments[0],
+              length,
+              chunkSizePower,
+              confined);
+    } else {
+      clone =
+          new MultiSegmentImpl(
+              toString(),
+              null, // clones don't have an Arena, as they can't close)
+              segments,
+              ((MultiSegmentImpl) this).offset,
+              length,
+              chunkSizePower,
+              confined);
+    }
     try {
       clone.seek(getFilePointer());
     } catch (IOException ioe) {
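Constructing the clone directly skips the slice bookkeeping that buildSlice performs for arbitrary ranges; a clone always spans the full input, so it can share the segments as-is. The null Arena encodes the ownership rule: clones read the same memory but must not close it. A minimal sketch of that rule outside Lucene, with ByteBuffer standing in for MemorySegment (illustration only):

final class SharedInput implements AutoCloseable {
  private final java.nio.ByteBuffer data; // stands in for the MemorySegment[]
  private final boolean owner;            // clones get false, like the null Arena above

  private SharedInput(java.nio.ByteBuffer data, boolean owner) {
    this.data = data;
    this.owner = owner;
  }

  static SharedInput open(int size) {
    return new SharedInput(java.nio.ByteBuffer.allocateDirect(size), true);
  }

  SharedInput cloneInput() {
    return new SharedInput(data.duplicate(), false); // same memory, no close rights
  }

  @Override
  public void close() {
    // only the owner releases the underlying memory (the Arena in the real code)
    if (owner) {
      /* release */
    }
  }
}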
@@ -570,12 +592,20 @@ abstract class MemorySegmentIndexInput extends IndexInput
     if (NATIVE_ACCESS.isPresent() && advice != ReadAdvice.NORMAL) {
       // No need to madvise with a normal advice, since it's the OS' default.
       final NativeAccess nativeAccess = NATIVE_ACCESS.get();
-      slice.advise(
-          0,
-          slice.length,
-          segment -> {
-            nativeAccess.madvise(segment, advice);
-          });
+      if (length >= nativeAccess.getPageSize()) {
+        // Only set the read advice if the inner file is large enough. Otherwise the cons are likely
+        // outweighing the pros as we're:
+        // - potentially overriding the advice of other files that share the same pages,
+        // - paying the cost of a madvise system call for little value.
+        // We could align inner files with the page size to avoid the first issue, but again the
+        // pros don't clearly overweigh the cons.
+        slice.advise(
+            0,
+            slice.length,
+            segment -> {
+              nativeAccess.madvise(segment, advice);
+            });
+      }
     }
     return slice;
   }
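The guard exists because madvise operates on whole pages: advice applied to a sub-page file in a compound file also lands on the pages its neighbors share, and the system call itself has a fixed cost. The decision reduces to a one-line predicate (a sketch; the page size would come from NativeAccess#getPageSize as above):

// Skip the madvise syscall for files smaller than one page: the advice would
// spill onto neighboring files' pages and saves little I/O anyway.
static boolean shouldAdvise(long fileLength, long pageSize) {
  return fileLength >= pageSize;
}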
@@ -584,26 +614,30 @@ abstract class MemorySegmentIndexInput extends IndexInput
   MemorySegmentIndexInput buildSlice(String sliceDescription, long offset, long length) {
     ensureOpen();
     ensureAccessible();
-
-    final long sliceEnd = offset + length;
-    final int startIndex = (int) (offset >>> chunkSizePower);
-    final int endIndex = (int) (sliceEnd >>> chunkSizePower);
-
-    // we always allocate one more slice, the last one may be a 0 byte one after truncating with
-    // asSlice():
-    final MemorySegment slices[] = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
-
-    // set the last segment's limit for the sliced view.
-    slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
-
-    offset = offset & chunkSizeMask;
+    final MemorySegment[] slices;
+    final boolean isClone = offset == 0 && length == this.length;
+    if (isClone) {
+      slices = segments;
+    } else {
+      final long sliceEnd = offset + length;
+      final int startIndex = (int) (offset >>> chunkSizePower);
+      final int endIndex = (int) (sliceEnd >>> chunkSizePower);
+
+      // we always allocate one more slice, the last one may be a 0 byte one after truncating with
+      // asSlice():
+      slices = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
+
+      // set the last segment's limit for the sliced view.
+      slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
+      offset = offset & chunkSizeMask;
+    }

     final String newResourceDescription = getFullSliceDescription(sliceDescription);
     if (slices.length == 1) {
       return new SingleSegmentImpl(
           newResourceDescription,
           null, // clones don't have an Arena, as they can't close)
-          slices[0].asSlice(offset, length),
+          isClone ? slices[0] : slices[0].asSlice(offset, length),
           length,
           chunkSizePower,
           confined);
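The new fast path observes that a slice covering the whole input needs neither the sub-array copy nor the per-segment asSlice trimming, so it reuses the backing segments untouched and leaves the offset at zero. The shape of the optimization on a plain array (illustration only):

import java.util.Arrays;

class SliceSketch {
  // Full-range "slices" reuse the backing array; only proper sub-ranges pay for a copy.
  static long[] slice(long[] backing, int offset, int length) {
    if (offset == 0 && length == backing.length) {
      return backing; // the isClone fast path: no copy, no trimming
    }
    return Arrays.copyOfRange(backing, offset, offset + length);
  }
}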
@@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
 // These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
 public class TestMaxScoreBulkScorer extends LuceneTestCase {

-  private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
-
-    public CapMaxScoreWindowAt2048Scorer(Scorer in) {
-      super(in);
-    }
-
-    @Override
-    public int advanceShallow(int target) throws IOException {
-      return Math.min(target | 0x7FF, in.advanceShallow(target));
-    }
-
-    @Override
-    public float getMaxScore(int upTo) throws IOException {
-      return in.getMaxScore(upTo);
-    }
-  }
-
   private void writeDocuments(Directory dir) throws IOException {
     try (IndexWriter w =
         new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
             searcher
                 .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
         Scorer scorer2 =
             searcher
                 .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);

         BulkScorer scorer =
             new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
             searcher
                 .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
         Scorer scorer2 =
             searcher
                 .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);

         BulkScorer scorer =
             new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
             searcher
                 .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
         Scorer scorer2 =
             searcher
                 .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
         Scorer scorer3 =
             searcher
                 .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);

         BulkScorer scorer =
             new MaxScoreBulkScorer(
@@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
             searcher
                 .createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
         Scorer scorer2 =
             searcher
                 .createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
         Scorer scorer3 =
             searcher
                 .createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
                 .scorer(context);
-        scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);

         BulkScorer scorer =
             new MaxScoreBulkScorer(