Stop aligning windows in BooleanScorer. (#12488)

BooleanScorer aligns windows to multiples of 2048, but it doesn't have to.
Starting each window at the next match instead of at an aligned boundary can
help evaluate fewer windows overall and speed up query evaluation.
This commit is contained in:
Adrien Grand 2023-08-05 11:29:34 +02:00 committed by GitHub
parent df3632cb03
commit 09e3b43331
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 19 additions and 28 deletions

View File

@ -32,9 +32,7 @@ final class BooleanScorer extends BulkScorer {
static final int SHIFT = 11;
static final int SIZE = 1 << SHIFT;
static final int MASK = SIZE - 1;
static final int SET_SIZE = 1 << (SHIFT - 6);
static final int SET_MASK = SET_SIZE - 1;
static class Bucket {
double score;
@ -53,6 +51,7 @@ final class BooleanScorer extends BulkScorer {
}
void advance(int min) throws IOException {
orCollector.windowMin = min;
score(orCollector, null, min, min);
}
@ -122,6 +121,7 @@ final class BooleanScorer extends BulkScorer {
final boolean needsScores;
final class OrCollector implements LeafCollector {
int windowMin;
Scorable scorer;
@Override
@ -131,7 +131,7 @@ final class BooleanScorer extends BulkScorer {
@Override
public void collect(int doc) throws IOException {
final int i = doc & MASK;
final int i = doc - windowMin;
final int idx = i >>> 6;
matching[idx] |= 1L << i;
if (buckets != null) {
@ -186,30 +186,30 @@ final class BooleanScorer extends BulkScorer {
return cost;
}
private void scoreDocument(LeafCollector collector, int base, int i) throws IOException {
private void scoreDocument(LeafCollector collector, int min, int i) throws IOException {
if (buckets != null) {
final Score score = this.score;
final Bucket bucket = buckets[i];
if (bucket.freq >= minShouldMatch) {
score.score = (float) bucket.score;
final int doc = base | i;
final int doc = min + i;
collector.collect(doc);
}
bucket.freq = 0;
bucket.score = 0;
} else {
collector.collect(base | i);
collector.collect(min + i);
}
}
private void scoreMatches(LeafCollector collector, int base) throws IOException {
private void scoreMatches(LeafCollector collector, int min) throws IOException {
long[] matching = this.matching;
for (int idx = 0; idx < matching.length; idx++) {
long bits = matching[idx];
while (bits != 0L) {
int ntz = Long.numberOfTrailingZeros(bits);
int doc = idx << 6 | ntz;
scoreDocument(collector, base, doc);
scoreDocument(collector, min, doc);
bits ^= 1L << ntz;
}
}
@ -218,7 +218,6 @@ final class BooleanScorer extends BulkScorer {
private void scoreWindowIntoBitSetAndReplay(
LeafCollector collector,
Bits acceptDocs,
int base,
int min,
int max,
BulkScorerAndDoc[] scorers,
@ -227,10 +226,11 @@ final class BooleanScorer extends BulkScorer {
for (int i = 0; i < numScorers; ++i) {
final BulkScorerAndDoc scorer = scorers[i];
assert scorer.next < max;
orCollector.windowMin = min;
scorer.score(orCollector, acceptDocs, min, max);
}
scoreMatches(collector, base);
scoreMatches(collector, min);
Arrays.fill(matching, 0L);
}
@ -256,12 +256,7 @@ final class BooleanScorer extends BulkScorer {
}
private void scoreWindowMultipleScorers(
LeafCollector collector,
Bits acceptDocs,
int windowBase,
int windowMin,
int windowMax,
int maxFreq)
LeafCollector collector, Bits acceptDocs, int windowMin, int windowMax, int maxFreq)
throws IOException {
while (maxFreq < minShouldMatch && maxFreq + tail.size() >= minShouldMatch) {
// a match is still possible
@ -281,8 +276,7 @@ final class BooleanScorer extends BulkScorer {
}
tail.clear();
scoreWindowIntoBitSetAndReplay(
collector, acceptDocs, windowBase, windowMin, windowMax, leads, maxFreq);
scoreWindowIntoBitSetAndReplay(collector, acceptDocs, windowMin, windowMax, leads, maxFreq);
}
// Push back scorers into head and tail
@ -299,14 +293,11 @@ final class BooleanScorer extends BulkScorer {
LeafCollector collector,
Bits acceptDocs,
int windowMin,
int windowMax,
int max)
int windowMax)
throws IOException {
assert tail.size() == 0;
final int nextWindowBase = head.top().next & ~MASK;
final int end = Math.max(windowMax, Math.min(max, nextWindowBase));
bulkScorer.score(collector, acceptDocs, windowMin, end);
bulkScorer.score(collector, acceptDocs, windowMin, windowMax);
// reset the scorer that should be used for the general case
collector.setScorer(score);
@ -315,9 +306,8 @@ final class BooleanScorer extends BulkScorer {
private BulkScorerAndDoc scoreWindow(
BulkScorerAndDoc top, LeafCollector collector, Bits acceptDocs, int min, int max)
throws IOException {
final int windowBase = top.next & ~MASK; // find the window that the next match belongs to
final int windowMin = Math.max(min, windowBase);
final int windowMax = Math.min(max, windowBase + SIZE);
final int windowMin = Math.max(min, top.next);
final int windowMax = Math.min(max, windowMin + SIZE);
// Fill 'leads' with all scorers from 'head' that are in the right window
leads[0] = head.pop();
@ -330,11 +320,12 @@ final class BooleanScorer extends BulkScorer {
// special case: only one scorer can match in the current window,
// we can collect directly
final BulkScorerAndDoc bulkScorer = leads[0];
scoreWindowSingleScorer(bulkScorer, collector, acceptDocs, windowMin, windowMax, max);
scoreWindowSingleScorer(
bulkScorer, collector, acceptDocs, windowMin, Math.min(max, head.top().next));
return head.add(bulkScorer);
} else {
// general case, collect through a bit set first and then replay
scoreWindowMultipleScorers(collector, acceptDocs, windowBase, windowMin, windowMax, maxFreq);
scoreWindowMultipleScorers(collector, acceptDocs, windowMin, windowMax, maxFreq);
return head.top();
}
}