mirror of https://github.com/apache/lucene.git
Stop aligning windows in BooleanScorer. (#12488)
BooleanScorer aligns windows to multiples of 2048, but it doesn't have to. In fact, not aligning windows can reduce the number of windows that need to be evaluated overall and speed up query evaluation.
This commit is contained in:
parent
df3632cb03
commit
09e3b43331
|
@@ -32,9 +32,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
|
||||
static final int SHIFT = 11;
|
||||
static final int SIZE = 1 << SHIFT;
|
||||
static final int MASK = SIZE - 1;
|
||||
static final int SET_SIZE = 1 << (SHIFT - 6);
|
||||
static final int SET_MASK = SET_SIZE - 1;
|
||||
|
||||
static class Bucket {
|
||||
double score;
|
||||
|
@@ -53,6 +51,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
}
|
||||
|
||||
void advance(int min) throws IOException {
|
||||
orCollector.windowMin = min;
|
||||
score(orCollector, null, min, min);
|
||||
}
|
||||
|
||||
|
@@ -122,6 +121,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
final boolean needsScores;
|
||||
|
||||
final class OrCollector implements LeafCollector {
|
||||
int windowMin;
|
||||
Scorable scorer;
|
||||
|
||||
@Override
|
||||
|
@@ -131,7 +131,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
final int i = doc & MASK;
|
||||
final int i = doc - windowMin;
|
||||
final int idx = i >>> 6;
|
||||
matching[idx] |= 1L << i;
|
||||
if (buckets != null) {
|
||||
|
@@ -186,30 +186,30 @@ final class BooleanScorer extends BulkScorer {
|
|||
return cost;
|
||||
}
|
||||
|
||||
private void scoreDocument(LeafCollector collector, int base, int i) throws IOException {
|
||||
private void scoreDocument(LeafCollector collector, int min, int i) throws IOException {
|
||||
if (buckets != null) {
|
||||
final Score score = this.score;
|
||||
final Bucket bucket = buckets[i];
|
||||
if (bucket.freq >= minShouldMatch) {
|
||||
score.score = (float) bucket.score;
|
||||
final int doc = base | i;
|
||||
final int doc = min + i;
|
||||
collector.collect(doc);
|
||||
}
|
||||
bucket.freq = 0;
|
||||
bucket.score = 0;
|
||||
} else {
|
||||
collector.collect(base | i);
|
||||
collector.collect(min + i);
|
||||
}
|
||||
}
|
||||
|
||||
private void scoreMatches(LeafCollector collector, int base) throws IOException {
|
||||
private void scoreMatches(LeafCollector collector, int min) throws IOException {
|
||||
long[] matching = this.matching;
|
||||
for (int idx = 0; idx < matching.length; idx++) {
|
||||
long bits = matching[idx];
|
||||
while (bits != 0L) {
|
||||
int ntz = Long.numberOfTrailingZeros(bits);
|
||||
int doc = idx << 6 | ntz;
|
||||
scoreDocument(collector, base, doc);
|
||||
scoreDocument(collector, min, doc);
|
||||
bits ^= 1L << ntz;
|
||||
}
|
||||
}
|
||||
|
@@ -218,7 +218,6 @@ final class BooleanScorer extends BulkScorer {
|
|||
private void scoreWindowIntoBitSetAndReplay(
|
||||
LeafCollector collector,
|
||||
Bits acceptDocs,
|
||||
int base,
|
||||
int min,
|
||||
int max,
|
||||
BulkScorerAndDoc[] scorers,
|
||||
|
@@ -227,10 +226,11 @@ final class BooleanScorer extends BulkScorer {
|
|||
for (int i = 0; i < numScorers; ++i) {
|
||||
final BulkScorerAndDoc scorer = scorers[i];
|
||||
assert scorer.next < max;
|
||||
orCollector.windowMin = min;
|
||||
scorer.score(orCollector, acceptDocs, min, max);
|
||||
}
|
||||
|
||||
scoreMatches(collector, base);
|
||||
scoreMatches(collector, min);
|
||||
Arrays.fill(matching, 0L);
|
||||
}
|
||||
|
||||
|
@@ -256,12 +256,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
}
|
||||
|
||||
private void scoreWindowMultipleScorers(
|
||||
LeafCollector collector,
|
||||
Bits acceptDocs,
|
||||
int windowBase,
|
||||
int windowMin,
|
||||
int windowMax,
|
||||
int maxFreq)
|
||||
LeafCollector collector, Bits acceptDocs, int windowMin, int windowMax, int maxFreq)
|
||||
throws IOException {
|
||||
while (maxFreq < minShouldMatch && maxFreq + tail.size() >= minShouldMatch) {
|
||||
// a match is still possible
|
||||
|
@@ -281,8 +276,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
}
|
||||
tail.clear();
|
||||
|
||||
scoreWindowIntoBitSetAndReplay(
|
||||
collector, acceptDocs, windowBase, windowMin, windowMax, leads, maxFreq);
|
||||
scoreWindowIntoBitSetAndReplay(collector, acceptDocs, windowMin, windowMax, leads, maxFreq);
|
||||
}
|
||||
|
||||
// Push back scorers into head and tail
|
||||
|
@@ -299,14 +293,11 @@ final class BooleanScorer extends BulkScorer {
|
|||
LeafCollector collector,
|
||||
Bits acceptDocs,
|
||||
int windowMin,
|
||||
int windowMax,
|
||||
int max)
|
||||
int windowMax)
|
||||
throws IOException {
|
||||
assert tail.size() == 0;
|
||||
final int nextWindowBase = head.top().next & ~MASK;
|
||||
final int end = Math.max(windowMax, Math.min(max, nextWindowBase));
|
||||
|
||||
bulkScorer.score(collector, acceptDocs, windowMin, end);
|
||||
bulkScorer.score(collector, acceptDocs, windowMin, windowMax);
|
||||
|
||||
// reset the scorer that should be used for the general case
|
||||
collector.setScorer(score);
|
||||
|
@@ -315,9 +306,8 @@ final class BooleanScorer extends BulkScorer {
|
|||
private BulkScorerAndDoc scoreWindow(
|
||||
BulkScorerAndDoc top, LeafCollector collector, Bits acceptDocs, int min, int max)
|
||||
throws IOException {
|
||||
final int windowBase = top.next & ~MASK; // find the window that the next match belongs to
|
||||
final int windowMin = Math.max(min, windowBase);
|
||||
final int windowMax = Math.min(max, windowBase + SIZE);
|
||||
final int windowMin = Math.max(min, top.next);
|
||||
final int windowMax = Math.min(max, windowMin + SIZE);
|
||||
|
||||
// Fill 'leads' with all scorers from 'head' that are in the right window
|
||||
leads[0] = head.pop();
|
||||
|
@@ -330,11 +320,12 @@ final class BooleanScorer extends BulkScorer {
|
|||
// special case: only one scorer can match in the current window,
|
||||
// we can collect directly
|
||||
final BulkScorerAndDoc bulkScorer = leads[0];
|
||||
scoreWindowSingleScorer(bulkScorer, collector, acceptDocs, windowMin, windowMax, max);
|
||||
scoreWindowSingleScorer(
|
||||
bulkScorer, collector, acceptDocs, windowMin, Math.min(max, head.top().next));
|
||||
return head.add(bulkScorer);
|
||||
} else {
|
||||
// general case, collect through a bit set first and then replay
|
||||
scoreWindowMultipleScorers(collector, acceptDocs, windowBase, windowMin, windowMax, maxFreq);
|
||||
scoreWindowMultipleScorers(collector, acceptDocs, windowMin, windowMax, maxFreq);
|
||||
return head.top();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue