mirror of https://github.com/apache/lucene.git
Optimize TermQuery within BooleanQuery, a common case.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150529 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b85006589c
commit
e1e3d71cc8
|
@ -42,7 +42,7 @@ final class BooleanScorer extends Scorer {
|
|||
public SubScorer next;
|
||||
|
||||
public SubScorer(Scorer scorer, boolean required, boolean prohibited,
|
||||
HitCollector collector, SubScorer next)
|
||||
HitCollector collector, SubScorer next)
|
||||
throws IOException {
|
||||
this.scorer = scorer;
|
||||
this.done = !scorer.next();
|
||||
|
@ -58,8 +58,8 @@ final class BooleanScorer extends Scorer {
|
|||
int mask = 0;
|
||||
if (required || prohibited) {
|
||||
if (nextMask == 0)
|
||||
throw new IndexOutOfBoundsException
|
||||
("More than 32 required/prohibited clauses in query.");
|
||||
throw new IndexOutOfBoundsException
|
||||
("More than 32 required/prohibited clauses in query.");
|
||||
mask = nextMask;
|
||||
nextMask = nextMask << 1;
|
||||
} else
|
||||
|
@ -69,12 +69,12 @@ final class BooleanScorer extends Scorer {
|
|||
maxCoord++;
|
||||
|
||||
if (prohibited)
|
||||
prohibitedMask |= mask; // update prohibited mask
|
||||
prohibitedMask |= mask; // update prohibited mask
|
||||
else if (required)
|
||||
requiredMask |= mask; // update required mask
|
||||
requiredMask |= mask; // update required mask
|
||||
|
||||
scorers = new SubScorer(scorer, required, prohibited,
|
||||
bucketTable.newCollector(mask), scorers);
|
||||
bucketTable.newCollector(mask), scorers);
|
||||
}
|
||||
|
||||
private final void computeCoordFactors() {
|
||||
|
@ -86,6 +86,46 @@ final class BooleanScorer extends Scorer {
|
|||
private int end;
|
||||
private Bucket current;
|
||||
|
||||
public void score(HitCollector hc) throws IOException {
|
||||
score(hc, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
protected boolean score(HitCollector hc, int max) throws IOException {
|
||||
if (coordFactors == null)
|
||||
computeCoordFactors();
|
||||
|
||||
boolean more;
|
||||
do {
|
||||
while (bucketTable.first != null) { // more queued
|
||||
current = bucketTable.first;
|
||||
if (current.doc >= max)
|
||||
return true;
|
||||
|
||||
// check prohibited & required
|
||||
if ((current.bits & prohibitedMask) == 0 &&
|
||||
(current.bits & requiredMask) == requiredMask) {
|
||||
hc.collect(current.doc, current.score * coordFactors[current.coord]);
|
||||
}
|
||||
|
||||
bucketTable.first = current.next; // pop the queue
|
||||
}
|
||||
|
||||
// refill the queue
|
||||
more = false;
|
||||
end += BucketTable.SIZE;
|
||||
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
|
||||
if (!sub.done) {
|
||||
sub.done = !sub.scorer.score(sub.collector, end);
|
||||
if (!sub.done)
|
||||
more = true;
|
||||
}
|
||||
}
|
||||
} while (bucketTable.first != null || more);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public int doc() { return current.doc; }
|
||||
|
||||
public boolean next() throws IOException {
|
||||
|
@ -127,20 +167,20 @@ final class BooleanScorer extends Scorer {
|
|||
}
|
||||
|
||||
static final class Bucket {
|
||||
int doc = -1; // tells if bucket is valid
|
||||
float score; // incremental score
|
||||
int bits; // used for bool constraints
|
||||
int coord; // count of terms in score
|
||||
Bucket next; // next valid bucket
|
||||
int doc = -1; // tells if bucket is valid
|
||||
float score; // incremental score
|
||||
int bits; // used for bool constraints
|
||||
int coord; // count of terms in score
|
||||
Bucket next; // next valid bucket
|
||||
}
|
||||
|
||||
/** A simple hash table of document scores within a range. */
|
||||
static final class BucketTable {
|
||||
public static final int SIZE = 1 << 10;
|
||||
public static final int SIZE = 1 << 8;
|
||||
public static final int MASK = SIZE - 1;
|
||||
|
||||
final Bucket[] buckets = new Bucket[SIZE];
|
||||
Bucket first = null; // head of valid list
|
||||
Bucket first = null; // head of valid list
|
||||
|
||||
private BooleanScorer scorer;
|
||||
|
||||
|
@ -167,20 +207,20 @@ final class BooleanScorer extends Scorer {
|
|||
final int i = doc & BucketTable.MASK;
|
||||
Bucket bucket = table.buckets[i];
|
||||
if (bucket == null)
|
||||
table.buckets[i] = bucket = new Bucket();
|
||||
table.buckets[i] = bucket = new Bucket();
|
||||
|
||||
if (bucket.doc != doc) { // invalid bucket
|
||||
bucket.doc = doc; // set doc
|
||||
bucket.score = score; // initialize score
|
||||
bucket.bits = mask; // initialize mask
|
||||
bucket.coord = 1; // initialize coord
|
||||
if (bucket.doc != doc) { // invalid bucket
|
||||
bucket.doc = doc; // set doc
|
||||
bucket.score = score; // initialize score
|
||||
bucket.bits = mask; // initialize mask
|
||||
bucket.coord = 1; // initialize coord
|
||||
|
||||
bucket.next = table.first; // push onto valid list
|
||||
table.first = bucket;
|
||||
} else { // valid bucket
|
||||
bucket.score += score; // increment score
|
||||
bucket.bits |= mask; // add bits in mask
|
||||
bucket.coord++; // increment coord
|
||||
bucket.next = table.first; // push onto valid list
|
||||
table.first = bucket;
|
||||
} else { // valid bucket
|
||||
bucket.score += score; // increment score
|
||||
bucket.bits |= mask; // add bits in mask
|
||||
bucket.coord++; // increment coord
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,6 +48,23 @@ public abstract class Scorer {
|
|||
}
|
||||
}
|
||||
|
||||
/** Expert: Collects matching documents in a range. Hook for optimization.
|
||||
* Note that {@link #next()} must be called once before this method is called
|
||||
* for the first time.
|
||||
* @param hc The collector to which all matching documents are passed through
|
||||
* {@link HitCollector#collect(int, float)}.
|
||||
* @param max Do not score documents past this.
|
||||
* @return true if more matching documents may remain.
|
||||
*/
|
||||
protected boolean score(HitCollector hc, int max) throws IOException {
|
||||
while (doc() < max) {
|
||||
hc.collect(doc(), score());
|
||||
if (!next())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Advances to the next document matching the query.
|
||||
* @return true iff there is another document matching the query.
|
||||
*/
|
||||
|
|
|
@ -29,8 +29,8 @@ final class TermScorer extends Scorer {
|
|||
private float weightValue;
|
||||
private int doc;
|
||||
|
||||
private final int[] docs = new int[32]; // buffered doc numbers
|
||||
private final int[] freqs = new int[32]; // buffered term freqs
|
||||
private final int[] docs = new int[32]; // buffered doc numbers
|
||||
private final int[] freqs = new int[32]; // buffered term freqs
|
||||
private int pointer;
|
||||
private int pointerMax;
|
||||
|
||||
|
@ -55,6 +55,39 @@ final class TermScorer extends Scorer {
|
|||
scoreCache[i] = getSimilarity().tf(i) * weightValue;
|
||||
}
|
||||
|
||||
public void score(HitCollector hc) throws IOException {
|
||||
next();
|
||||
score(hc, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
protected boolean score(HitCollector c, int end) throws IOException {
|
||||
Similarity similarity = getSimilarity(); // cache sim in local
|
||||
while (doc < end) { // for docs in window
|
||||
int f = freqs[pointer];
|
||||
float score = // compute tf(f)*weight
|
||||
f < SCORE_CACHE_SIZE // check cache
|
||||
? scoreCache[f] // cache hit
|
||||
: similarity.tf(f)*weightValue; // cache miss
|
||||
|
||||
score *= Similarity.decodeNorm(norms[doc]); // normalize for field
|
||||
|
||||
c.collect(doc, score); // collect score
|
||||
|
||||
if (++pointer >= pointerMax) {
|
||||
pointerMax = termDocs.read(docs, freqs); // refill buffers
|
||||
if (pointerMax != 0) {
|
||||
pointer = 0;
|
||||
} else {
|
||||
termDocs.close(); // close stream
|
||||
doc = Integer.MAX_VALUE; // set to sentinel value
|
||||
return false;
|
||||
}
|
||||
}
|
||||
doc = docs[pointer];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns the current document number matching the query.
|
||||
* Initially invalid, until {@link #next()} is called the first time.
|
||||
*/
|
||||
|
@ -72,8 +105,8 @@ final class TermScorer extends Scorer {
|
|||
if (pointerMax != 0) {
|
||||
pointer = 0;
|
||||
} else {
|
||||
termDocs.close(); // close stream
|
||||
doc = Integer.MAX_VALUE; // set to sentinel value
|
||||
termDocs.close(); // close stream
|
||||
doc = Integer.MAX_VALUE; // set to sentinel value
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -84,7 +117,7 @@ final class TermScorer extends Scorer {
|
|||
public float score() {
|
||||
int f = freqs[pointer];
|
||||
float raw = // compute tf(f)*weight
|
||||
f < SCORE_CACHE_SIZE // check cache
|
||||
f < SCORE_CACHE_SIZE // check cache
|
||||
? scoreCache[f] // cache hit
|
||||
: getSimilarity().tf(f)*weightValue; // cache miss
|
||||
|
||||
|
|
Loading…
Reference in New Issue