Optimize TermQuery within BooleanQuery, a common case.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150529 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2004-09-22 17:56:00 +00:00
parent b85006589c
commit e1e3d71cc8
3 changed files with 120 additions and 30 deletions

View File

@ -42,7 +42,7 @@ final class BooleanScorer extends Scorer {
public SubScorer next;
public SubScorer(Scorer scorer, boolean required, boolean prohibited,
HitCollector collector, SubScorer next)
HitCollector collector, SubScorer next)
throws IOException {
this.scorer = scorer;
this.done = !scorer.next();
@ -58,8 +58,8 @@ final class BooleanScorer extends Scorer {
int mask = 0;
if (required || prohibited) {
if (nextMask == 0)
throw new IndexOutOfBoundsException
("More than 32 required/prohibited clauses in query.");
throw new IndexOutOfBoundsException
("More than 32 required/prohibited clauses in query.");
mask = nextMask;
nextMask = nextMask << 1;
} else
@ -69,12 +69,12 @@ final class BooleanScorer extends Scorer {
maxCoord++;
if (prohibited)
prohibitedMask |= mask; // update prohibited mask
prohibitedMask |= mask; // update prohibited mask
else if (required)
requiredMask |= mask; // update required mask
requiredMask |= mask; // update required mask
scorers = new SubScorer(scorer, required, prohibited,
bucketTable.newCollector(mask), scorers);
bucketTable.newCollector(mask), scorers);
}
private final void computeCoordFactors() {
@ -86,6 +86,46 @@ final class BooleanScorer extends Scorer {
private int end;
private Bucket current;
public void score(HitCollector hc) throws IOException {
score(hc, Integer.MAX_VALUE);
}
protected boolean score(HitCollector hc, int max) throws IOException {
if (coordFactors == null)
computeCoordFactors();
boolean more;
do {
while (bucketTable.first != null) { // more queued
current = bucketTable.first;
if (current.doc >= max)
return true;
// check prohibited & required
if ((current.bits & prohibitedMask) == 0 &&
(current.bits & requiredMask) == requiredMask) {
hc.collect(current.doc, current.score * coordFactors[current.coord]);
}
bucketTable.first = current.next; // pop the queue
}
// refill the queue
more = false;
end += BucketTable.SIZE;
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
if (!sub.done) {
sub.done = !sub.scorer.score(sub.collector, end);
if (!sub.done)
more = true;
}
}
} while (bucketTable.first != null || more);
return false;
}
public int doc() { return current.doc; }
public boolean next() throws IOException {
@ -127,20 +167,20 @@ final class BooleanScorer extends Scorer {
}
static final class Bucket {
int doc = -1; // tells if bucket is valid
float score; // incremental score
int bits; // used for bool constraints
int coord; // count of terms in score
Bucket next; // next valid bucket
int doc = -1; // tells if bucket is valid
float score; // incremental score
int bits; // used for bool constraints
int coord; // count of terms in score
Bucket next; // next valid bucket
}
/** A simple hash table of document scores within a range. */
static final class BucketTable {
public static final int SIZE = 1 << 10;
public static final int SIZE = 1 << 8;
public static final int MASK = SIZE - 1;
final Bucket[] buckets = new Bucket[SIZE];
Bucket first = null; // head of valid list
Bucket first = null; // head of valid list
private BooleanScorer scorer;
@ -167,20 +207,20 @@ final class BooleanScorer extends Scorer {
final int i = doc & BucketTable.MASK;
Bucket bucket = table.buckets[i];
if (bucket == null)
table.buckets[i] = bucket = new Bucket();
table.buckets[i] = bucket = new Bucket();
if (bucket.doc != doc) { // invalid bucket
bucket.doc = doc; // set doc
bucket.score = score; // initialize score
bucket.bits = mask; // initialize mask
bucket.coord = 1; // initialize coord
if (bucket.doc != doc) { // invalid bucket
bucket.doc = doc; // set doc
bucket.score = score; // initialize score
bucket.bits = mask; // initialize mask
bucket.coord = 1; // initialize coord
bucket.next = table.first; // push onto valid list
table.first = bucket;
} else { // valid bucket
bucket.score += score; // increment score
bucket.bits |= mask; // add bits in mask
bucket.coord++; // increment coord
bucket.next = table.first; // push onto valid list
table.first = bucket;
} else { // valid bucket
bucket.score += score; // increment score
bucket.bits |= mask; // add bits in mask
bucket.coord++; // increment coord
}
}
}

View File

@ -48,6 +48,23 @@ public abstract class Scorer {
}
}
/** Expert: Collects matching documents in a range. Hook for optimization.
* Note that {@link #next()} must be called once before this method is called
* for the first time.
* @param hc The collector to which all matching documents are passed through
* {@link HitCollector#collect(int, float)}.
* @param max Do not score documents past this.
* @return true if more matching documents may remain.
*/
protected boolean score(HitCollector hc, int max) throws IOException {
while (doc() < max) {
hc.collect(doc(), score());
if (!next())
return false;
}
return true;
}
/** Advances to the next document matching the query.
* @return true iff there is another document matching the query.
*/

View File

@ -29,8 +29,8 @@ final class TermScorer extends Scorer {
private float weightValue;
private int doc;
private final int[] docs = new int[32]; // buffered doc numbers
private final int[] freqs = new int[32]; // buffered term freqs
private final int[] docs = new int[32]; // buffered doc numbers
private final int[] freqs = new int[32]; // buffered term freqs
private int pointer;
private int pointerMax;
@ -55,6 +55,39 @@ final class TermScorer extends Scorer {
scoreCache[i] = getSimilarity().tf(i) * weightValue;
}
public void score(HitCollector hc) throws IOException {
next();
score(hc, Integer.MAX_VALUE);
}
protected boolean score(HitCollector c, int end) throws IOException {
Similarity similarity = getSimilarity(); // cache sim in local
while (doc < end) { // for docs in window
int f = freqs[pointer];
float score = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: similarity.tf(f)*weightValue; // cache miss
score *= Similarity.decodeNorm(norms[doc]); // normalize for field
c.collect(doc, score); // collect score
if (++pointer >= pointerMax) {
pointerMax = termDocs.read(docs, freqs); // refill buffers
if (pointerMax != 0) {
pointer = 0;
} else {
termDocs.close(); // close stream
doc = Integer.MAX_VALUE; // set to sentinel value
return false;
}
}
doc = docs[pointer];
}
return true;
}
/** Returns the current document number matching the query.
* Initially invalid, until {@link #next()} is called the first time.
*/
@ -72,8 +105,8 @@ final class TermScorer extends Scorer {
if (pointerMax != 0) {
pointer = 0;
} else {
termDocs.close(); // close stream
doc = Integer.MAX_VALUE; // set to sentinel value
termDocs.close(); // close stream
doc = Integer.MAX_VALUE; // set to sentinel value
return false;
}
}
@ -84,7 +117,7 @@ final class TermScorer extends Scorer {
public float score() {
int f = freqs[pointer];
float raw = // compute tf(f)*weight
f < SCORE_CACHE_SIZE // check cache
f < SCORE_CACHE_SIZE // check cache
? scoreCache[f] // cache hit
: getSimilarity().tf(f)*weightValue; // cache miss