mirror of
https://github.com/apache/lucene.git
synced 2025-02-28 13:29:26 +00:00
LUCENE-3510: don't limit number of prohibited clauses to 32 in BooleanScorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1182982 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d90e7d51d1
commit
040dc8a2f6
@ -329,7 +329,7 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||
}
|
||||
|
||||
// Check if we can return a BooleanScorer
|
||||
if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) {
|
||||
if (!scoreDocsInOrder && topScorer && required.size() == 0) {
|
||||
return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);
|
||||
}
|
||||
|
||||
@ -375,10 +375,6 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||
}
|
||||
}
|
||||
|
||||
if (numProhibited > 32) { // cannot use BS
|
||||
return false;
|
||||
}
|
||||
|
||||
// scorer() will return an out-of-order scorer if requested.
|
||||
return true;
|
||||
}
|
||||
|
@ -29,10 +29,10 @@ import org.apache.lucene.search.BooleanQuery.BooleanWeight;
|
||||
/* Description from Doug Cutting (excerpted from
|
||||
* LUCENE-1483):
|
||||
*
|
||||
* BooleanScorer uses a ~16k array to score windows of
|
||||
* docs. So it scores docs 0-16k first, then docs 16-32k,
|
||||
* BooleanScorer uses an array to score windows of
|
||||
* 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
|
||||
* etc. For each window it iterates through all query terms
|
||||
* and accumulates a score in table[doc%16k]. It also stores
|
||||
* and accumulates a score in table[doc%2K]. It also stores
|
||||
* in the table a bitmask representing which terms
|
||||
* contributed to the score. Non-zero scores are chained in
|
||||
* a linked list. At the end of scoring each window it then
|
||||
@ -75,9 +75,7 @@ final class BooleanScorer extends Scorer {
|
||||
public void collect(final int doc) throws IOException {
|
||||
final BucketTable table = bucketTable;
|
||||
final int i = doc & BucketTable.MASK;
|
||||
Bucket bucket = table.buckets[i];
|
||||
if (bucket == null)
|
||||
table.buckets[i] = bucket = new Bucket();
|
||||
final Bucket bucket = table.buckets[i];
|
||||
|
||||
if (bucket.doc != doc) { // invalid bucket
|
||||
bucket.doc = doc; // set doc
|
||||
@ -143,6 +141,9 @@ final class BooleanScorer extends Scorer {
|
||||
static final class Bucket {
|
||||
int doc = -1; // tells if bucket is valid
|
||||
float score; // incremental score
|
||||
// TODO: break out bool anyProhibited, int
|
||||
// numRequiredMatched; then we can remove 32 limit on
|
||||
// required clauses
|
||||
int bits; // used for bool constraints
|
||||
int coord; // count of terms in score
|
||||
Bucket next; // next valid bucket
|
||||
@ -156,7 +157,13 @@ final class BooleanScorer extends Scorer {
|
||||
final Bucket[] buckets = new Bucket[SIZE];
|
||||
Bucket first = null; // head of valid list
|
||||
|
||||
public BucketTable() {}
|
||||
public BucketTable() {
|
||||
// Pre-fill to save the lazy init when collecting
|
||||
// each sub:
|
||||
for(int idx=0;idx<SIZE;idx++) {
|
||||
buckets[idx] = new Bucket();
|
||||
}
|
||||
}
|
||||
|
||||
public Collector newCollector(int mask) {
|
||||
return new BooleanScorerCollector(mask, this);
|
||||
@ -169,7 +176,7 @@ final class BooleanScorer extends Scorer {
|
||||
public Scorer scorer;
|
||||
// TODO: re-enable this if BQ ever sends us required clauses
|
||||
//public boolean required = false;
|
||||
public boolean prohibited = false;
|
||||
public boolean prohibited;
|
||||
public Collector collector;
|
||||
public SubScorer next;
|
||||
|
||||
@ -193,12 +200,13 @@ final class BooleanScorer extends Scorer {
|
||||
private final float[] coordFactors;
|
||||
// TODO: re-enable this if BQ ever sends us required clauses
|
||||
//private int requiredMask = 0;
|
||||
private int prohibitedMask = 0;
|
||||
private int nextMask = 1;
|
||||
private final int minNrShouldMatch;
|
||||
private int end;
|
||||
private Bucket current;
|
||||
private int doc = -1;
|
||||
|
||||
// Any time a prohibited clause matches we set bit 0:
|
||||
private static final int PROHIBITED_MASK = 1;
|
||||
|
||||
BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch,
|
||||
List<Scorer> optionalScorers, List<Scorer> prohibitedScorers, int maxCoord) throws IOException {
|
||||
@ -215,11 +223,8 @@ final class BooleanScorer extends Scorer {
|
||||
|
||||
if (prohibitedScorers != null && prohibitedScorers.size() > 0) {
|
||||
for (Scorer scorer : prohibitedScorers) {
|
||||
int mask = nextMask;
|
||||
nextMask = nextMask << 1;
|
||||
prohibitedMask |= mask; // update prohibited mask
|
||||
if (scorer.nextDoc() != NO_MORE_DOCS) {
|
||||
scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(mask), scorers);
|
||||
scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -233,9 +238,12 @@ final class BooleanScorer extends Scorer {
|
||||
// firstDocID is ignored since nextDoc() initializes 'current'
|
||||
@Override
|
||||
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
|
||||
// Make sure it's only BooleanScorer that calls us:
|
||||
assert firstDocID == -1;
|
||||
boolean more;
|
||||
Bucket tmp;
|
||||
BucketScorer bs = new BucketScorer(weight);
|
||||
|
||||
// The internal loop will set the score and doc before calling collect.
|
||||
collector.setScorer(bs);
|
||||
do {
|
||||
@ -244,12 +252,13 @@ final class BooleanScorer extends Scorer {
|
||||
while (current != null) { // more queued
|
||||
|
||||
// check prohibited & required
|
||||
if ((current.bits & prohibitedMask) == 0) {
|
||||
if ((current.bits & PROHIBITED_MASK) == 0) {
|
||||
|
||||
// TODO: re-enable this if BQ ever sends us required
|
||||
// clauses
|
||||
//&& (current.bits & requiredMask) == requiredMask) {
|
||||
// TODO: re-enable this if BQ ever sends us required
|
||||
// clauses
|
||||
//&& (current.bits & requiredMask) == requiredMask) {
|
||||
|
||||
// TODO: can we remove this?
|
||||
if (current.doc >= max){
|
||||
tmp = current;
|
||||
current = current.next;
|
||||
@ -298,48 +307,22 @@ final class BooleanScorer extends Scorer {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
boolean more;
|
||||
do {
|
||||
while (bucketTable.first != null) { // more queued
|
||||
current = bucketTable.first;
|
||||
bucketTable.first = current.next; // pop the queue
|
||||
|
||||
// check prohibited & required, and minNrShouldMatch
|
||||
if ((current.bits & prohibitedMask) == 0 &&
|
||||
current.coord >= minNrShouldMatch) {
|
||||
// TODO: re-enable this if BQ ever sends us required clauses
|
||||
// (current.bits & requiredMask) == requiredMask &&
|
||||
return doc = current.doc;
|
||||
}
|
||||
}
|
||||
|
||||
// refill the queue
|
||||
more = false;
|
||||
end += BucketTable.SIZE;
|
||||
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
|
||||
int subScorerDocID = sub.scorer.docID();
|
||||
if (subScorerDocID != NO_MORE_DOCS) {
|
||||
more |= sub.scorer.score(sub.collector, end, subScorerDocID);
|
||||
}
|
||||
}
|
||||
} while (bucketTable.first != null || more);
|
||||
|
||||
return doc = NO_MORE_DOCS;
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
return current.score * coordFactors[current.coord];
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void score(Collector collector) throws IOException {
|
||||
score(collector, Integer.MAX_VALUE, nextDoc());
|
||||
score(collector, Integer.MAX_VALUE, -1);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -18,16 +18,19 @@ package org.apache.lucene.search;
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestBooleanScorer extends LuceneTestCase
|
||||
@ -93,13 +96,88 @@ public class TestBooleanScorer extends LuceneTestCase
|
||||
}};
|
||||
|
||||
BooleanScorer bs = new BooleanScorer(weight, false, 1, Arrays.asList(scorers), null, scorers.length);
|
||||
|
||||
assertEquals("should have received 3000", 3000, bs.nextDoc());
|
||||
assertEquals("should have received NO_MORE_DOCS", DocIdSetIterator.NO_MORE_DOCS, bs.nextDoc());
|
||||
|
||||
final List<Integer> hits = new ArrayList<Integer>();
|
||||
bs.score(new Collector() {
|
||||
int docBase;
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
hits.add(docBase+doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) {
|
||||
docBase = context.docBase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return true;
|
||||
}
|
||||
});
|
||||
|
||||
assertEquals("should have only 1 hit", 1, hits.size());
|
||||
assertEquals("hit should have been docID=3000", 3000, hits.get(0).intValue());
|
||||
searcher.close();
|
||||
ir.close();
|
||||
directory.close();
|
||||
|
||||
}
|
||||
|
||||
public void testMoreThan32ProhibitedClauses() throws Exception {
|
||||
final Directory d = newDirectory();
|
||||
final RandomIndexWriter w = new RandomIndexWriter(random, d);
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("field", "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33"));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new TextField("field", "33"));
|
||||
w.addDocument(doc);
|
||||
final IndexReader r = w.getReader();
|
||||
w.close();
|
||||
final IndexSearcher s = newSearcher(r);
|
||||
|
||||
final BooleanQuery q = new BooleanQuery();
|
||||
for(int term=0;term<33;term++) {
|
||||
q.add(new BooleanClause(new TermQuery(new Term("field", ""+term)),
|
||||
BooleanClause.Occur.MUST_NOT));
|
||||
}
|
||||
q.add(new BooleanClause(new TermQuery(new Term("field", "33")),
|
||||
BooleanClause.Occur.SHOULD));
|
||||
|
||||
final int[] count = new int[1];
|
||||
s.search(q, new Collector() {
|
||||
private Scorer scorer;
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) {
|
||||
// Make sure we got BooleanScorer:
|
||||
this.scorer = scorer;
|
||||
assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$BucketScorer", scorer.getClass().getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
count[0]++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return true;
|
||||
}
|
||||
});
|
||||
|
||||
assertEquals(1, count[0]);
|
||||
|
||||
s.close();
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user