LUCENE-10608: Implement Weight#count on pure conjunctions. (#950)

This commit is contained in:
Adrien Grand 2022-06-14 09:41:38 +02:00 committed by GitHub
parent 7504b0a258
commit 83461601ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 112 additions and 1 deletion

View File

@ -344,6 +344,48 @@ final class BooleanWeight extends Weight {
}
}
/**
 * Tries to compute the number of documents matched in {@code context} without running the query.
 *
 * <p>Only pure conjunctions (every clause is {@code MUST} or {@code FILTER}) are handled here, and
 * only when some clause matches no documents or when at most one clause matches fewer documents
 * than {@code context.reader().numDocs()}. Every other situation is delegated to {@code
 * super.count(context)}.
 *
 * @param context the segment to count matches in
 * @return 0 when there are no clauses; the zero or -1 count reported by a clause; the count of the
 *     single restrictive clause; or whatever {@code super.count(context)} returns otherwise
 * @throws IOException if a clause fails while computing its own count
 */
@Override
public int count(LeafReaderContext context) throws IOException {
  if (weightedClauses.isEmpty()) {
    return 0;
  }

  // First pass: make sure every clause is required before asking any clause for its count.
  for (WeightedBooleanClause candidate : weightedClauses) {
    switch (candidate.clause.getOccur()) {
      case MUST:
      case FILTER:
        continue;
      case SHOULD:
      case MUST_NOT:
      default:
        // Not a pure conjunction, this shortcut does not apply.
        return super.count(context);
    }
  }

  // Second pass: the query is a pure conjunction. "matchAll" is the count a clause reports when
  // it matches every document of the segment; "result" stays at that value until a restrictive
  // clause is found.
  final int matchAll = context.reader().numDocs();
  int result = matchAll;
  for (WeightedBooleanClause required : weightedClauses) {
    final int clauseCount = required.weight.count(context);

    // An unknown count makes the conjunction unknown; an empty clause makes the conjunction
    // empty. Either way the clause's own answer is the conjunction's answer.
    if (clauseCount == -1 || clauseCount == 0) {
      return clauseCount;
    }

    // A clause matching every document does not restrict the conjunction.
    if (clauseCount == matchAll) {
      continue;
    }

    // This clause is restrictive, with a count in [1, matchAll). If an earlier clause was
    // restrictive too, the size of the intersection cannot be derived from the counts alone.
    if (result != matchAll) {
      return super.count(context);
    }

    // First restrictive clause seen so far: its count is the conjunction's count.
    result = clauseCount;
  }
  return result;
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);

View File

@ -29,10 +29,14 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
@ -770,6 +774,71 @@ public class TestBooleanQuery extends LuceneTestCase {
IOUtils.close(reader, w, dir);
}
/**
 * Checks the value returned by {@code Weight#count} for two-clause conjunctions (one {@code MUST}
 * clause and one {@code FILTER} clause) over a two-document index.
 */
public void testConjunctionMatchesCount() throws IOException {
  Directory directory = newDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig());

  // First document: long=3, string=abc
  LongPoint longField = new LongPoint("long", 3L);
  StringField keywordField = new StringField("string", "abc", Store.NO);
  Document document = new Document();
  document.add(longField);
  document.add(keywordField);
  indexWriter.addDocument(document);

  // Second document reuses the same field instances: long=10, string=xyz
  longField.setLongValue(10);
  keywordField.setStringValue("xyz");
  indexWriter.addDocument(document);

  IndexReader reader = DirectoryReader.open(indexWriter);
  indexWriter.close();
  IndexSearcher searcher = new IndexSearcher(reader);

  // Both queries match a single doc, BooleanWeight can't figure out the count of the conjunction
  assertConjunctionCount(
      searcher,
      reader,
      new TermQuery(new Term("string", "abc")),
      LongPoint.newExactQuery("long", 3L),
      -1);

  // The MUST clause has a count of 0, the conjunction has a count of 0 too
  assertConjunctionCount(
      searcher,
      reader,
      new TermQuery(new Term("string", "missing")),
      LongPoint.newExactQuery("long", 3L),
      0);

  // The FILTER clause has a count of 0, the conjunction has a count of 0 too
  assertConjunctionCount(
      searcher,
      reader,
      new TermQuery(new Term("string", "abc")),
      LongPoint.newExactQuery("long", 5L),
      0);

  // The FILTER clause matches all docs, the count of the conjunction is the count of the MUST
  // clause
  assertConjunctionCount(
      searcher,
      reader,
      new TermQuery(new Term("string", "abc")),
      LongPoint.newRangeQuery("long", 0L, 10L),
      1);

  // The MUST clause matches all docs, the count of the conjunction is the count of the FILTER
  // clause
  assertConjunctionCount(
      searcher, reader, new MatchAllDocsQuery(), LongPoint.newRangeQuery("long", 1L, 5L), 1);

  reader.close();
  directory.close();
}

/**
 * Builds the conjunction {@code +must #filter}, creates its weight with {@code
 * ScoreMode.COMPLETE} and a boost of 1, and asserts the count it reports on the first leaf of
 * {@code reader}.
 */
private static void assertConjunctionCount(
    IndexSearcher searcher, IndexReader reader, Query must, Query filter, int expected)
    throws IOException {
  Query conjunction =
      new BooleanQuery.Builder().add(must, Occur.MUST).add(filter, Occur.FILTER).build();
  Weight weight = searcher.createWeight(conjunction, ScoreMode.COMPLETE, 1f);
  assertEquals(expected, weight.count(reader.leaves().get(0)));
}
public void testToString() {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

View File

@ -1273,7 +1273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
query.add(bar, Occur.FILTER);
query.add(foo, Occur.FILTER);
}
indexSearcher.count(query.build());
indexSearcher.search(query.build(), new TotalHitCountCollectorManager());
assertEquals(1, policy.frequency(query.build()));
assertEquals(1, policy.frequency(foo));
assertEquals(1, policy.frequency(bar));