Aggressive `count` in BooleanWeight (#12017)

This commit is contained in:
Lu Xugang 2022-12-22 23:48:05 +08:00 committed by GitHub
parent ad22fb2879
commit 3bc8cd5c20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 5 deletions

View File

@ -243,6 +243,8 @@ Optimizations
* GITHUB#12006: Do ints compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries. (Guo Feng)
* GITHUB#12017: Aggressive count in BooleanWeight. (Lu Xugang)
Other
---------------------

View File

@ -470,14 +470,18 @@ final class BooleanWeight extends Weight {
private int optCount(LeafReaderContext context, Occur occur) throws IOException {
final int numDocs = context.reader().numDocs();
int optCount = 0;
boolean unknownCount = false;
for (WeightedBooleanClause weightedClause : weightedClauses) {
if (weightedClause.clause.getOccur() != occur) {
continue;
}
int count = weightedClause.weight.count(context);
if (count == -1 || count == numDocs) {
// If any of the clauses has a number of matches that is unknown, the number of matches of
// the disjunction is unknown.
if (count == -1) {
// If one clause has a number of matches that is unknown, keep going and check whether one
// of the remaining clauses matches all docs.
unknownCount = true;
continue;
} else if (count == numDocs) {
// If either clause matches all docs, then the disjunction matches all docs.
return count;
} else if (count == 0) {
@ -489,10 +493,13 @@ final class BooleanWeight extends Weight {
} else {
// We have two clauses whose count is in [1, numDocs), we can't figure out the number of
// docs that match the disjunction without running the query.
return -1;
unknownCount = true;
}
}
return optCount;
// If at least one of the clauses has a number of matches that is unknown and no clause
// matches all docs, then the number of matches of the disjunction is unknown.
return unknownCount ? -1 : optCount;
}
@Override

View File

@ -848,11 +848,14 @@ public class TestBooleanQuery extends LuceneTestCase {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
Document doc = new Document();
LongPoint longPoint = new LongPoint("long", 3L);
LongPoint longPoint3dim = new LongPoint("long3dim", 3L, 4L, 5L);
doc.add(longPoint);
doc.add(longPoint3dim);
StringField stringField = new StringField("string", "abc", Store.NO);
doc.add(stringField);
writer.addDocument(doc);
longPoint.setLongValue(10);
longPoint3dim.setLongValues(10L, 11L, 12L);
stringField.setStringValue("xyz");
writer.addDocument(doc);
IndexReader reader = DirectoryReader.open(writer);
@ -904,6 +907,59 @@ public class TestBooleanQuery extends LuceneTestCase {
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
assertEquals(2, weight.count(reader.leaves().get(0)));
long[] lower = new long[] {4L, 5L, 6L};
long[] upper = new long[] {9L, 10L, 11L};
Query unknownCountQuery = LongPoint.newRangeQuery("long3dim", lower, upper);
assert reader.leaves().size() == 1;
assert searcher
.createWeight(unknownCountQuery, ScoreMode.COMPLETE, 1f)
.count(reader.leaves().get(0))
== -1;
query =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("string", "xyz")), Occur.MUST)
.add(unknownCountQuery, Occur.MUST_NOT)
.add(new MatchAllDocsQuery(), Occur.MUST_NOT)
.build();
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
// count of the first MUST_NOT clause is unknown, but the second MUST_NOT clause matches all
// docs
assertEquals(0, weight.count(reader.leaves().get(0)));
query =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("string", "xyz")), Occur.MUST)
.add(unknownCountQuery, Occur.MUST_NOT)
.add(new TermQuery(new Term("string", "abc")), Occur.MUST_NOT)
.build();
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
// count of the first MUST_NOT clause is unknown; although the second MUST_NOT clause matches
// one doc, we can't figure out the number of matching docs
assertEquals(-1, weight.count(reader.leaves().get(0)));
// test pure disjunction
query =
new BooleanQuery.Builder()
.add(unknownCountQuery, Occur.SHOULD)
.add(new MatchAllDocsQuery(), Occur.SHOULD)
.build();
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
// count of the first SHOULD clause is unknown, but the second SHOULD clause matches all docs
assertEquals(2, weight.count(reader.leaves().get(0)));
query =
new BooleanQuery.Builder()
.add(unknownCountQuery, Occur.SHOULD)
.add(new TermQuery(new Term("string", "abc")), Occur.SHOULD)
.build();
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
// count of the first SHOULD clause is unknown; although the second SHOULD clause matches one
// doc, we can't figure out the number of matching docs
assertEquals(-1, weight.count(reader.leaves().get(0)));
reader.close();
dir.close();
}