mirror of https://github.com/apache/lucene.git
Aggressive `count` in BooleanWeight (#12017)
This commit is contained in:
parent
ad22fb2879
commit
3bc8cd5c20
|
@ -243,6 +243,8 @@ Optimizations
|
|||
|
||||
* GITHUB#12006: Do ints compare instead of ArrayUtil#compareUnsigned4 in LatlonPointQueries. (Guo Feng)
|
||||
|
||||
* GITHUB#12017: Aggressive count in BooleanWeight. (Lu Xugang)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -470,14 +470,18 @@ final class BooleanWeight extends Weight {
|
|||
private int optCount(LeafReaderContext context, Occur occur) throws IOException {
|
||||
final int numDocs = context.reader().numDocs();
|
||||
int optCount = 0;
|
||||
boolean unknownCount = false;
|
||||
for (WeightedBooleanClause weightedClause : weightedClauses) {
|
||||
if (weightedClause.clause.getOccur() != occur) {
|
||||
continue;
|
||||
}
|
||||
int count = weightedClause.weight.count(context);
|
||||
if (count == -1 || count == numDocs) {
|
||||
// If any of the clauses has a number of matches that is unknown, the number of matches of
|
||||
// the disjunction is unknown.
|
||||
if (count == -1) {
|
||||
// If one clause has a number of matches that is unknown, let's be more aggressive to check
|
||||
// whether the remaining clauses could match all docs.
|
||||
unknownCount = true;
|
||||
continue;
|
||||
} else if (count == numDocs) {
|
||||
// If either clause matches all docs, then the disjunction matches all docs.
|
||||
return count;
|
||||
} else if (count == 0) {
|
||||
|
@ -489,10 +493,13 @@ final class BooleanWeight extends Weight {
|
|||
} else {
|
||||
// We have two clauses whose count is in [1, numDocs), we can't figure out the number of
|
||||
// docs that match the disjunction without running the query.
|
||||
return -1;
|
||||
unknownCount = true;
|
||||
}
|
||||
}
|
||||
return optCount;
|
||||
// If at least one of the clauses has a number of matches that is unknown and no clause matches all
|
||||
// docs, then the number of matches of
|
||||
// the disjunction is unknown
|
||||
return unknownCount ? -1 : optCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -848,11 +848,14 @@ public class TestBooleanQuery extends LuceneTestCase {
|
|||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
|
||||
Document doc = new Document();
|
||||
LongPoint longPoint = new LongPoint("long", 3L);
|
||||
LongPoint longPoint3dim = new LongPoint("long3dim", 3L, 4L, 5L);
|
||||
doc.add(longPoint);
|
||||
doc.add(longPoint3dim);
|
||||
StringField stringField = new StringField("string", "abc", Store.NO);
|
||||
doc.add(stringField);
|
||||
writer.addDocument(doc);
|
||||
longPoint.setLongValue(10);
|
||||
longPoint3dim.setLongValues(10L, 11L, 12L);
|
||||
stringField.setStringValue("xyz");
|
||||
writer.addDocument(doc);
|
||||
IndexReader reader = DirectoryReader.open(writer);
|
||||
|
@ -904,6 +907,59 @@ public class TestBooleanQuery extends LuceneTestCase {
|
|||
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
|
||||
assertEquals(2, weight.count(reader.leaves().get(0)));
|
||||
|
||||
long[] lower = new long[] {4L, 5L, 6L};
|
||||
long[] upper = new long[] {9L, 10L, 11L};
|
||||
Query unknownCountQuery = LongPoint.newRangeQuery("long3dim", lower, upper);
|
||||
assert reader.leaves().size() == 1;
|
||||
assert searcher
|
||||
.createWeight(unknownCountQuery, ScoreMode.COMPLETE, 1f)
|
||||
.count(reader.leaves().get(0))
|
||||
== -1;
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("string", "xyz")), Occur.MUST)
|
||||
.add(unknownCountQuery, Occur.MUST_NOT)
|
||||
.add(new MatchAllDocsQuery(), Occur.MUST_NOT)
|
||||
.build();
|
||||
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
|
||||
// count of the first MUST_NOT clause is unknown, but the second MUST_NOT clause matches all
|
||||
// docs
|
||||
assertEquals(0, weight.count(reader.leaves().get(0)));
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("string", "xyz")), Occur.MUST)
|
||||
.add(unknownCountQuery, Occur.MUST_NOT)
|
||||
.add(new TermQuery(new Term("string", "abc")), Occur.MUST_NOT)
|
||||
.build();
|
||||
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
|
||||
// count of the first MUST_NOT clause is unknown, though the second MUST_NOT clause matches one
|
||||
// doc, we can't figure out the number of
|
||||
// docs
|
||||
assertEquals(-1, weight.count(reader.leaves().get(0)));
|
||||
|
||||
// test pure disjunction
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(unknownCountQuery, Occur.SHOULD)
|
||||
.add(new MatchAllDocsQuery(), Occur.SHOULD)
|
||||
.build();
|
||||
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
|
||||
// count of the first SHOULD clause is unknown, but the second SHOULD clause matches all docs
|
||||
assertEquals(2, weight.count(reader.leaves().get(0)));
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(unknownCountQuery, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("string", "abc")), Occur.SHOULD)
|
||||
.build();
|
||||
weight = searcher.createWeight(query, ScoreMode.COMPLETE, 1f);
|
||||
// count of the first SHOULD clause is unknown, though the second SHOULD clause matches one doc,
|
||||
// we can't figure out the number of
|
||||
// docs
|
||||
assertEquals(-1, weight.count(reader.leaves().get(0)));
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue