LUCENE-7840: Avoid Building Scorer Supplier For Redundant SHOULD Clauses

For boolean queries, we should eliminate redundant SHOULD clauses during
query rewrite and not build the scorer supplier, as opposed to
eliminating them during weight construction

Signed-off-by: jimczi <jimczi@apache.org>
This commit is contained in:
Atri Sharma 2019-05-06 13:49:47 +05:30 committed by jimczi
parent cde00b9a84
commit c988b04b18
6 changed files with 88 additions and 21 deletions

View File

@ -35,11 +35,17 @@ Other
======================= Lucene 8.2.0 =======================
Bug Fixes:
Bug Fixes
* LUCENE-8785: Ensure new threadstates are locked before retrieving the number of active threadstates.
This causes assertion errors and potentially broken field attributes in the IndexWriter when
IndexWriter#deleteAll is called while actively indexing. (Simon Willnauer)
Improvements
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
as opposed to eliminating them during scoring construction. (Atri Sharma via Jim Ferenczi)
======================= Lucene 8.1.0 =======================
API Changes

View File

@ -192,18 +192,33 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
private BooleanQuery rewriteNoScoring() {
if (clauseSets.get(Occur.MUST).size() == 0) {
boolean keepShould = getMinimumNumberShouldMatch() > 0
|| (clauseSets.get(Occur.MUST).size() + clauseSets.get(Occur.FILTER).size() == 0);
if (clauseSets.get(Occur.MUST).size() == 0 && keepShould) {
return this;
}
BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
newQuery.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch());
for (BooleanClause clause : clauses) {
if (clause.getOccur() == Occur.MUST) {
newQuery.add(clause.getQuery(), Occur.FILTER);
} else {
newQuery.add(clause);
switch (clause.getOccur()) {
case MUST: {
newQuery.add(clause.getQuery(), Occur.FILTER);
break;
}
case SHOULD: {
if (keepShould) {
newQuery.add(clause);
}
break;
}
default: {
newQuery.add(clause);
}
}
}
return newQuery.build();
}

View File

@ -387,11 +387,6 @@ final class BooleanWeight extends Weight {
return null;
}
// we don't need scores, so if we have required clauses, drop optional clauses completely
if (scoreMode.needsScores() == false && minShouldMatch == 0 && scorers.get(Occur.MUST).size() + scorers.get(Occur.FILTER).size() > 0) {
scorers.get(Occur.SHOULD).clear();
}
return new Boolean2ScorerSupplier(this, scorers, scoreMode, minShouldMatch);
}

View File

@ -544,4 +544,58 @@ public class TestBooleanRewrites extends LuceneTestCase {
.build();
assertSame(query, searcher.rewrite(query));
}
public void testDiscardShouldClauses() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field f = newTextField("field", "a", Field.Store.NO);
doc.add(f);
w.addDocument(doc);
w.commit();
DirectoryReader reader = w.getReader();
final IndexSearcher searcher = new IndexSearcher(reader);
BooleanQuery.Builder query1 = new BooleanQuery.Builder();
query1.add(new TermQuery(new Term("field", "a")), Occur.MUST);
query1.add(new TermQuery(new Term("field", "b")), Occur.SHOULD);
query1.setMinimumNumberShouldMatch(0);
Weight weight = searcher.createWeight(searcher.rewrite(query1.build()), ScoreMode.COMPLETE_NO_SCORES, 1);
Query rewrittenQuery1 = weight.getQuery();
assertTrue(rewrittenQuery1 instanceof BooleanQuery);
BooleanQuery booleanRewrittenQuery1 = (BooleanQuery) rewrittenQuery1;
for (BooleanClause clause : booleanRewrittenQuery1.clauses()) {
assertNotEquals(clause.getOccur(), Occur.SHOULD);
}
BooleanQuery.Builder query2 = new BooleanQuery.Builder();
query2.add(new TermQuery(new Term("field", "a")), Occur.MUST);
query2.add(new TermQuery(new Term("field", "b")), Occur.SHOULD);
query2.add(new TermQuery(new Term("field", "c")), Occur.FILTER);
query2.setMinimumNumberShouldMatch(0);
weight = searcher.createWeight(searcher.rewrite(query2.build()), ScoreMode.COMPLETE_NO_SCORES, 1);
Query rewrittenQuery2 = weight.getQuery();
assertTrue(rewrittenQuery2 instanceof BooleanQuery);
BooleanQuery booleanRewrittenQuery2 = (BooleanQuery) rewrittenQuery1;
for (BooleanClause clause : booleanRewrittenQuery2.clauses()) {
assertNotEquals(clause.getOccur(), Occur.SHOULD);
}
reader.close();
w.close();
dir.close();
}
}

View File

@ -826,18 +826,15 @@ public class TestLRUQueryCache extends LuceneTestCase {
searcher.setQueryCachingPolicy(ALWAYS_CACHE);
BooleanQuery.Builder bq = new BooleanQuery.Builder();
TermQuery should = new TermQuery(new Term("foo", "baz"));
TermQuery must = new TermQuery(new Term("foo", "bar"));
TermQuery filter = new TermQuery(new Term("foo", "quux"));
TermQuery mustNot = new TermQuery(new Term("foo", "foo"));
bq.add(should, Occur.SHOULD);
bq.add(must, Occur.MUST);
bq.add(filter, Occur.FILTER);
bq.add(mustNot, Occur.MUST_NOT);
// same bq but with FILTER instead of MUST
BooleanQuery.Builder bq2 = new BooleanQuery.Builder();
bq2.add(should, Occur.SHOULD);
bq2.add(must, Occur.FILTER);
bq2.add(filter, Occur.FILTER);
bq2.add(mustNot, Occur.MUST_NOT);
@ -849,7 +846,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
queryCache.clear();
assertEquals(Collections.emptySet(), new HashSet<>(queryCache.cachedQueries()));
searcher.search(new ConstantScoreQuery(bq.build()), 1);
assertEquals(new HashSet<>(Arrays.asList(bq2.build(), should, must, filter, mustNot)), new HashSet<>(queryCache.cachedQueries()));
assertEquals(new HashSet<>(Arrays.asList(bq2.build(), must, filter, mustNot)), new HashSet<>(queryCache.cachedQueries()));
reader.close();
dir.close();

View File

@ -113,7 +113,7 @@ public class TestMatchesIterator extends LuceneTestCase {
};
private void checkMatches(Query q, String field, int[][] expected) throws IOException {
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
for (int i = 0; i < expected.length; i++) {
LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
int doc = expected[i][0] - ctx.docBase;
@ -133,7 +133,7 @@ public class TestMatchesIterator extends LuceneTestCase {
}
private void checkLabelCount(Query q, String field, int[] expected) throws IOException {
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
for (int i = 0; i < expected.length; i++) {
LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
int doc = i - ctx.docBase;
@ -172,7 +172,7 @@ public class TestMatchesIterator extends LuceneTestCase {
}
private void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
for (int i = 0; i < expected.length; i++) {
LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
int doc = i - ctx.docBase;
@ -188,7 +188,7 @@ public class TestMatchesIterator extends LuceneTestCase {
}
private void assertIsLeafMatch(Query q, String field) throws IOException {
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
for (int i = 0; i < searcher.reader.maxDoc(); i++) {
LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
int doc = i - ctx.docBase;
@ -207,7 +207,7 @@ public class TestMatchesIterator extends LuceneTestCase {
}
private void checkTermMatches(Query q, String field, TermMatch[][][] expected) throws IOException {
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
for (int i = 0; i < expected.length; i++) {
LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
int doc = i - ctx.docBase;