LUCENE-7416: Rewrite optimizations for BooleanQuery.

This commit is contained in:
Adrien Grand 2016-08-25 15:42:28 +02:00
parent 18306628a9
commit 81c796a1fa
3 changed files with 105 additions and 1 deletions

View File

@ -31,7 +31,12 @@ Other
* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward) * LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)
======================= Lucene 6.3.0 ======================= ======================= Lucene 6.3.0 =======================
(No Changes)
Optimizations
* LUCENE-7416: BooleanQuery optimizes queries that have queries that occur both
in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT
clauses. (Spyros Kapnissis via Adrien Grand)
======================= Lucene 6.2.0 ======================= ======================= Lucene 6.2.0 =======================

View File

@ -272,6 +272,17 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
} }
} }
// Check whether some clauses are both required and excluded
if (clauseSets.get(Occur.MUST_NOT).size() > 0) {
final Set<Query> reqAndExclQueries = new HashSet<Query>(clauseSets.get(Occur.FILTER));
reqAndExclQueries.addAll(clauseSets.get(Occur.MUST));
reqAndExclQueries.retainAll(clauseSets.get(Occur.MUST_NOT));
if (reqAndExclQueries.isEmpty() == false) {
return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT");
}
}
// remove FILTER clauses that are also MUST clauses // remove FILTER clauses that are also MUST clauses
// or that match all documents // or that match all documents
if (clauseSets.get(Occur.MUST).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { if (clauseSets.get(Occur.MUST).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
@ -293,6 +304,35 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
} }
} }
// convert FILTER clauses that are also SHOULD clauses to MUST clauses
if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) {
final Collection<Query> filters = clauseSets.get(Occur.FILTER);
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
Set<Query> intersection = new HashSet<>(filters);
intersection.retainAll(shoulds);
if (intersection.isEmpty() == false) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
int minShouldMatch = getMinimumNumberShouldMatch();
for (BooleanClause clause : clauses) {
if (intersection.contains(clause.getQuery())) {
if (clause.getOccur() == Occur.SHOULD) {
builder.add(new BooleanClause(clause.getQuery(), Occur.MUST));
minShouldMatch--;
}
} else {
builder.add(clause);
}
}
builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch));
return builder.build();
}
}
// Rewrite queries whose single scoring clause is a MUST clause on a // Rewrite queries whose single scoring clause is a MUST clause on a
// MatchAllDocsQuery to a ConstantScoreQuery // MatchAllDocsQuery to a ConstantScoreQuery
{ {

View File

@ -206,6 +206,65 @@ public class TestBooleanRewrites extends LuceneTestCase {
assertEquals(expected, searcher.rewrite(bq)); assertEquals(expected, searcher.rewrite(bq));
} }
// Duplicate Should and Filter query is converted to Must (with minShouldMatch -1)
public void testConvertShouldAndFilterToMust() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
// no minShouldMatch
BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.build();
assertEquals(new TermQuery(new Term("foo", "bar")), searcher.rewrite(bq));
// minShouldMatch is set to -1
bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quz")), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
BooleanQuery expected = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
.add(new TermQuery(new Term("foo", "quz")), Occur.SHOULD)
.setMinimumNumberShouldMatch(1)
.build();
assertEquals(expected, searcher.rewrite(bq));
}
// Duplicate Must or Filter with MustNot returns no match
public void testDuplicateMustOrFilterWithMustNot() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
// Test Must with MustNot
BooleanQuery bq = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
// other terms
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.add(new TermQuery(new Term("foo", "bad")), Occur.SHOULD)
//
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq));
// Test Filter with MustNot
BooleanQuery bq2 = new BooleanQuery.Builder()
.add(new TermQuery(new Term("foo", "bar")), Occur.FILTER)
// other terms
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.add(new TermQuery(new Term("foo", "bad")), Occur.SHOULD)
//
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2));
}
public void testRemoveMatchAllFilter() throws IOException { public void testRemoveMatchAllFilter() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader()); IndexSearcher searcher = newSearcher(new MultiReader());