From 81c796a1fa30a9bec77712a8f8e188b347dc490a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 25 Aug 2016 15:42:28 +0200 Subject: [PATCH] LUCENE-7416: Rewrite optimizations for BooleanQuery. --- lucene/CHANGES.txt | 7 ++- .../apache/lucene/search/BooleanQuery.java | 40 +++++++++++++ .../lucene/search/TestBooleanRewrites.java | 59 +++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fbe016bedec..cb90ec1712a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -31,7 +31,12 @@ Other * LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward) ======================= Lucene 6.3.0 ======================= -(No Changes) + +Optimizations + +* LUCENE-7416: BooleanQuery optimizes queries that have queries that occur both + in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT + clauses. (Spyros Kapnissis via Adrien Grand) ======================= Lucene 6.2.0 ======================= diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 3742bfc71f6..b2477e883b9 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -272,6 +272,17 @@ public class BooleanQuery extends Query implements Iterable { } } + // Check whether some clauses are both required and excluded + if (clauseSets.get(Occur.MUST_NOT).size() > 0) { + final Set reqAndExclQueries = new HashSet(clauseSets.get(Occur.FILTER)); + reqAndExclQueries.addAll(clauseSets.get(Occur.MUST)); + reqAndExclQueries.retainAll(clauseSets.get(Occur.MUST_NOT)); + + if (reqAndExclQueries.isEmpty() == false) { + return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT"); + } + } + // remove FILTER clauses that are also MUST clauses // or that match all documents if (clauseSets.get(Occur.MUST).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { @@ -293,6 +304,35 @@ public class BooleanQuery extends Query implements Iterable { } } + // convert FILTER clauses that are also SHOULD clauses to MUST clauses + if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { + final Collection filters = clauseSets.get(Occur.FILTER); + final Collection shoulds = clauseSets.get(Occur.SHOULD); + + Set intersection = new HashSet<>(filters); + intersection.retainAll(shoulds); + + if (intersection.isEmpty() == false) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + int minShouldMatch = getMinimumNumberShouldMatch(); + + for (BooleanClause clause : clauses) { + if (intersection.contains(clause.getQuery())) { + if (clause.getOccur() == Occur.SHOULD) { + builder.add(new BooleanClause(clause.getQuery(), Occur.MUST)); + minShouldMatch--; + } + } else { + builder.add(clause); + } + } + + builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch)); + return builder.build(); + } + } + + // Rewrite queries whose single scoring clause is a MUST clause on a // MatchAllDocsQuery to a ConstantScoreQuery { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 0886340f9e7..44708413c4b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -205,6 +205,65 @@ public class TestBooleanRewrites extends LuceneTestCase { .build(); assertEquals(expected, searcher.rewrite(bq)); } + + // Duplicate Should and Filter query is converted to Must (with minShouldMatch -1) + public void testConvertShouldAndFilterToMust() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + // no minShouldMatch + BooleanQuery bq = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .build(); + assertEquals(new TermQuery(new Term("foo", "bar")), searcher.rewrite(bq)); + + + // minShouldMatch is set to -1 + bq = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quz")), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + + BooleanQuery expected = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + .add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD) + .add(new TermQuery(new Term("foo", "quz")), Occur.SHOULD) + .setMinimumNumberShouldMatch(1) + .build(); + assertEquals(expected, searcher.rewrite(bq)); + } + + // Duplicate Must or Filter with MustNot returns no match + public void testDuplicateMustOrFilterWithMustNot() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + // Test Must with MustNot + BooleanQuery bq = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST) + // other terms + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .add(new TermQuery(new Term("foo", "bad")), Occur.SHOULD) + // + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT) + .build(); + + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq)); + + // Test Filter with MustNot + BooleanQuery bq2 = new BooleanQuery.Builder() + .add(new TermQuery(new Term("foo", "bar")), Occur.FILTER) + // other terms + .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) + .add(new TermQuery(new Term("foo", "bad")), Occur.SHOULD) + // + .add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT) + .build(); + + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(bq2)); + } public void testRemoveMatchAllFilter() throws IOException { IndexSearcher searcher = newSearcher(new MultiReader());