From 08a9dfddeb4ffd32115673487eb58caef9b3b6c2 Mon Sep 17 00:00:00 2001 From: JoeHF Date: Mon, 20 Jun 2022 20:04:38 +0800 Subject: [PATCH] LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965) --- lucene/CHANGES.txt | 2 + .../apache/lucene/search/BooleanQuery.java | 24 ++++++ .../search/TestBooleanMinShouldMatch.java | 25 ++++++ .../lucene/search/TestBooleanRewrites.java | 83 ++++++++++++++++++- .../lucene/search/TestMinShouldMatch2.java | 8 +- 5 files changed, 137 insertions(+), 5 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1522816605c..d3406e159ff 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -93,6 +93,8 @@ Optimizations --------------------- * LUCENE-8519: MultiDocValues.getNormValues should not call getMergedFieldInfos (Rushabh Shah) +* LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch. (Fang Hou) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index 165bf12de9f..c7bf1b39d01 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -533,6 +533,30 @@ public class BooleanQuery extends Query implements Iterable { } } + // SHOULD clause count less than or equal to minimumNumberShouldMatch + // Important(this can only be processed after nested clauses have been flattened) + { + final Collection shoulds = clauseSets.get(Occur.SHOULD); + if (shoulds.size() > 0) { + if (shoulds.size() < minimumNumberShouldMatch) { + return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch"); + } + + if (shoulds.size() == minimumNumberShouldMatch) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (BooleanClause clause : clauses) { + if (clause.getOccur() == Occur.SHOULD) { + builder.add(clause.getQuery(), Occur.MUST); + } else { + builder.add(clause); + } + } + + return builder.build(); + } + } + } + return super.rewrite(reader); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java index b1b1448ebc4..1db3b4ebc74 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java @@ -436,6 +436,31 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase { assertSubsetOfSameScores(q2.build(), top1, top2); } + public void testFlattenInnerDisjunctions() throws Exception { + Query q = + new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(2) + .add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST) + .build(); + verifyNrHits(q, 1); + + Query inner = + new BooleanQuery.Builder() + .add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD) + .build(); + q = + new BooleanQuery.Builder() + .setMinimumNumberShouldMatch(2) + .add(inner, BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST) + .build(); + + verifyNrHits(q, 0); + } + protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception { System.err.println("------- " + test + " -------"); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java index 8ffb04bced3..f0bf8ff96ff 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanRewrites.java @@ -604,7 +604,7 @@ public class TestBooleanRewrites extends LuceneTestCase { .add(inner, Occur.SHOULD) .add(new TermQuery(new Term("foo", "baz")), Occur.MUST) .build(); - assertSame(query, searcher.rewrite(query)); + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query)); inner = new BooleanQuery.Builder() @@ -783,4 +783,85 @@ public class TestBooleanRewrites extends LuceneTestCase { .build()); assertEquals(expected, searcher.rewrite(query)); } + + public void testShouldClausesLessThanOrEqualToMinimumNumberShouldMatch() throws IOException { + IndexSearcher searcher = newSearcher(new MultiReader()); + + // The only one SHOULD clause is MatchNoDocsQuery + BooleanQuery query = + new BooleanQuery.Builder() + .add(new PhraseQuery.Builder().build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(1) + .build(); + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query)); + query = + new BooleanQuery.Builder() + .add(new PhraseQuery.Builder().build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(0) + .build(); + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query)); + + // Meaningful SHOULD clause count is less than MinimumNumberShouldMatch + query = + new BooleanQuery.Builder() + .add(new PhraseQuery.Builder().build(), Occur.SHOULD) + .add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query)); + + // Meaningful SHOULD clause count is equal to MinimumNumberShouldMatch + query = + new BooleanQuery.Builder() + .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD) + .add( + new PhraseQuery.Builder() + .add(new Term("field", "a")) + .add(new Term("field", "c")) + .build(), + Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + BooleanQuery expected = + new BooleanQuery.Builder() + .add(new TermQuery(new Term("field", "b")), Occur.MUST) + .add( + new PhraseQuery.Builder() + .add(new Term("field", "a")) + .add(new Term("field", "c")) + .build(), + Occur.MUST) + .build(); + assertEquals(expected, searcher.rewrite(query)); + + // Invalid Inner query get removed after rewrite + Query inner = + new BooleanQuery.Builder() + .add(new PhraseQuery.Builder().build(), Occur.SHOULD) + .add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + + query = + new BooleanQuery.Builder() + .add(inner, Occur.SHOULD) + .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD) + .add( + new PhraseQuery.Builder() + .add(new Term("field", "a")) + .add(new Term("field", "c")) + .build(), + Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + assertEquals(expected, searcher.rewrite(query)); + + query = + new BooleanQuery.Builder() + .add(inner, Occur.SHOULD) + .add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query)); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index e293854fecb..adde74c7ba0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -253,7 +253,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { termsList.addAll(Arrays.asList(rareTerms)); String[] terms = termsList.toArray(new String[0]); - for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) { + for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) { Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES); Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER); assertNext(expected, actual); @@ -273,7 +273,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { String[] terms = termsList.toArray(new String[0]); for (int amount = 25; amount < 200; amount += 25) { - for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) { + for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) { Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES); Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER); assertAdvance(expected, actual, amount); @@ -294,7 +294,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { Collections.shuffle(termsList, random()); for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) { String[] terms = termsList.subList(0, numTerms).toArray(new String[0]); - for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) { + for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) { Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES); Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER); assertNext(expected, actual); @@ -318,7 +318,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase { for (int amount = 25; amount < 200; amount += 25) { for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) { String[] terms = termsList.subList(0, numTerms).toArray(new String[0]); - for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) { + for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) { Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES); Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER); assertAdvance(expected, actual, amount);