LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch (#965)

This commit is contained in:
JoeHF 2022-06-20 20:04:38 +08:00 committed by GitHub
parent 6ba759df86
commit 08a9dfddeb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 137 additions and 5 deletions

View File

@ -93,6 +93,8 @@ Optimizations
---------------------
* LUCENE-8519: MultiDocValues.getNormValues should not call getMergedFieldInfos (Rushabh Shah)
* LUCENE-10618: Implement BooleanQuery rewrite rules based for minimumShouldMatch. (Fang Hou)
Bug Fixes
---------------------

View File

@ -533,6 +533,30 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
}
// SHOULD clause count less than or equal to minimumNumberShouldMatch
// Important(this can only be processed after nested clauses have been flattened)
{
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
if (shoulds.size() > 0) {
if (shoulds.size() < minimumNumberShouldMatch) {
return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
}
if (shoulds.size() == minimumNumberShouldMatch) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
if (clause.getOccur() == Occur.SHOULD) {
builder.add(clause.getQuery(), Occur.MUST);
} else {
builder.add(clause);
}
}
return builder.build();
}
}
}
return super.rewrite(reader);
}

View File

@ -436,6 +436,31 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
assertSubsetOfSameScores(q2.build(), top1, top2);
}
public void testFlattenInnerDisjunctions() throws Exception {
Query q =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
.build();
verifyNrHits(q, 1);
Query inner =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
.build();
q =
new BooleanQuery.Builder()
.setMinimumNumberShouldMatch(2)
.add(inner, BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
.build();
verifyNrHits(q, 0);
}
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {
System.err.println("------- " + test + " -------");

View File

@ -604,7 +604,7 @@ public class TestBooleanRewrites extends LuceneTestCase {
.add(inner, Occur.SHOULD)
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
.build();
assertSame(query, searcher.rewrite(query));
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
inner =
new BooleanQuery.Builder()
@ -783,4 +783,85 @@ public class TestBooleanRewrites extends LuceneTestCase {
.build());
assertEquals(expected, searcher.rewrite(query));
}
public void testShouldClausesLessThanOrEqualToMinimumNumberShouldMatch() throws IOException {
IndexSearcher searcher = newSearcher(new MultiReader());
// The only one SHOULD clause is MatchNoDocsQuery
BooleanQuery query =
new BooleanQuery.Builder()
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
.setMinimumNumberShouldMatch(1)
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
query =
new BooleanQuery.Builder()
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
.setMinimumNumberShouldMatch(0)
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
// Meaningful SHOULD clause count is less than MinimumNumberShouldMatch
query =
new BooleanQuery.Builder()
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
.add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
// Meaningful SHOULD clause count is equal to MinimumNumberShouldMatch
query =
new BooleanQuery.Builder()
.add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
.add(
new PhraseQuery.Builder()
.add(new Term("field", "a"))
.add(new Term("field", "c"))
.build(),
Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
BooleanQuery expected =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("field", "b")), Occur.MUST)
.add(
new PhraseQuery.Builder()
.add(new Term("field", "a"))
.add(new Term("field", "c"))
.build(),
Occur.MUST)
.build();
assertEquals(expected, searcher.rewrite(query));
// Invalid Inner query get removed after rewrite
Query inner =
new BooleanQuery.Builder()
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
.add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
query =
new BooleanQuery.Builder()
.add(inner, Occur.SHOULD)
.add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
.add(
new PhraseQuery.Builder()
.add(new Term("field", "a"))
.add(new Term("field", "c"))
.build(),
Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
assertEquals(expected, searcher.rewrite(query));
query =
new BooleanQuery.Builder()
.add(inner, Occur.SHOULD)
.add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
.setMinimumNumberShouldMatch(2)
.build();
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
}
}

View File

@ -253,7 +253,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
termsList.addAll(Arrays.asList(rareTerms));
String[] terms = termsList.toArray(new String[0]);
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertNext(expected, actual);
@ -273,7 +273,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
String[] terms = termsList.toArray(new String[0]);
for (int amount = 25; amount < 200; amount += 25) {
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertAdvance(expected, actual, amount);
@ -294,7 +294,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
Collections.shuffle(termsList, random());
for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertNext(expected, actual);
@ -318,7 +318,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
for (int amount = 25; amount < 200; amount += 25) {
for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
assertAdvance(expected, actual, amount);