mirror of https://github.com/apache/lucene.git
LUCENE-10618: Implement BooleanQuery rewrite rules based on minimumShouldMatch (#965)
This commit is contained in:
parent
6ba759df86
commit
08a9dfddeb
|
@ -93,6 +93,8 @@ Optimizations
|
|||
---------------------
|
||||
* LUCENE-8519: MultiDocValues.getNormValues should not call getMergedFieldInfos (Rushabh Shah)
|
||||
|
||||
* LUCENE-10618: Implement BooleanQuery rewrite rules based on minimumShouldMatch. (Fang Hou)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -533,6 +533,30 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
}
|
||||
}
|
||||
|
||||
// SHOULD clause count less than or equal to minimumNumberShouldMatch
|
||||
// Important: this can only be processed after nested clauses have been flattened
|
||||
{
|
||||
final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
|
||||
if (shoulds.size() > 0) {
|
||||
if (shoulds.size() < minimumNumberShouldMatch) {
|
||||
return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
|
||||
}
|
||||
|
||||
if (shoulds.size() == minimumNumberShouldMatch) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
for (BooleanClause clause : clauses) {
|
||||
if (clause.getOccur() == Occur.SHOULD) {
|
||||
builder.add(clause.getQuery(), Occur.MUST);
|
||||
} else {
|
||||
builder.add(clause);
|
||||
}
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return super.rewrite(reader);
|
||||
}
|
||||
|
||||
|
|
|
@ -436,6 +436,31 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
|
|||
assertSubsetOfSameScores(q2.build(), top1, top2);
|
||||
}
|
||||
|
||||
public void testFlattenInnerDisjunctions() throws Exception {
|
||||
Query q =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
|
||||
.build();
|
||||
verifyNrHits(q, 1);
|
||||
|
||||
Query inner =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("all", "all")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
q =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.add(inner, BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("data", "2")), BooleanClause.Occur.MUST)
|
||||
.build();
|
||||
|
||||
verifyNrHits(q, 0);
|
||||
}
|
||||
|
||||
protected void printHits(String test, ScoreDoc[] h, IndexSearcher searcher) throws Exception {
|
||||
|
||||
System.err.println("------- " + test + " -------");
|
||||
|
|
|
@ -604,7 +604,7 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
.add(inner, Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
assertSame(query, searcher.rewrite(query));
|
||||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
|
||||
|
||||
inner =
|
||||
new BooleanQuery.Builder()
|
||||
|
@ -783,4 +783,85 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
.build());
|
||||
assertEquals(expected, searcher.rewrite(query));
|
||||
}
|
||||
|
||||
public void testShouldClausesLessThanOrEqualToMinimumNumberShouldMatch() throws IOException {
|
||||
IndexSearcher searcher = newSearcher(new MultiReader());
|
||||
|
||||
// The only SHOULD clause rewrites to a MatchNoDocsQuery
|
||||
BooleanQuery query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(1)
|
||||
.build();
|
||||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(0)
|
||||
.build();
|
||||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
|
||||
|
||||
// Meaningful SHOULD clause count is less than MinimumNumberShouldMatch
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
|
||||
.add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.build();
|
||||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
|
||||
|
||||
// Meaningful SHOULD clause count is equal to MinimumNumberShouldMatch
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
|
||||
.add(
|
||||
new PhraseQuery.Builder()
|
||||
.add(new Term("field", "a"))
|
||||
.add(new Term("field", "c"))
|
||||
.build(),
|
||||
Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.build();
|
||||
BooleanQuery expected =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("field", "b")), Occur.MUST)
|
||||
.add(
|
||||
new PhraseQuery.Builder()
|
||||
.add(new Term("field", "a"))
|
||||
.add(new Term("field", "c"))
|
||||
.build(),
|
||||
Occur.MUST)
|
||||
.build();
|
||||
assertEquals(expected, searcher.rewrite(query));
|
||||
|
||||
// An inner query that can never match is removed by the rewrite
|
||||
Query inner =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery.Builder().build(), Occur.SHOULD)
|
||||
.add(new PhraseQuery.Builder().add(new Term("field", "a")).build(), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.build();
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
|
||||
.add(
|
||||
new PhraseQuery.Builder()
|
||||
.add(new Term("field", "a"))
|
||||
.add(new Term("field", "c"))
|
||||
.build(),
|
||||
Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.build();
|
||||
assertEquals(expected, searcher.rewrite(query));
|
||||
|
||||
query =
|
||||
new BooleanQuery.Builder()
|
||||
.add(inner, Occur.SHOULD)
|
||||
.add(new PhraseQuery.Builder().add(new Term("field", "b")).build(), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2)
|
||||
.build();
|
||||
assertEquals(new MatchNoDocsQuery(), searcher.rewrite(query));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -253,7 +253,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
termsList.addAll(Arrays.asList(rareTerms));
|
||||
String[] terms = termsList.toArray(new String[0]);
|
||||
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
|
||||
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
|
||||
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
|
||||
assertNext(expected, actual);
|
||||
|
@ -273,7 +273,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
String[] terms = termsList.toArray(new String[0]);
|
||||
|
||||
for (int amount = 25; amount < 200; amount += 25) {
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
|
||||
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
|
||||
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
|
||||
assertAdvance(expected, actual, amount);
|
||||
|
@ -294,7 +294,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
Collections.shuffle(termsList, random());
|
||||
for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
|
||||
String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
|
||||
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
|
||||
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
|
||||
assertNext(expected, actual);
|
||||
|
@ -318,7 +318,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
for (int amount = 25; amount < 200; amount += 25) {
|
||||
for (int numTerms = 2; numTerms <= termsList.size(); numTerms++) {
|
||||
String[] terms = termsList.subList(0, numTerms).toArray(new String[0]);
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch <= terms.length; minNrShouldMatch++) {
|
||||
for (int minNrShouldMatch = 1; minNrShouldMatch < terms.length; minNrShouldMatch++) {
|
||||
Scorer expected = scorer(terms, minNrShouldMatch, Mode.DOC_VALUES);
|
||||
Scorer actual = scorer(terms, minNrShouldMatch, Mode.SCORER);
|
||||
assertAdvance(expected, actual, amount);
|
||||
|
|
Loading…
Reference in New Issue