Fix some query extraction bugs. (#29283)
While playing with the percolator I found two bugs: - Sometimes we set a min_should_match that is greater than the number of extractions. While this doesn't cause direct trouble, it does when the query is nested into a boolean query and the boolean query tries to compute the min_should_match for the entire query based on its own min_should_match and those of the sub queries. So I changed the code to throw an exception when min_should_match is greater than the number of extractions. - Boolean queries claim matches are verified when in fact they shouldn't. This is due to the fact that boolean queries assume that they are verified if all sub clauses are verified but things are more complex than that, eg. conjunctions that are nested in a disjunction or disjunctions that are nested in a conjunction can generally not be verified without running the query.
This commit is contained in:
parent
dc1c16964a
commit
8cdd950056
|
@ -143,7 +143,7 @@ final class QueryAnalyzer {
|
|||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> matchNoDocsQuery() {
|
||||
return (query, version) -> new Result(true, Collections.emptySet(), 1);
|
||||
return (query, version) -> new Result(true, Collections.emptySet(), 0);
|
||||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
|
||||
|
@ -179,28 +179,28 @@ final class QueryAnalyzer {
|
|||
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
|
||||
terms.add(new QueryExtraction(new Term(iterator.field(), term)));
|
||||
}
|
||||
return new Result(true, terms, 1);
|
||||
return new Result(true, terms, Math.min(1, terms.size()));
|
||||
};
|
||||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> synonymQuery() {
|
||||
return (query, version) -> {
|
||||
Set<QueryExtraction> terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
|
||||
return new Result(true, terms, 1);
|
||||
return new Result(true, terms, Math.min(1, terms.size()));
|
||||
};
|
||||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> commonTermsQuery() {
|
||||
return (query, version) -> {
|
||||
Set<QueryExtraction> terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
|
||||
return new Result(false, terms, 1);
|
||||
return new Result(false, terms, Math.min(1, terms.size()));
|
||||
};
|
||||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> blendedTermQuery() {
|
||||
return (query, version) -> {
|
||||
Set<QueryExtraction> terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet());
|
||||
return new Result(true, terms, 1);
|
||||
return new Result(true, terms, Math.min(1, terms.size()));
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -208,7 +208,7 @@ final class QueryAnalyzer {
|
|||
return (query, version) -> {
|
||||
Term[] terms = ((PhraseQuery) query).getTerms();
|
||||
if (terms.length == 0) {
|
||||
return new Result(true, Collections.emptySet(), 1);
|
||||
return new Result(true, Collections.emptySet(), 0);
|
||||
}
|
||||
|
||||
if (version.onOrAfter(Version.V_6_1_0)) {
|
||||
|
@ -232,7 +232,7 @@ final class QueryAnalyzer {
|
|||
return (query, version) -> {
|
||||
Term[][] terms = ((MultiPhraseQuery) query).getTermArrays();
|
||||
if (terms.length == 0) {
|
||||
return new Result(true, Collections.emptySet(), 1);
|
||||
return new Result(true, Collections.emptySet(), 0);
|
||||
}
|
||||
|
||||
if (version.onOrAfter(Version.V_6_1_0)) {
|
||||
|
@ -297,7 +297,7 @@ final class QueryAnalyzer {
|
|||
for (SpanQuery clause : spanOrQuery.getClauses()) {
|
||||
terms.addAll(analyze(clause, version).extractions);
|
||||
}
|
||||
return new Result(false, terms, 1);
|
||||
return new Result(false, terms, Math.min(1, terms.size()));
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -334,6 +334,9 @@ final class QueryAnalyzer {
|
|||
numOptionalClauses++;
|
||||
}
|
||||
}
|
||||
if (minimumShouldMatch > numOptionalClauses) {
|
||||
return new Result(false, Collections.emptySet(), 0);
|
||||
}
|
||||
if (numRequiredClauses > 0) {
|
||||
if (version.onOrAfter(Version.V_6_1_0)) {
|
||||
UnsupportedQueryException uqe = null;
|
||||
|
@ -345,7 +348,12 @@ final class QueryAnalyzer {
|
|||
// since they are completely optional.
|
||||
|
||||
try {
|
||||
results.add(analyze(clause.getQuery(), version));
|
||||
Result subResult = analyze(clause.getQuery(), version);
|
||||
if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) {
|
||||
// doesn't match anything
|
||||
return subResult;
|
||||
}
|
||||
results.add(subResult);
|
||||
} catch (UnsupportedQueryException e) {
|
||||
uqe = e;
|
||||
}
|
||||
|
@ -400,7 +408,11 @@ final class QueryAnalyzer {
|
|||
}
|
||||
msm += resultMsm;
|
||||
|
||||
verified &= result.verified;
|
||||
if (result.verified == false
|
||||
// If some inner extractions are optional, the result can't be verified
|
||||
|| result.minimumShouldMatch < result.extractions.size()) {
|
||||
verified = false;
|
||||
}
|
||||
matchAllDocs &= result.matchAllDocs;
|
||||
extractions.addAll(result.extractions);
|
||||
}
|
||||
|
@ -492,7 +504,7 @@ final class QueryAnalyzer {
|
|||
// Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE
|
||||
// If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions)
|
||||
if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) {
|
||||
return new Result(true, Collections.emptySet(), 1);
|
||||
return new Result(true, Collections.emptySet(), 0);
|
||||
}
|
||||
|
||||
byte[] interval = new byte[16];
|
||||
|
@ -537,7 +549,15 @@ final class QueryAnalyzer {
|
|||
for (int i = 0; i < disjunctions.size(); i++) {
|
||||
Query disjunct = disjunctions.get(i);
|
||||
Result subResult = analyze(disjunct, version);
|
||||
verified &= subResult.verified;
|
||||
if (subResult.verified == false
|
||||
// one of the sub queries requires more than one term to match, we can't
|
||||
// verify it with a single top-level min_should_match
|
||||
|| subResult.minimumShouldMatch > 1
|
||||
// One of the inner clauses has multiple extractions, we won't be able to
|
||||
// verify it with a single top-level min_should_match
|
||||
|| (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) {
|
||||
verified = false;
|
||||
}
|
||||
if (subResult.matchAllDocs) {
|
||||
numMatchAllClauses++;
|
||||
}
|
||||
|
@ -683,6 +703,10 @@ final class QueryAnalyzer {
|
|||
final boolean matchAllDocs;
|
||||
|
||||
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
|
||||
if (minimumShouldMatch > extractions.size()) {
|
||||
throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: "
|
||||
+ minimumShouldMatch + " > " + extractions.size());
|
||||
}
|
||||
this.extractions = extractions;
|
||||
this.verified = verified;
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
|
|
|
@ -210,12 +210,13 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
new BytesRef(randomFrom(stringContent.get(field1)))));
|
||||
queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))),
|
||||
new BytesRef(randomFrom(stringContent.get(field1)))));
|
||||
int numRandomBoolQueries = randomIntBetween(16, 32);
|
||||
// many iterations with boolean queries, which are the most complex queries to deal with when nested
|
||||
int numRandomBoolQueries = 1000;
|
||||
for (int i = 0; i < numRandomBoolQueries; i++) {
|
||||
queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues));
|
||||
}
|
||||
queryFunctions.add(() -> {
|
||||
int numClauses = randomIntBetween(1, 16);
|
||||
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4));
|
||||
List<Query> clauses = new ArrayList<>();
|
||||
for (int i = 0; i < numClauses; i++) {
|
||||
String field = randomFrom(stringFields);
|
||||
|
@ -266,7 +267,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
private BooleanQuery createRandomBooleanQuery(int depth, List<String> fields, Map<String, List<String>> content,
|
||||
MappedFieldType intFieldType, List<Integer> intValues) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
int numClauses = randomIntBetween(1, 16);
|
||||
int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often
|
||||
int numShouldClauses = 0;
|
||||
boolean onlyShouldClauses = rarely();
|
||||
for (int i = 0; i < numClauses; i++) {
|
||||
|
@ -313,7 +314,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
numShouldClauses++;
|
||||
}
|
||||
}
|
||||
builder.setMinimumNumberShouldMatch(numShouldClauses);
|
||||
builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.lucene.search.SynonymQuery;
|
|||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.join.QueryBitSetProducer;
|
||||
import org.apache.lucene.search.join.ScoreMode;
|
||||
import org.apache.lucene.search.spans.SpanFirstQuery;
|
||||
|
@ -227,23 +228,87 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
public void testExtractQueryMetadata_booleanQuery_msm() {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(2);
|
||||
TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1"));
|
||||
Term term1 = new Term("_field", "_term1");
|
||||
TermQuery termQuery1 = new TermQuery(term1);
|
||||
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
|
||||
TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2"));
|
||||
Term term2 = new Term("_field", "_term2");
|
||||
TermQuery termQuery2 = new TermQuery(term2);
|
||||
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3"));
|
||||
Term term3 = new Term("_field", "_term3");
|
||||
TermQuery termQuery3 = new TermQuery(term3);
|
||||
builder.add(termQuery3, BooleanClause.Occur.SHOULD);
|
||||
|
||||
BooleanQuery booleanQuery = builder.build();
|
||||
Result result = analyze(booleanQuery, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
List<QueryExtraction> extractions = new ArrayList<>(result.extractions);
|
||||
extractions.sort(Comparator.comparing(extraction -> extraction.term));
|
||||
assertThat(extractions.size(), equalTo(3));
|
||||
assertThat(extractions.get(0).term, equalTo(new Term("_field", "_term1")));
|
||||
assertThat(extractions.get(1).term, equalTo(new Term("_field", "_term2")));
|
||||
assertThat(extractions.get(2).term, equalTo(new Term("_field", "_term3")));
|
||||
assertTermsEqual(result.extractions, term1, term2, term3);
|
||||
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.SHOULD)
|
||||
.add(termQuery2, Occur.SHOULD)
|
||||
.build(), Occur.SHOULD)
|
||||
.add(termQuery3, Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2);
|
||||
booleanQuery = builder.build();
|
||||
result = analyze(booleanQuery, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
assertTermsEqual(result.extractions, term1, term2, term3);
|
||||
|
||||
Term term4 = new Term("_field", "_term4");
|
||||
TermQuery termQuery4 = new TermQuery(term4);
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.MUST)
|
||||
.add(termQuery2, Occur.FILTER)
|
||||
.build(), Occur.SHOULD)
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery3, Occur.MUST)
|
||||
.add(termQuery4, Occur.FILTER)
|
||||
.build(), Occur.SHOULD);
|
||||
booleanQuery = builder.build();
|
||||
result = analyze(booleanQuery, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
assertTermsEqual(result.extractions, term1, term2, term3, term4);
|
||||
|
||||
Term term5 = new Term("_field", "_term5");
|
||||
TermQuery termQuery5 = new TermQuery(term5);
|
||||
builder.add(termQuery5, Occur.SHOULD);
|
||||
booleanQuery = builder.build();
|
||||
result = analyze(booleanQuery, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertTermsEqual(result.extractions, term1, term2, term3, term4, term5);
|
||||
|
||||
builder.setMinimumNumberShouldMatch(2);
|
||||
booleanQuery = builder.build();
|
||||
result = analyze(booleanQuery, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(3));
|
||||
assertTermsEqual(result.extractions, term1, term2, term3, term4, term5);
|
||||
|
||||
builder.setMinimumNumberShouldMatch(3);
|
||||
booleanQuery = builder.build();
|
||||
result = analyze(booleanQuery, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(5));
|
||||
assertTermsEqual(result.extractions, term1, term2, term3, term4, term5);
|
||||
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.SHOULD)
|
||||
.add(termQuery2, Occur.SHOULD)
|
||||
.build(), Occur.SHOULD)
|
||||
.add(new BooleanQuery.Builder().setMinimumNumberShouldMatch(1).build(), Occur.SHOULD)
|
||||
.setMinimumNumberShouldMatch(2);
|
||||
booleanQuery = builder.build();
|
||||
result = analyze(booleanQuery, Version.CURRENT);
|
||||
// ideally it would return no extractions, but the fact
|
||||
// that it doesn't consider them verified is probably good enough
|
||||
assertFalse(result.verified);
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_booleanQuery_msm_pre6dot1() {
|
||||
|
@ -353,7 +418,7 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32));
|
||||
builder.setMinimumNumberShouldMatch(randomIntBetween(1, 2));
|
||||
builder.add(termQuery1, BooleanClause.Occur.SHOULD);
|
||||
builder.add(termQuery2, BooleanClause.Occur.SHOULD);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
|
@ -379,6 +444,54 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat("Prohibited clause, so candidate matches are not verified", result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER);
|
||||
builder.add(termQuery2, BooleanClause.Occur.MUST_NOT);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat("Prohibited clause, so candidate matches are not verified", result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
|
||||
TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3"));
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.FILTER)
|
||||
.add(termQuery2, Occur.FILTER)
|
||||
.build(), Occur.SHOULD)
|
||||
.add(termQuery3, Occur.SHOULD);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat("Inner clause that is not a pure disjunction, so candidate matches are not verified", result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.SHOULD)
|
||||
.add(termQuery2, Occur.SHOULD)
|
||||
.build(), Occur.SHOULD)
|
||||
.add(termQuery3, Occur.SHOULD);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat("Inner clause that is a pure disjunction, so candidate matches are verified", result.verified, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.SHOULD)
|
||||
.add(termQuery2, Occur.SHOULD)
|
||||
.build(), Occur.MUST)
|
||||
.add(termQuery3, Occur.FILTER);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat("Disjunctions of conjunctions can't be verified", result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
|
||||
builder = new BooleanQuery.Builder()
|
||||
.add(new BooleanQuery.Builder()
|
||||
.add(termQuery1, Occur.MUST)
|
||||
.add(termQuery2, Occur.FILTER)
|
||||
.build(), Occur.SHOULD)
|
||||
.add(termQuery3, Occur.SHOULD);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat("Conjunctions of disjunctions can't be verified", result.verified, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
}
|
||||
|
||||
public void testBooleanQueryWithMustAndShouldClauses() {
|
||||
|
@ -564,16 +677,15 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertEquals(0, result.extractions.size());
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
|
||||
bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST);
|
||||
result = analyze(bq.build(), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertEquals(1, result.extractions.size());
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
assertTermsEqual(result.extractions, new Term("field", "value"));
|
||||
assertEquals(0, result.extractions.size());
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
|
||||
bq = new BooleanQuery.Builder();
|
||||
bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
|
||||
|
@ -785,7 +897,7 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
SynonymQuery query = new SynonymQuery();
|
||||
Result result = analyze(query, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.isEmpty(), is(true));
|
||||
|
||||
query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2"));
|
||||
|
@ -997,7 +1109,7 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
Query query = IntPoint.newRangeQuery("_field", 20, 10);
|
||||
Result result = analyze(query, Version.CURRENT);
|
||||
assertTrue(result.verified);
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
}
|
||||
|
||||
|
@ -1179,7 +1291,7 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
BooleanClause.Occur.SHOULD
|
||||
);
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(2));
|
||||
assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),
|
||||
|
|
Loading…
Reference in New Issue