From 8cdd950056b722e7086f48fb84cf6c29ccf31bd3 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 3 Apr 2018 16:44:26 +0200 Subject: [PATCH] Fix some query extraction bugs. (#29283) While playing with the percolator I found two bugs: - Sometimes we set a min_should_match that is greater than the number of extractions. While this doesn't cause direct trouble, it does when the query is nested into a boolean query and the boolean query tries to compute the min_should_match for the entire query based on its own min_should_match and those of the sub queries. So I changed the code to throw an exception when min_should_match is greater than the number of extractions. - Boolean queries claim matches are verified when in fact they shouldn't. This is due to the fact that boolean queries assume that they are verified if all sub clauses are verified but things are more complex than that, eg. conjunctions that are nested in a disjunction or disjunctions that are nested in a conjunction can generally not be verified without running the query. --- .../percolator/QueryAnalyzer.java | 48 ++++-- .../percolator/CandidateQueryTests.java | 9 +- .../percolator/QueryAnalyzerTests.java | 146 ++++++++++++++++-- 3 files changed, 170 insertions(+), 33 deletions(-) diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 24b210c29d5..8f1bb2a9310 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -143,7 +143,7 @@ final class QueryAnalyzer { } private static BiFunction matchNoDocsQuery() { - return (query, version) -> new Result(true, Collections.emptySet(), 1); + return (query, version) -> new Result(true, Collections.emptySet(), 0); } private static BiFunction matchAllDocsQuery() { @@ -179,28 +179,28 @@ final class QueryAnalyzer { for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { terms.add(new QueryExtraction(new Term(iterator.field(), term))); } - return new Result(true, terms, 1); + return new Result(true, terms, Math.min(1, terms.size())); }; } private static BiFunction synonymQuery() { return (query, version) -> { Set terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); - return new Result(true, terms, 1); + return new Result(true, terms, Math.min(1, terms.size())); }; } private static BiFunction commonTermsQuery() { return (query, version) -> { Set terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); - return new Result(false, terms, 1); + return new Result(false, terms, Math.min(1, terms.size())); }; } private static BiFunction blendedTermQuery() { return (query, version) -> { Set terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); - return new Result(true, terms, 1); + return new Result(true, terms, Math.min(1, terms.size())); }; } @@ -208,7 +208,7 @@ final class QueryAnalyzer { return (query, version) -> { Term[] terms = ((PhraseQuery) query).getTerms(); if (terms.length == 0) { - return new Result(true, Collections.emptySet(), 1); + return new Result(true, Collections.emptySet(), 0); } if (version.onOrAfter(Version.V_6_1_0)) { @@ -232,7 +232,7 @@ final class QueryAnalyzer { return (query, version) -> { Term[][] terms = ((MultiPhraseQuery) query).getTermArrays(); if (terms.length == 0) { - return new Result(true, Collections.emptySet(), 1); + return new Result(true, Collections.emptySet(), 0); } if (version.onOrAfter(Version.V_6_1_0)) { @@ -297,7 +297,7 @@ final class QueryAnalyzer { for (SpanQuery clause : spanOrQuery.getClauses()) { terms.addAll(analyze(clause, version).extractions); } - return new Result(false, terms, 1); + return new Result(false, terms, Math.min(1, terms.size())); }; } @@ -334,6 +334,9 @@ final class QueryAnalyzer { numOptionalClauses++; } } + if (minimumShouldMatch > numOptionalClauses) { + return new Result(false, Collections.emptySet(), 0); + } if (numRequiredClauses > 0) { if (version.onOrAfter(Version.V_6_1_0)) { UnsupportedQueryException uqe = null; @@ -345,7 +348,12 @@ final class QueryAnalyzer { // since they are completely optional. try { - results.add(analyze(clause.getQuery(), version)); + Result subResult = analyze(clause.getQuery(), version); + if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) { + // doesn't match anything + return subResult; + } + results.add(subResult); } catch (UnsupportedQueryException e) { uqe = e; } @@ -400,7 +408,11 @@ final class QueryAnalyzer { } msm += resultMsm; - verified &= result.verified; + if (result.verified == false + // If some inner extractions are optional, the result can't be verified + || result.minimumShouldMatch < result.extractions.size()) { + verified = false; + } matchAllDocs &= result.matchAllDocs; extractions.addAll(result.extractions); } @@ -492,7 +504,7 @@ final class QueryAnalyzer { // Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE // If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions) if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) { - return new Result(true, Collections.emptySet(), 1); + return new Result(true, Collections.emptySet(), 0); } byte[] interval = new byte[16]; @@ -537,7 +549,15 @@ final class QueryAnalyzer { for (int i = 0; i < disjunctions.size(); i++) { Query disjunct = disjunctions.get(i); Result subResult = analyze(disjunct, version); - verified &= subResult.verified; + if (subResult.verified == false + // one of the sub queries requires more than one term to match, we can't + // verify it with a single top-level min_should_match + || subResult.minimumShouldMatch > 1 + // One of the inner clauses has multiple extractions, we won't be able to + // verify it with a single top-level min_should_match + || (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) { + verified = false; + } if (subResult.matchAllDocs) { numMatchAllClauses++; } @@ -683,6 +703,10 @@ final class QueryAnalyzer { final boolean matchAllDocs; Result(boolean verified, Set extractions, int minimumShouldMatch) { + if (minimumShouldMatch > extractions.size()) { + throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: " + + minimumShouldMatch + " > " + extractions.size()); + } this.extractions = extractions; this.verified = verified; this.minimumShouldMatch = minimumShouldMatch; diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java index 59f4e091140..27d72b29267 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java @@ -210,12 +210,13 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { new BytesRef(randomFrom(stringContent.get(field1))))); queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))), new BytesRef(randomFrom(stringContent.get(field1))))); - int numRandomBoolQueries = randomIntBetween(16, 32); + // many iterations with boolean queries, which are the most complex queries to deal with when nested + int numRandomBoolQueries = 1000; for (int i = 0; i < numRandomBoolQueries; i++) { queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues)); } queryFunctions.add(() -> { - int numClauses = randomIntBetween(1, 16); + int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); List clauses = new ArrayList<>(); for (int i = 0; i < numClauses; i++) { String field = randomFrom(stringFields); @@ -266,7 +267,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { private BooleanQuery createRandomBooleanQuery(int depth, List fields, Map> content, MappedFieldType intFieldType, List intValues) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); - int numClauses = randomIntBetween(1, 16); + int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often int numShouldClauses = 0; boolean onlyShouldClauses = rarely(); for (int i = 0; i < numClauses; i++) { @@ -313,7 +314,7 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { numShouldClauses++; } } - builder.setMinimumNumberShouldMatch(numShouldClauses); + builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses)); return builder.build(); } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 5968f8c3f83..7bcdcd2e1f6 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -44,6 +44,7 @@ import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.join.QueryBitSetProducer; import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.search.spans.SpanFirstQuery; @@ -227,23 +228,87 @@ public class QueryAnalyzerTests extends ESTestCase { public void testExtractQueryMetadata_booleanQuery_msm() { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(2); - TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1")); + Term term1 = new Term("_field", "_term1"); + TermQuery termQuery1 = new TermQuery(term1); builder.add(termQuery1, BooleanClause.Occur.SHOULD); - TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2")); + Term term2 = new Term("_field", "_term2"); + TermQuery termQuery2 = new TermQuery(term2); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3")); + Term term3 = new Term("_field", "_term3"); + TermQuery termQuery3 = new TermQuery(term3); builder.add(termQuery3, BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); Result result = analyze(booleanQuery, Version.CURRENT); assertThat(result.verified, is(true)); assertThat(result.minimumShouldMatch, equalTo(2)); - List extractions = new ArrayList<>(result.extractions); - extractions.sort(Comparator.comparing(extraction -> extraction.term)); - assertThat(extractions.size(), equalTo(3)); - assertThat(extractions.get(0).term, equalTo(new Term("_field", "_term1"))); - assertThat(extractions.get(1).term, equalTo(new Term("_field", "_term2"))); - assertThat(extractions.get(2).term, equalTo(new Term("_field", "_term3"))); + assertTermsEqual(result.extractions, term1, term2, term3); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD) + .setMinimumNumberShouldMatch(2); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertTermsEqual(result.extractions, term1, term2, term3); + + Term term4 = new Term("_field", "_term4"); + TermQuery termQuery4 = new TermQuery(term4); + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.MUST) + .add(termQuery2, Occur.FILTER) + .build(), Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(termQuery3, Occur.MUST) + .add(termQuery4, Occur.FILTER) + .build(), Occur.SHOULD); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertTermsEqual(result.extractions, term1, term2, term3, term4); + + Term term5 = new Term("_field", "_term5"); + TermQuery termQuery5 = new TermQuery(term5); + builder.add(termQuery5, Occur.SHOULD); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertTermsEqual(result.extractions, term1, term2, term3, term4, term5); + + builder.setMinimumNumberShouldMatch(2); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertTermsEqual(result.extractions, term1, term2, term3, term4, term5); + + builder.setMinimumNumberShouldMatch(3); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(5)); + assertTermsEqual(result.extractions, term1, term2, term3, term4, term5); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.SHOULD) + .add(new BooleanQuery.Builder().setMinimumNumberShouldMatch(1).build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(2); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + // ideally it would return no extractions, but the fact + // that it doesn't consider them verified is probably good enough + assertFalse(result.verified); } public void testExtractQueryMetadata_booleanQuery_msm_pre6dot1() { @@ -353,7 +418,7 @@ public class QueryAnalyzerTests extends ESTestCase { assertThat(result.minimumShouldMatch, equalTo(1)); builder = new BooleanQuery.Builder(); - builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32)); + builder.setMinimumNumberShouldMatch(randomIntBetween(1, 2)); builder.add(termQuery1, BooleanClause.Occur.SHOULD); builder.add(termQuery2, BooleanClause.Occur.SHOULD); result = analyze(builder.build(), Version.CURRENT); @@ -379,6 +444,54 @@ public class QueryAnalyzerTests extends ESTestCase { result = analyze(builder.build(), Version.CURRENT); assertThat("Prohibited clause, so candidate matches are not verified", result.verified, is(false)); assertThat(result.minimumShouldMatch, equalTo(1)); + + builder = new BooleanQuery.Builder(); + builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); + builder.add(termQuery2, BooleanClause.Occur.MUST_NOT); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Prohibited clause, so candidate matches are not verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + + TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3")); + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.FILTER) + .add(termQuery2, Occur.FILTER) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Inner clause that is not a pure disjunction, so candidate matches are not verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Inner clause that is a pure disjunction, so candidate matches are verified", result.verified, is(true)); + assertThat(result.minimumShouldMatch, equalTo(1)); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.MUST) + .add(termQuery3, Occur.FILTER); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Disjunctions of conjunctions can't be verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(2)); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.MUST) + .add(termQuery2, Occur.FILTER) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Conjunctions of disjunctions can't be verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); } public void testBooleanQueryWithMustAndShouldClauses() { @@ -564,16 +677,15 @@ public class QueryAnalyzerTests extends ESTestCase { Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"), Version.CURRENT); assertThat(result.verified, is(true)); assertEquals(0, result.extractions.size()); - assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.minimumShouldMatch, equalTo(0)); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST); result = analyze(bq.build(), Version.CURRENT); assertThat(result.verified, is(true)); - assertEquals(1, result.extractions.size()); - assertThat(result.minimumShouldMatch, equalTo(2)); - assertTermsEqual(result.extractions, new Term("field", "value")); + assertEquals(0, result.extractions.size()); + assertThat(result.minimumShouldMatch, equalTo(0)); bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); @@ -785,7 +897,7 @@ public class QueryAnalyzerTests extends ESTestCase { SynonymQuery query = new SynonymQuery(); Result result = analyze(query, Version.CURRENT); assertThat(result.verified, is(true)); - assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.minimumShouldMatch, equalTo(0)); assertThat(result.extractions.isEmpty(), is(true)); query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2")); @@ -997,7 +1109,7 @@ public class QueryAnalyzerTests extends ESTestCase { Query query = IntPoint.newRangeQuery("_field", 20, 10); Result result = analyze(query, Version.CURRENT); assertTrue(result.verified); - assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.minimumShouldMatch, equalTo(0)); assertThat(result.extractions.size(), equalTo(0)); } @@ -1179,7 +1291,7 @@ public class QueryAnalyzerTests extends ESTestCase { BooleanClause.Occur.SHOULD ); result = analyze(builder.build(), Version.CURRENT); - assertThat(result.verified, is(true)); + assertThat(result.verified, is(false)); assertThat(result.matchAllDocs, is(false)); assertThat(result.minimumShouldMatch, equalTo(2)); assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),