From e9160fc014f3c4558629ec613b53e34d6983574e Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 29 Nov 2017 15:17:03 +0100 Subject: [PATCH] percolator: also extract match_all queries I've seen several cases where match_all queries were being used inside percolator queries, because these queries were created generated by other systems. Extracting these queries will allow the percolator at query time in a filter context to skip over these queries without parsing or validating that these queries actually match with the document being percolated. --- .../percolator/PercolatorFieldMapper.java | 27 +++++--- .../percolator/QueryAnalyzer.java | 35 +++++++++- .../percolator/CandidateQueryTests.java | 64 ++++++++++++++++++ .../percolator/QueryAnalyzerTests.java | 65 ++++++++++++++++--- 4 files changed, 170 insertions(+), 21 deletions(-) diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java index 1df6935c61a..e44a36cd267 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/PercolatorFieldMapper.java @@ -458,20 +458,27 @@ public class PercolatorFieldMapper extends FieldMapper { doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType())); return; } - for (QueryAnalyzer.QueryExtraction term : result.extractions) { - if (term.term != null) { + for (QueryAnalyzer.QueryExtraction extraction : result.extractions) { + if (extraction.term != null) { BytesRefBuilder builder = new BytesRefBuilder(); - builder.append(new BytesRef(term.field())); + builder.append(new BytesRef(extraction.field())); builder.append(FIELD_VALUE_SEPARATOR); - builder.append(term.bytes()); + builder.append(extraction.bytes()); doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType())); - } else if (term.range != null) { - byte[] min = term.range.lowerPoint; - byte[] max = term.range.upperPoint; - doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max))); + } else if (extraction.range != null) { + byte[] min = extraction.range.lowerPoint; + byte[] max = extraction.range.upperPoint; + doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(extraction.range.fieldName, min, max))); } } - if (result.verified) { + + Version indexVersionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated(); + if (result.matchAllDocs) { + doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType())); + if (result.verified) { + doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType())); + } + } else if (result.verified) { doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType())); } else { doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType())); @@ -481,7 +488,7 @@ public class PercolatorFieldMapper extends FieldMapper { for (IndexableField field : fields) { context.doc().add(field); } - if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0)) { + if (indexVersionCreated.onOrAfter(Version.V_6_1_0)) { doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch)); } } diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 284eca1b59a..ae619a1d494 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -29,6 +29,7 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; @@ -70,6 +71,7 @@ final class QueryAnalyzer { static { Map, BiFunction> map = new HashMap<>(); map.put(MatchNoDocsQuery.class, matchNoDocsQuery()); + map.put(MatchAllDocsQuery.class, matchAllDocsQuery()); map.put(ConstantScoreQuery.class, constantScoreQuery()); map.put(BoostQuery.class, boostQuery()); map.put(TermQuery.class, termQuery()); @@ -142,6 +144,10 @@ final class QueryAnalyzer { return (query, version) -> new Result(true, Collections.emptySet(), 1); } + private static BiFunction matchAllDocsQuery() { + return (query, version) -> new Result(true, true); + } + private static BiFunction constantScoreQuery() { return (query, boosts) -> { Query wrappedQuery = ((ConstantScoreQuery) query).getQuery(); @@ -356,6 +362,7 @@ final class QueryAnalyzer { int msm = 0; boolean requiredShouldClauses = minimumShouldMatch > 0 && numOptionalClauses > 0; boolean verified = uqe == null && numProhibitedClauses == 0 && requiredShouldClauses == false; + boolean matchAllDocs = true; Set extractions = new HashSet<>(); Set seenRangeFields = new HashSet<>(); for (Result result : results) { @@ -376,9 +383,14 @@ final class QueryAnalyzer { msm += result.minimumShouldMatch; } verified &= result.verified; + matchAllDocs &= result.matchAllDocs; extractions.addAll(result.extractions); } - return new Result(verified, extractions, msm); + if (matchAllDocs) { + return new Result(matchAllDocs, verified); + } else { + return new Result(verified, extractions, msm); + } } } else { Set bestClause = null; @@ -498,12 +510,15 @@ final class QueryAnalyzer { if (version.before(Version.V_6_1_0)) { verified &= requiredShouldClauses <= 1; } - + int numMatchAllClauses = 0; Set terms = new HashSet<>(); for (int i = 0; i < disjunctions.size(); i++) { Query disjunct = disjunctions.get(i); Result subResult = analyze(disjunct, version); verified &= subResult.verified; + if (subResult.matchAllDocs) { + numMatchAllClauses++; + } terms.addAll(subResult.extractions); QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]); @@ -512,6 +527,7 @@ final class QueryAnalyzer { rangeFieldNames[i] = t[0].range.fieldName; } } + boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses; int msm = 0; if (version.onOrAfter(Version.V_6_1_0)) { @@ -532,7 +548,11 @@ final class QueryAnalyzer { } else { msm = 1; } - return new Result(verified, terms, msm); + if (matchAllDocs) { + return new Result(matchAllDocs, verified); + } else { + return new Result(verified, terms, msm); + } } static Set selectBestExtraction(Set extractions1, Set extractions2) { @@ -619,11 +639,20 @@ final class QueryAnalyzer { final Set extractions; final boolean verified; final int minimumShouldMatch; + final boolean matchAllDocs; Result(boolean verified, Set extractions, int minimumShouldMatch) { this.extractions = extractions; this.verified = verified; this.minimumShouldMatch = minimumShouldMatch; + this.matchAllDocs = false; + } + + Result(boolean matchAllDocs, boolean verified) { + this.extractions = Collections.emptySet(); + this.verified = verified; + this.minimumShouldMatch = 0; + this.matchAllDocs = matchAllDocs; } } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java index 4ecd82fd876..fea5bd893a9 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java @@ -56,6 +56,8 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -193,6 +195,8 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); if (randomBoolean()) { builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); + } else if (randomBoolean()) { + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); } return builder.build(); }); @@ -202,6 +206,20 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); if (randomBoolean()) { builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); + } else if (randomBoolean()) { + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); + } + return builder.build(); + }); + queryFunctions.add((id) -> { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD); + if (randomBoolean()) { + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + } + if (randomBoolean()) { + builder.setMinimumNumberShouldMatch(2); } return builder.build(); }); @@ -467,6 +485,52 @@ public class CandidateQueryTests extends ESSingleNodeTestCase { duelRun(queryStore, memoryIndex, shardSearcher); } + public void testPercolateMatchAll() throws Exception { + List docs = new ArrayList<>(); + addQuery(new MatchAllDocsQuery(), docs); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + addQuery(builder.build(), docs); + builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + addQuery(builder.build(), docs); + builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); + addQuery(builder.build(), docs); + builder = new BooleanQuery.Builder(); + builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + addQuery(builder.build(), docs); + indexWriter.addDocuments(docs); + indexWriter.close(); + directoryReader = DirectoryReader.open(directory); + IndexSearcher shardSearcher = newSearcher(directoryReader); + shardSearcher.setQueryCache(null); + + MemoryIndex memoryIndex = new MemoryIndex(); + memoryIndex.addField("field", "value1", new WhitespaceAnalyzer()); + IndexSearcher percolateSearcher = memoryIndex.createSearcher(); + PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore, + Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT); + TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true); + assertEquals(3L, topDocs.totalHits); + assertEquals(3, topDocs.scoreDocs.length); + assertEquals(0, topDocs.scoreDocs[0].doc); + assertEquals(1, topDocs.scoreDocs[1].doc); + assertEquals(4, topDocs.scoreDocs[2].doc); + + topDocs = shardSearcher.search(new ConstantScoreQuery(query), 10); + assertEquals(3L, topDocs.totalHits); + assertEquals(3, topDocs.scoreDocs.length); + assertEquals(0, topDocs.scoreDocs[0].doc); + assertEquals(1, topDocs.scoreDocs[1].doc); + assertEquals(4, topDocs.scoreDocs[2].doc); + } + public void testPercolateSmallAndLargeDocument() throws Exception { List docs = new ArrayList<>(); BooleanQuery.Builder builder = new BooleanQuery.Builder(); diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index f2f5a4e5861..34637465af5 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -594,13 +594,18 @@ public class QueryAnalyzerTests extends ESTestCase { } public void testExtractQueryMetadata_matchAllDocsQuery() { - expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery(), Version.CURRENT)); + Result result = analyze(new MatchAllDocsQuery(), Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); - Result result = analyze(builder.build(), Version.CURRENT); - assertThat(result.verified, is(false)); + result = analyze(builder.build(), Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(false)); assertThat(result.minimumShouldMatch, equalTo(1)); assertTermsEqual(result.extractions, new Term("field", "value")); @@ -609,34 +614,78 @@ public class QueryAnalyzerTests extends ESTestCase { builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); BooleanQuery bq1 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq1, Version.CURRENT)); + result = analyze(bq1, Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); BooleanQuery bq2 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Version.CURRENT)); + result = analyze(bq2, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq3 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq3, Version.CURRENT)); + result = analyze(bq3, Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); builder = new BooleanQuery.Builder(); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq4 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq4, Version.CURRENT)); + result = analyze(bq4, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); BooleanQuery bq5 = builder.build(); - expectThrows(UnsupportedQueryException.class, () -> analyze(bq5, Version.CURRENT)); + result = analyze(bq5, Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); + + builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); + builder.setMinimumNumberShouldMatch(2); + BooleanQuery bq6 = builder.build(); + result = analyze(bq6, Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.extractions.size(), equalTo(1)); + assertTermsEqual(result.extractions, new Term("field", "value")); + + builder = new BooleanQuery.Builder(); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); + builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); + builder.setMinimumNumberShouldMatch(2); + BooleanQuery bq7 = builder.build(); + result = analyze(bq7, Version.CURRENT); + assertThat(result.verified, is(true)); + assertThat(result.matchAllDocs, is(true)); + assertThat(result.minimumShouldMatch, equalTo(0)); + assertThat(result.extractions.size(), equalTo(0)); } public void testExtractQueryMetadata_unsupportedQuery() {