percolator: also extract match_all queries

I've seen several cases where match_all queries were being used inside percolator queries,
because these queries were created generated by other systems.

Extracting these queries will allow the percolator at query time in a filter context
to skip over these queries without parsing or validating that these queries actually
match with the document being percolated.
This commit is contained in:
Martijn van Groningen 2017-11-29 15:17:03 +01:00
parent af8bd8bbcf
commit e9160fc014
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
4 changed files with 170 additions and 21 deletions

View File

@ -458,21 +458,28 @@ public class PercolatorFieldMapper extends FieldMapper {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType())); doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
return; return;
} }
for (QueryAnalyzer.QueryExtraction term : result.extractions) { for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
if (term.term != null) { if (extraction.term != null) {
BytesRefBuilder builder = new BytesRefBuilder(); BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field())); builder.append(new BytesRef(extraction.field()));
builder.append(FIELD_VALUE_SEPARATOR); builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes()); builder.append(extraction.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType())); doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
} else if (term.range != null) { } else if (extraction.range != null) {
byte[] min = term.range.lowerPoint; byte[] min = extraction.range.lowerPoint;
byte[] max = term.range.upperPoint; byte[] max = extraction.range.upperPoint;
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max))); doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(extraction.range.fieldName, min, max)));
} }
} }
Version indexVersionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
if (result.matchAllDocs) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
if (result.verified) { if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType())); doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
}
} else if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
} else { } else {
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType())); doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
} }
@ -481,7 +488,7 @@ public class PercolatorFieldMapper extends FieldMapper {
for (IndexableField field : fields) { for (IndexableField field : fields) {
context.doc().add(field); context.doc().add(field);
} }
if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0)) { if (indexVersionCreated.onOrAfter(Version.V_6_1_0)) {
doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch)); doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch));
} }
} }

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
@ -70,6 +71,7 @@ final class QueryAnalyzer {
static { static {
Map<Class<? extends Query>, BiFunction<Query, Version, Result>> map = new HashMap<>(); Map<Class<? extends Query>, BiFunction<Query, Version, Result>> map = new HashMap<>();
map.put(MatchNoDocsQuery.class, matchNoDocsQuery()); map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
map.put(MatchAllDocsQuery.class, matchAllDocsQuery());
map.put(ConstantScoreQuery.class, constantScoreQuery()); map.put(ConstantScoreQuery.class, constantScoreQuery());
map.put(BoostQuery.class, boostQuery()); map.put(BoostQuery.class, boostQuery());
map.put(TermQuery.class, termQuery()); map.put(TermQuery.class, termQuery());
@ -142,6 +144,10 @@ final class QueryAnalyzer {
return (query, version) -> new Result(true, Collections.emptySet(), 1); return (query, version) -> new Result(true, Collections.emptySet(), 1);
} }
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
return (query, version) -> new Result(true, true);
}
private static BiFunction<Query, Version, Result> constantScoreQuery() { private static BiFunction<Query, Version, Result> constantScoreQuery() {
return (query, boosts) -> { return (query, boosts) -> {
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery(); Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
@ -356,6 +362,7 @@ final class QueryAnalyzer {
int msm = 0; int msm = 0;
boolean requiredShouldClauses = minimumShouldMatch > 0 && numOptionalClauses > 0; boolean requiredShouldClauses = minimumShouldMatch > 0 && numOptionalClauses > 0;
boolean verified = uqe == null && numProhibitedClauses == 0 && requiredShouldClauses == false; boolean verified = uqe == null && numProhibitedClauses == 0 && requiredShouldClauses == false;
boolean matchAllDocs = true;
Set<QueryExtraction> extractions = new HashSet<>(); Set<QueryExtraction> extractions = new HashSet<>();
Set<String> seenRangeFields = new HashSet<>(); Set<String> seenRangeFields = new HashSet<>();
for (Result result : results) { for (Result result : results) {
@ -376,10 +383,15 @@ final class QueryAnalyzer {
msm += result.minimumShouldMatch; msm += result.minimumShouldMatch;
} }
verified &= result.verified; verified &= result.verified;
matchAllDocs &= result.matchAllDocs;
extractions.addAll(result.extractions); extractions.addAll(result.extractions);
} }
if (matchAllDocs) {
return new Result(matchAllDocs, verified);
} else {
return new Result(verified, extractions, msm); return new Result(verified, extractions, msm);
} }
}
} else { } else {
Set<QueryExtraction> bestClause = null; Set<QueryExtraction> bestClause = null;
UnsupportedQueryException uqe = null; UnsupportedQueryException uqe = null;
@ -498,12 +510,15 @@ final class QueryAnalyzer {
if (version.before(Version.V_6_1_0)) { if (version.before(Version.V_6_1_0)) {
verified &= requiredShouldClauses <= 1; verified &= requiredShouldClauses <= 1;
} }
int numMatchAllClauses = 0;
Set<QueryExtraction> terms = new HashSet<>(); Set<QueryExtraction> terms = new HashSet<>();
for (int i = 0; i < disjunctions.size(); i++) { for (int i = 0; i < disjunctions.size(); i++) {
Query disjunct = disjunctions.get(i); Query disjunct = disjunctions.get(i);
Result subResult = analyze(disjunct, version); Result subResult = analyze(disjunct, version);
verified &= subResult.verified; verified &= subResult.verified;
if (subResult.matchAllDocs) {
numMatchAllClauses++;
}
terms.addAll(subResult.extractions); terms.addAll(subResult.extractions);
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]); QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
@ -512,6 +527,7 @@ final class QueryAnalyzer {
rangeFieldNames[i] = t[0].range.fieldName; rangeFieldNames[i] = t[0].range.fieldName;
} }
} }
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
int msm = 0; int msm = 0;
if (version.onOrAfter(Version.V_6_1_0)) { if (version.onOrAfter(Version.V_6_1_0)) {
@ -532,8 +548,12 @@ final class QueryAnalyzer {
} else { } else {
msm = 1; msm = 1;
} }
if (matchAllDocs) {
return new Result(matchAllDocs, verified);
} else {
return new Result(verified, terms, msm); return new Result(verified, terms, msm);
} }
}
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) { static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
assert extractions1 != null || extractions2 != null; assert extractions1 != null || extractions2 != null;
@ -619,11 +639,20 @@ final class QueryAnalyzer {
final Set<QueryExtraction> extractions; final Set<QueryExtraction> extractions;
final boolean verified; final boolean verified;
final int minimumShouldMatch; final int minimumShouldMatch;
final boolean matchAllDocs;
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) { Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
this.extractions = extractions; this.extractions = extractions;
this.verified = verified; this.verified = verified;
this.minimumShouldMatch = minimumShouldMatch; this.minimumShouldMatch = minimumShouldMatch;
this.matchAllDocs = false;
}
Result(boolean matchAllDocs, boolean verified) {
this.extractions = Collections.emptySet();
this.verified = verified;
this.minimumShouldMatch = 0;
this.matchAllDocs = matchAllDocs;
} }
} }

View File

@ -56,6 +56,8 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
@ -193,6 +195,8 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) { if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
} else if (randomBoolean()) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
} }
return builder.build(); return builder.build();
}); });
@ -202,6 +206,20 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) { if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT); builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
} else if (randomBoolean()) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
}
if (randomBoolean()) {
builder.setMinimumNumberShouldMatch(2);
} }
return builder.build(); return builder.build();
}); });
@ -467,6 +485,52 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
duelRun(queryStore, memoryIndex, shardSearcher); duelRun(queryStore, memoryIndex, shardSearcher);
} }
public void testPercolateMatchAll() throws Exception {
List<ParseContext.Document> docs = new ArrayList<>();
addQuery(new MatchAllDocsQuery(), docs);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
addQuery(builder.build(), docs);
indexWriter.addDocuments(docs);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
shardSearcher.setQueryCache(null);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore,
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
assertEquals(3L, topDocs.totalHits);
assertEquals(3, topDocs.scoreDocs.length);
assertEquals(0, topDocs.scoreDocs[0].doc);
assertEquals(1, topDocs.scoreDocs[1].doc);
assertEquals(4, topDocs.scoreDocs[2].doc);
topDocs = shardSearcher.search(new ConstantScoreQuery(query), 10);
assertEquals(3L, topDocs.totalHits);
assertEquals(3, topDocs.scoreDocs.length);
assertEquals(0, topDocs.scoreDocs[0].doc);
assertEquals(1, topDocs.scoreDocs[1].doc);
assertEquals(4, topDocs.scoreDocs[2].doc);
}
public void testPercolateSmallAndLargeDocument() throws Exception { public void testPercolateSmallAndLargeDocument() throws Exception {
List<ParseContext.Document> docs = new ArrayList<>(); List<ParseContext.Document> docs = new ArrayList<>();
BooleanQuery.Builder builder = new BooleanQuery.Builder(); BooleanQuery.Builder builder = new BooleanQuery.Builder();

View File

@ -594,13 +594,18 @@ public class QueryAnalyzerTests extends ESTestCase {
} }
public void testExtractQueryMetadata_matchAllDocsQuery() { public void testExtractQueryMetadata_matchAllDocsQuery() {
expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery(), Version.CURRENT)); Result result = analyze(new MatchAllDocsQuery(), Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
BooleanQuery.Builder builder = new BooleanQuery.Builder(); BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
Result result = analyze(builder.build(), Version.CURRENT); result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(false)); assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1)); assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "value")); assertTermsEqual(result.extractions, new Term("field", "value"));
@ -609,34 +614,78 @@ public class QueryAnalyzerTests extends ESTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build(); BooleanQuery bq1 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq1, Version.CURRENT)); result = analyze(bq1, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder(); builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build(); BooleanQuery bq2 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Version.CURRENT)); result = analyze(bq2, Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder(); builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq3 = builder.build(); BooleanQuery bq3 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq3, Version.CURRENT)); result = analyze(bq3, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder(); builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq4 = builder.build(); BooleanQuery bq4 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq4, Version.CURRENT)); result = analyze(bq4, Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder(); builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD); builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq5 = builder.build(); BooleanQuery bq5 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq5, Version.CURRENT)); result = analyze(bq5, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.setMinimumNumberShouldMatch(2);
BooleanQuery bq6 = builder.build();
result = analyze(bq6, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertThat(result.extractions.size(), equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "value"));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.setMinimumNumberShouldMatch(2);
BooleanQuery bq7 = builder.build();
result = analyze(bq7, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
} }
public void testExtractQueryMetadata_unsupportedQuery() { public void testExtractQueryMetadata_unsupportedQuery() {