percolator: also extract match_all queries

I've seen several cases where match_all queries were being used inside percolator queries,
because these queries were created generated by other systems.

Extracting these queries will allow the percolator at query time in a filter context
to skip over these queries without parsing or validating that these queries actually
match with the document being percolated.
This commit is contained in:
Martijn van Groningen 2017-11-29 15:17:03 +01:00
parent af8bd8bbcf
commit e9160fc014
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
4 changed files with 170 additions and 21 deletions

View File

@ -458,20 +458,27 @@ public class PercolatorFieldMapper extends FieldMapper {
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
return;
}
for (QueryAnalyzer.QueryExtraction term : result.extractions) {
if (term.term != null) {
for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
if (extraction.term != null) {
BytesRefBuilder builder = new BytesRefBuilder();
builder.append(new BytesRef(term.field()));
builder.append(new BytesRef(extraction.field()));
builder.append(FIELD_VALUE_SEPARATOR);
builder.append(term.bytes());
builder.append(extraction.bytes());
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
} else if (term.range != null) {
byte[] min = term.range.lowerPoint;
byte[] max = term.range.upperPoint;
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max)));
} else if (extraction.range != null) {
byte[] min = extraction.range.lowerPoint;
byte[] max = extraction.range.upperPoint;
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(extraction.range.fieldName, min, max)));
}
}
if (result.verified) {
Version indexVersionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
if (result.matchAllDocs) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
}
} else if (result.verified) {
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
} else {
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
@ -481,7 +488,7 @@ public class PercolatorFieldMapper extends FieldMapper {
for (IndexableField field : fields) {
context.doc().add(field);
}
if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0)) {
if (indexVersionCreated.onOrAfter(Version.V_6_1_0)) {
doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch));
}
}

View File

@ -29,6 +29,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
@ -70,6 +71,7 @@ final class QueryAnalyzer {
static {
Map<Class<? extends Query>, BiFunction<Query, Version, Result>> map = new HashMap<>();
map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
map.put(MatchAllDocsQuery.class, matchAllDocsQuery());
map.put(ConstantScoreQuery.class, constantScoreQuery());
map.put(BoostQuery.class, boostQuery());
map.put(TermQuery.class, termQuery());
@ -142,6 +144,10 @@ final class QueryAnalyzer {
return (query, version) -> new Result(true, Collections.emptySet(), 1);
}
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
return (query, version) -> new Result(true, true);
}
private static BiFunction<Query, Version, Result> constantScoreQuery() {
return (query, boosts) -> {
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
@ -356,6 +362,7 @@ final class QueryAnalyzer {
int msm = 0;
boolean requiredShouldClauses = minimumShouldMatch > 0 && numOptionalClauses > 0;
boolean verified = uqe == null && numProhibitedClauses == 0 && requiredShouldClauses == false;
boolean matchAllDocs = true;
Set<QueryExtraction> extractions = new HashSet<>();
Set<String> seenRangeFields = new HashSet<>();
for (Result result : results) {
@ -376,9 +383,14 @@ final class QueryAnalyzer {
msm += result.minimumShouldMatch;
}
verified &= result.verified;
matchAllDocs &= result.matchAllDocs;
extractions.addAll(result.extractions);
}
return new Result(verified, extractions, msm);
if (matchAllDocs) {
return new Result(matchAllDocs, verified);
} else {
return new Result(verified, extractions, msm);
}
}
} else {
Set<QueryExtraction> bestClause = null;
@ -498,12 +510,15 @@ final class QueryAnalyzer {
if (version.before(Version.V_6_1_0)) {
verified &= requiredShouldClauses <= 1;
}
int numMatchAllClauses = 0;
Set<QueryExtraction> terms = new HashSet<>();
for (int i = 0; i < disjunctions.size(); i++) {
Query disjunct = disjunctions.get(i);
Result subResult = analyze(disjunct, version);
verified &= subResult.verified;
if (subResult.matchAllDocs) {
numMatchAllClauses++;
}
terms.addAll(subResult.extractions);
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
@ -512,6 +527,7 @@ final class QueryAnalyzer {
rangeFieldNames[i] = t[0].range.fieldName;
}
}
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
int msm = 0;
if (version.onOrAfter(Version.V_6_1_0)) {
@ -532,7 +548,11 @@ final class QueryAnalyzer {
} else {
msm = 1;
}
return new Result(verified, terms, msm);
if (matchAllDocs) {
return new Result(matchAllDocs, verified);
} else {
return new Result(verified, terms, msm);
}
}
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
@ -619,11 +639,20 @@ final class QueryAnalyzer {
final Set<QueryExtraction> extractions;
final boolean verified;
final int minimumShouldMatch;
final boolean matchAllDocs;
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
this.extractions = extractions;
this.verified = verified;
this.minimumShouldMatch = minimumShouldMatch;
this.matchAllDocs = false;
}
Result(boolean matchAllDocs, boolean verified) {
this.extractions = Collections.emptySet();
this.verified = verified;
this.minimumShouldMatch = 0;
this.matchAllDocs = matchAllDocs;
}
}

View File

@ -56,6 +56,8 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@ -193,6 +195,8 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
} else if (randomBoolean()) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
@ -202,6 +206,20 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
} else if (randomBoolean()) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
}
return builder.build();
});
queryFunctions.add((id) -> {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
if (randomBoolean()) {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
}
if (randomBoolean()) {
builder.setMinimumNumberShouldMatch(2);
}
return builder.build();
});
@ -467,6 +485,52 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
duelRun(queryStore, memoryIndex, shardSearcher);
}
public void testPercolateMatchAll() throws Exception {
List<ParseContext.Document> docs = new ArrayList<>();
addQuery(new MatchAllDocsQuery(), docs);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
addQuery(builder.build(), docs);
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
addQuery(builder.build(), docs);
indexWriter.addDocuments(docs);
indexWriter.close();
directoryReader = DirectoryReader.open(directory);
IndexSearcher shardSearcher = newSearcher(directoryReader);
shardSearcher.setQueryCache(null);
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore,
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
assertEquals(3L, topDocs.totalHits);
assertEquals(3, topDocs.scoreDocs.length);
assertEquals(0, topDocs.scoreDocs[0].doc);
assertEquals(1, topDocs.scoreDocs[1].doc);
assertEquals(4, topDocs.scoreDocs[2].doc);
topDocs = shardSearcher.search(new ConstantScoreQuery(query), 10);
assertEquals(3L, topDocs.totalHits);
assertEquals(3, topDocs.scoreDocs.length);
assertEquals(0, topDocs.scoreDocs[0].doc);
assertEquals(1, topDocs.scoreDocs[1].doc);
assertEquals(4, topDocs.scoreDocs[2].doc);
}
public void testPercolateSmallAndLargeDocument() throws Exception {
List<ParseContext.Document> docs = new ArrayList<>();
BooleanQuery.Builder builder = new BooleanQuery.Builder();

View File

@ -594,13 +594,18 @@ public class QueryAnalyzerTests extends ESTestCase {
}
public void testExtractQueryMetadata_matchAllDocsQuery() {
expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery(), Version.CURRENT));
Result result = analyze(new MatchAllDocsQuery(), Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
Result result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(false));
result = analyze(builder.build(), Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "value"));
@ -609,34 +614,78 @@ public class QueryAnalyzerTests extends ESTestCase {
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq1 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq1, Version.CURRENT));
result = analyze(bq1, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
BooleanQuery bq2 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Version.CURRENT));
result = analyze(bq2, Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq3 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq3, Version.CURRENT));
result = analyze(bq3, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq4 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq4, Version.CURRENT));
result = analyze(bq4, Version.CURRENT);
assertThat(result.verified, is(false));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder();
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
BooleanQuery bq5 = builder.build();
expectThrows(UnsupportedQueryException.class, () -> analyze(bq5, Version.CURRENT));
result = analyze(bq5, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.setMinimumNumberShouldMatch(2);
BooleanQuery bq6 = builder.build();
result = analyze(bq6, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(false));
assertThat(result.minimumShouldMatch, equalTo(1));
assertThat(result.extractions.size(), equalTo(1));
assertTermsEqual(result.extractions, new Term("field", "value"));
builder = new BooleanQuery.Builder();
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
builder.setMinimumNumberShouldMatch(2);
BooleanQuery bq7 = builder.build();
result = analyze(bq7, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.matchAllDocs, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.size(), equalTo(0));
}
public void testExtractQueryMetadata_unsupportedQuery() {