percolator: also extract match_all queries
I've seen several cases where match_all queries were being used inside percolator queries, because these queries were created generated by other systems. Extracting these queries will allow the percolator at query time in a filter context to skip over these queries without parsing or validating that these queries actually match with the document being percolated.
This commit is contained in:
parent
af8bd8bbcf
commit
e9160fc014
|
@ -458,20 +458,27 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
doc.add(new Field(pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
|
||||
return;
|
||||
}
|
||||
for (QueryAnalyzer.QueryExtraction term : result.extractions) {
|
||||
if (term.term != null) {
|
||||
for (QueryAnalyzer.QueryExtraction extraction : result.extractions) {
|
||||
if (extraction.term != null) {
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
builder.append(new BytesRef(term.field()));
|
||||
builder.append(new BytesRef(extraction.field()));
|
||||
builder.append(FIELD_VALUE_SEPARATOR);
|
||||
builder.append(term.bytes());
|
||||
builder.append(extraction.bytes());
|
||||
doc.add(new Field(queryTermsField.name(), builder.toBytesRef(), queryTermsField.fieldType()));
|
||||
} else if (term.range != null) {
|
||||
byte[] min = term.range.lowerPoint;
|
||||
byte[] max = term.range.upperPoint;
|
||||
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(term.range.fieldName, min, max)));
|
||||
} else if (extraction.range != null) {
|
||||
byte[] min = extraction.range.lowerPoint;
|
||||
byte[] max = extraction.range.upperPoint;
|
||||
doc.add(new BinaryRange(rangeFieldMapper.name(), encodeRange(extraction.range.fieldName, min, max)));
|
||||
}
|
||||
}
|
||||
if (result.verified) {
|
||||
|
||||
Version indexVersionCreated = context.mapperService().getIndexSettings().getIndexVersionCreated();
|
||||
if (result.matchAllDocs) {
|
||||
doc.add(new Field(extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
|
||||
if (result.verified) {
|
||||
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
|
||||
}
|
||||
} else if (result.verified) {
|
||||
doc.add(new Field(extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
|
||||
} else {
|
||||
doc.add(new Field(extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
|
||||
|
@ -481,7 +488,7 @@ public class PercolatorFieldMapper extends FieldMapper {
|
|||
for (IndexableField field : fields) {
|
||||
context.doc().add(field);
|
||||
}
|
||||
if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0)) {
|
||||
if (indexVersionCreated.onOrAfter(Version.V_6_1_0)) {
|
||||
doc.add(new NumericDocValuesField(minimumShouldMatchFieldMapper.name(), result.minimumShouldMatch));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.search.BoostQuery;
|
|||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.IndexOrDocValuesQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
|
@ -70,6 +71,7 @@ final class QueryAnalyzer {
|
|||
static {
|
||||
Map<Class<? extends Query>, BiFunction<Query, Version, Result>> map = new HashMap<>();
|
||||
map.put(MatchNoDocsQuery.class, matchNoDocsQuery());
|
||||
map.put(MatchAllDocsQuery.class, matchAllDocsQuery());
|
||||
map.put(ConstantScoreQuery.class, constantScoreQuery());
|
||||
map.put(BoostQuery.class, boostQuery());
|
||||
map.put(TermQuery.class, termQuery());
|
||||
|
@ -142,6 +144,10 @@ final class QueryAnalyzer {
|
|||
return (query, version) -> new Result(true, Collections.emptySet(), 1);
|
||||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> matchAllDocsQuery() {
|
||||
return (query, version) -> new Result(true, true);
|
||||
}
|
||||
|
||||
private static BiFunction<Query, Version, Result> constantScoreQuery() {
|
||||
return (query, boosts) -> {
|
||||
Query wrappedQuery = ((ConstantScoreQuery) query).getQuery();
|
||||
|
@ -356,6 +362,7 @@ final class QueryAnalyzer {
|
|||
int msm = 0;
|
||||
boolean requiredShouldClauses = minimumShouldMatch > 0 && numOptionalClauses > 0;
|
||||
boolean verified = uqe == null && numProhibitedClauses == 0 && requiredShouldClauses == false;
|
||||
boolean matchAllDocs = true;
|
||||
Set<QueryExtraction> extractions = new HashSet<>();
|
||||
Set<String> seenRangeFields = new HashSet<>();
|
||||
for (Result result : results) {
|
||||
|
@ -376,9 +383,14 @@ final class QueryAnalyzer {
|
|||
msm += result.minimumShouldMatch;
|
||||
}
|
||||
verified &= result.verified;
|
||||
matchAllDocs &= result.matchAllDocs;
|
||||
extractions.addAll(result.extractions);
|
||||
}
|
||||
return new Result(verified, extractions, msm);
|
||||
if (matchAllDocs) {
|
||||
return new Result(matchAllDocs, verified);
|
||||
} else {
|
||||
return new Result(verified, extractions, msm);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Set<QueryExtraction> bestClause = null;
|
||||
|
@ -498,12 +510,15 @@ final class QueryAnalyzer {
|
|||
if (version.before(Version.V_6_1_0)) {
|
||||
verified &= requiredShouldClauses <= 1;
|
||||
}
|
||||
|
||||
int numMatchAllClauses = 0;
|
||||
Set<QueryExtraction> terms = new HashSet<>();
|
||||
for (int i = 0; i < disjunctions.size(); i++) {
|
||||
Query disjunct = disjunctions.get(i);
|
||||
Result subResult = analyze(disjunct, version);
|
||||
verified &= subResult.verified;
|
||||
if (subResult.matchAllDocs) {
|
||||
numMatchAllClauses++;
|
||||
}
|
||||
terms.addAll(subResult.extractions);
|
||||
|
||||
QueryExtraction[] t = subResult.extractions.toArray(new QueryExtraction[1]);
|
||||
|
@ -512,6 +527,7 @@ final class QueryAnalyzer {
|
|||
rangeFieldNames[i] = t[0].range.fieldName;
|
||||
}
|
||||
}
|
||||
boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses;
|
||||
|
||||
int msm = 0;
|
||||
if (version.onOrAfter(Version.V_6_1_0)) {
|
||||
|
@ -532,7 +548,11 @@ final class QueryAnalyzer {
|
|||
} else {
|
||||
msm = 1;
|
||||
}
|
||||
return new Result(verified, terms, msm);
|
||||
if (matchAllDocs) {
|
||||
return new Result(matchAllDocs, verified);
|
||||
} else {
|
||||
return new Result(verified, terms, msm);
|
||||
}
|
||||
}
|
||||
|
||||
static Set<QueryExtraction> selectBestExtraction(Set<QueryExtraction> extractions1, Set<QueryExtraction> extractions2) {
|
||||
|
@ -619,11 +639,20 @@ final class QueryAnalyzer {
|
|||
final Set<QueryExtraction> extractions;
|
||||
final boolean verified;
|
||||
final int minimumShouldMatch;
|
||||
final boolean matchAllDocs;
|
||||
|
||||
Result(boolean verified, Set<QueryExtraction> extractions, int minimumShouldMatch) {
|
||||
this.extractions = extractions;
|
||||
this.verified = verified;
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
this.matchAllDocs = false;
|
||||
}
|
||||
|
||||
Result(boolean matchAllDocs, boolean verified) {
|
||||
this.extractions = Collections.emptySet();
|
||||
this.verified = verified;
|
||||
this.minimumShouldMatch = 0;
|
||||
this.matchAllDocs = matchAllDocs;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -56,6 +56,8 @@ import org.apache.lucene.search.MatchNoDocsQuery;
|
|||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -193,6 +195,8 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
|
||||
} else if (randomBoolean()) {
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
|
@ -202,6 +206,20 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new MatchNoDocsQuery("no reason"), BooleanClause.Occur.MUST_NOT);
|
||||
} else if (randomBoolean()) {
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
queryFunctions.add((id) -> {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new TermQuery(new Term("field", id)), BooleanClause.Occur.SHOULD);
|
||||
if (randomBoolean()) {
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
builder.setMinimumNumberShouldMatch(2);
|
||||
}
|
||||
return builder.build();
|
||||
});
|
||||
|
@ -467,6 +485,52 @@ public class CandidateQueryTests extends ESSingleNodeTestCase {
|
|||
duelRun(queryStore, memoryIndex, shardSearcher);
|
||||
}
|
||||
|
||||
public void testPercolateMatchAll() throws Exception {
|
||||
List<ParseContext.Document> docs = new ArrayList<>();
|
||||
addQuery(new MatchAllDocsQuery(), docs);
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value1")), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
addQuery(builder.build(), docs);
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
addQuery(builder.build(), docs);
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
addQuery(builder.build(), docs);
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value2")), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
addQuery(builder.build(), docs);
|
||||
indexWriter.addDocuments(docs);
|
||||
indexWriter.close();
|
||||
directoryReader = DirectoryReader.open(directory);
|
||||
IndexSearcher shardSearcher = newSearcher(directoryReader);
|
||||
shardSearcher.setQueryCache(null);
|
||||
|
||||
MemoryIndex memoryIndex = new MemoryIndex();
|
||||
memoryIndex.addField("field", "value1", new WhitespaceAnalyzer());
|
||||
IndexSearcher percolateSearcher = memoryIndex.createSearcher();
|
||||
PercolateQuery query = (PercolateQuery) fieldType.percolateQuery("_name", queryStore,
|
||||
Collections.singletonList(new BytesArray("{}")), percolateSearcher, Version.CURRENT);
|
||||
TopDocs topDocs = shardSearcher.search(query, 10, new Sort(SortField.FIELD_DOC), true, true);
|
||||
assertEquals(3L, topDocs.totalHits);
|
||||
assertEquals(3, topDocs.scoreDocs.length);
|
||||
assertEquals(0, topDocs.scoreDocs[0].doc);
|
||||
assertEquals(1, topDocs.scoreDocs[1].doc);
|
||||
assertEquals(4, topDocs.scoreDocs[2].doc);
|
||||
|
||||
topDocs = shardSearcher.search(new ConstantScoreQuery(query), 10);
|
||||
assertEquals(3L, topDocs.totalHits);
|
||||
assertEquals(3, topDocs.scoreDocs.length);
|
||||
assertEquals(0, topDocs.scoreDocs[0].doc);
|
||||
assertEquals(1, topDocs.scoreDocs[1].doc);
|
||||
assertEquals(4, topDocs.scoreDocs[2].doc);
|
||||
}
|
||||
|
||||
public void testPercolateSmallAndLargeDocument() throws Exception {
|
||||
List<ParseContext.Document> docs = new ArrayList<>();
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
|
|
|
@ -594,13 +594,18 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testExtractQueryMetadata_matchAllDocsQuery() {
|
||||
expectThrows(UnsupportedQueryException.class, () -> analyze(new MatchAllDocsQuery(), Version.CURRENT));
|
||||
Result result = analyze(new MatchAllDocsQuery(), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
Result result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
result = analyze(builder.build(), Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertTermsEqual(result.extractions, new Term("field", "value"));
|
||||
|
||||
|
@ -609,34 +614,78 @@ public class QueryAnalyzerTests extends ESTestCase {
|
|||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
BooleanQuery bq1 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> analyze(bq1, Version.CURRENT));
|
||||
result = analyze(bq1, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
|
||||
BooleanQuery bq2 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> analyze(bq2, Version.CURRENT));
|
||||
result = analyze(bq2, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery bq3 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> analyze(bq3, Version.CURRENT));
|
||||
result = analyze(bq3, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST_NOT);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery bq4 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> analyze(bq4, Version.CURRENT));
|
||||
result = analyze(bq4, Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
BooleanQuery bq5 = builder.build();
|
||||
expectThrows(UnsupportedQueryException.class, () -> analyze(bq5, Version.CURRENT));
|
||||
result = analyze(bq5, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
|
||||
builder.setMinimumNumberShouldMatch(2);
|
||||
BooleanQuery bq6 = builder.build();
|
||||
result = analyze(bq6, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertThat(result.extractions.size(), equalTo(1));
|
||||
assertTermsEqual(result.extractions, new Term("field", "value"));
|
||||
|
||||
builder = new BooleanQuery.Builder();
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
builder.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD);
|
||||
builder.setMinimumNumberShouldMatch(2);
|
||||
BooleanQuery bq7 = builder.build();
|
||||
result = analyze(bq7, Version.CURRENT);
|
||||
assertThat(result.verified, is(true));
|
||||
assertThat(result.matchAllDocs, is(true));
|
||||
assertThat(result.minimumShouldMatch, equalTo(0));
|
||||
assertThat(result.extractions.size(), equalTo(0));
|
||||
}
|
||||
|
||||
public void testExtractQueryMetadata_unsupportedQuery() {
|
||||
|
|
Loading…
Reference in New Issue