percolator: Add support for MatchNoDocsQuery in query terms extract service

Previously, the query extraction would have been aborted and the percolator query would have been marked as unknown. As a result, these queries always needed to be evaluated by the memory index at search time. By adding support for this query, many more percolator query candidate hits can skip the expensive memory index verification step. For example, the `match` query parser returns a MatchNoDocsQuery if all query terms are removed by text analysis (let's say the query text only contained stop words).
2016-05-20 17:28:00 +02:00 · 2016-05-20 17:28:00 +02:00 · c1a0929123
parent 7cf758943e
commit c1a0929123
3 changed files with 52 additions and 11 deletions
--- a/core/src/main/java/org/elasticsearch/index/percolator/ExtractQueryTermsService.java
+++ b/core/src/main/java/org/elasticsearch/index/percolator/ExtractQueryTermsService.java
@ -37,19 +37,16 @@ import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanContainingQuery;
 import org.apache.lucene.search.spans.SpanFirstQuery;
 import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.SpanWithinQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.common.logging.LoggerMessageFormat;
 import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.ParseContext;
 import java.io.IOException;
@ -75,8 +72,8 @@ public final class ExtractQueryTermsService {
     * @param query                 The query to extract terms from
     * @param document              The document to add the extracted terms to
     * @param queryTermsFieldField  The field in the document holding the extracted terms
-     * @param unknownQueryField The field used to mark a document that not all query terms could be extracted. For example
+     * @param unknownQueryField     The field used to mark a document that not all query terms could be extracted.
-     *                          the query contained an unsupported query (e.g. WildcardQuery).
+     *                              For example the query contained an unsupported query (e.g. WildcardQuery).
     * @param fieldType The field type for the query metadata field
     */
    public static void extractQueryTerms(Query query, ParseContext.Document document, String queryTermsFieldField, String unknownQueryField, FieldType fieldType) {
@ -106,7 +103,10 @@ public final class ExtractQueryTermsService {
     * an UnsupportedQueryException is thrown.
     */
    static Set<Term> extractQueryTerms(Query query) {
-        if (query instanceof TermQuery) {
+        if (query instanceof MatchNoDocsQuery) {
            // no terms to extract as this query matches no docs
            return Collections.emptySet();
        } else if (query instanceof TermQuery) {
            return Collections.singleton(((TermQuery) query).getTerm());
        } else if (query instanceof TermsQuery) {
            Set<Term> terms = new HashSet<>();
--- a/core/src/test/java/org/elasticsearch/index/percolator/ExtractQueryTermsServiceTests.java
+++ b/core/src/test/java/org/elasticsearch/index/percolator/ExtractQueryTermsServiceTests.java
@ -41,6 +41,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
 import org.elasticsearch.index.mapper.ParseContext;
 import org.elasticsearch.test.ESTestCase;
@ -296,6 +297,23 @@ public class ExtractQueryTermsServiceTests extends ESTestCase {
        assertTermsEqual(terms, spanTermQuery1.getTerm());
    }
    /**
     * Checks the terms extracted from queries that involve a {@link MatchNoDocsQuery}:
     * on its own, as a required clause next to a term query, and as an optional
     * clause next to a term query.
     */
    public void testExtractQueryMetadata_matchNoDocsQuery() {
        final String reason = "sometimes there is no reason at all";
        final Term fieldTerm = new Term("field", "value");

        // A standalone MatchNoDocsQuery is expected to yield no terms.
        Set<Term> standalone = ExtractQueryTermsService.extractQueryTerms(new MatchNoDocsQuery(reason));
        assertEquals(0, standalone.size());

        // Both clauses required: the extraction is expected to yield no terms.
        BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
        conjunction.add(new TermQuery(fieldTerm), BooleanClause.Occur.MUST);
        conjunction.add(new MatchNoDocsQuery(reason), BooleanClause.Occur.MUST);
        Set<Term> fromConjunction = ExtractQueryTermsService.extractQueryTerms(conjunction.build());
        assertEquals(0, fromConjunction.size());

        // Both clauses optional: only the term of the term query is expected.
        BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
        disjunction.add(new TermQuery(fieldTerm), BooleanClause.Occur.SHOULD);
        disjunction.add(new MatchNoDocsQuery(reason), BooleanClause.Occur.SHOULD);
        Set<Term> fromDisjunction = ExtractQueryTermsService.extractQueryTerms(disjunction.build());
        assertTermsEqual(fromDisjunction, new Term("field", "value"));
    }
    public void testExtractQueryMetadata_unsupportedQuery() {
        TermRangeQuery termRangeQuery = new TermRangeQuery("_field", null, null, true, false);
--- a/docs/reference/query-dsl/percolate-query.asciidoc
+++ b/docs/reference/query-dsl/percolate-query.asciidoc
@ -334,3 +334,26 @@ At search time, the document specified in the request gets parsed into a Lucene
 temporary Lucene index. This in-memory index can just hold this one document and it is optimized for that. After this
 a special query is built based on the terms in the in-memory index that selects candidate percolator queries based on
 their indexed query terms. These queries are then evaluated by the in-memory index if they actually match.
 Selecting candidate percolator query matches is an important performance optimization during the execution
 of the `percolate` query as it can significantly reduce the number of candidate matches the in-memory index needs to
 evaluate. The reason the `percolate` query can do this is because during indexing of the percolator queries the query
 terms are being extracted and indexed with the percolator query. Unfortunately the percolator cannot extract terms from
 all queries (for example the `wildcard` or `geo_shape` query) and as a result of that in certain cases the percolator
 can't do the selecting optimization (for example if an unsupported query is defined in a required clause of a boolean query
 or the unsupported query is the only query in the percolator document).  These queries are marked by the percolator and
 can be found by running the following search:
 [source,js]
 --------------------------------------------------
 curl -XGET "http://localhost:9200/_search" -d'
 {
  "query": {
    "term" : {
      "query.unknown_query" : ""
    }
  }
 }'
 --------------------------------------------------
 NOTE: The above example assumes that there is a `query` field of type `percolator` in the mappings.