`query_string` should use indexed prefixes (#36895)

The QueryStringQueryBuilder does not currently delegate to the field mapper's prefixQuery method, so does not use indexed prefixes. This commit corrects this. It also fixes a bug where a query a* would not match the word a if indexed prefixes were used with a minchar setting of 2.
2019-01-02 20:12:24 +00:00 · 2019-01-02 20:12:24 +00:00 · 7a0047744d
parent 265fdce312
commit 7a0047744d
5 changed files with 68 additions and 17 deletions
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml
@ -13,15 +13,15 @@ setup:
                text:
                  type: text
                  index_prefixes:
-                    min_chars: 1
-                    max_chars: 10
+                    min_chars: 2
+                    max_chars: 5

  - do:
      index:
          index:  test
          type:   test
          id:     1
-          body:   { text: some short words and a stupendously long one }
+          body:   { text: some short words with a stupendously long one }

  - do:
      indices.refresh:
@ -63,10 +63,11 @@ setup:
        rest_total_hits_as_int: true
        index: test
        body:
+          explain: true
          query:
            query_string:
              default_field: text
-              query: s*
+              query: a*
              boost: 2

  - match: {hits.total: 1}
--- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@ -33,6 +33,8 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.MultiTermQuery;
@ -156,7 +158,7 @@ public class TextFieldMapper extends FieldMapper {
            if (maxChars >= 20) {
                throw new IllegalArgumentException("max_chars [" + maxChars + "] must be less than 20");
            }
-            this.prefixFieldType = new PrefixFieldType(name() + "._index_prefix", minChars, maxChars);
+            this.prefixFieldType = new PrefixFieldType(name(), name() + "._index_prefix", minChars, maxChars);
            fieldType().setPrefixFieldType(this.prefixFieldType);
            return this;
        }
@ -347,14 +349,16 @@ public class TextFieldMapper extends FieldMapper {

        final int minChars;
        final int maxChars;
+        final String parentField;

-        PrefixFieldType(String name, int minChars, int maxChars) {
+        PrefixFieldType(String parentField, String name, int minChars, int maxChars) {
            setTokenized(true);
            setOmitNorms(true);
            setIndexOptions(IndexOptions.DOCS);
            setName(name);
            this.minChars = minChars;
            this.maxChars = maxChars;
+            this.parentField = parentField;
        }

        PrefixFieldType setAnalyzer(NamedAnalyzer delegate) {
@ -387,12 +391,15 @@ public class TextFieldMapper extends FieldMapper {
            Automaton automaton = Operations.concatenate(automata);
            AutomatonQuery query = new AutomatonQuery(new Term(name(), value + "*"), automaton);
            query.setRewriteMethod(method);
-            return query;
+            return new BooleanQuery.Builder()
+                .add(query, BooleanClause.Occur.SHOULD)
+                .add(new TermQuery(new Term(parentField, value)), BooleanClause.Occur.SHOULD)
+                .build();
        }

        @Override
        public PrefixFieldType clone() {
-            return new PrefixFieldType(name(), minChars, maxChars);
+            return new PrefixFieldType(parentField, name(), minChars, maxChars);
        }

        @Override
--- a/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java
+++ b/server/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java
@ -42,15 +42,14 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.unit.Fuzziness;
+import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperService;
-import org.elasticsearch.index.mapper.StringFieldType;
 import org.elasticsearch.index.query.ExistsQueryBuilder;
 import org.elasticsearch.index.query.MultiMatchQueryBuilder;
 import org.elasticsearch.index.query.QueryShardContext;
@ -507,7 +506,7 @@ public class QueryStringQueryParser extends XQueryParser {
            }
            setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer);
            Query query = null;
-            if (currentFieldType instanceof StringFieldType == false) {
+            if (currentFieldType.tokenized() == false) {
                query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context);
            } else {
                query = getPossiblyAnalyzedPrefixQuery(currentFieldType.name(), termStr);
@ -525,7 +524,8 @@ public class QueryStringQueryParser extends XQueryParser {

    private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
        if (analyzeWildcard == false) {
-            return super.getPrefixQuery(field, termStr);
+            return currentFieldType.prefixQuery(getAnalyzer().normalize(field, termStr).utf8ToString(),
+                getMultiTermRewriteMethod(), context);
        }
        List<List<String> > tlist;
        // get Analyzer from superclass and tokenize the term
@ -568,7 +568,7 @@ public class QueryStringQueryParser extends XQueryParser {
        }

        if (tlist.size() == 1 && tlist.get(0).size() == 1) {
-            return super.getPrefixQuery(field, tlist.get(0).get(0));
+            return currentFieldType.prefixQuery(tlist.get(0).get(0), getMultiTermRewriteMethod(), context);
        }

        // build a boolean query with prefix on the last position only.
@ -579,7 +579,7 @@ public class QueryStringQueryParser extends XQueryParser {
            Query posQuery;
            if (plist.size() == 1) {
                if (isLastPos) {
-                    posQuery = super.getPrefixQuery(field, plist.get(0));
+                    posQuery = currentFieldType.prefixQuery(plist.get(0), getMultiTermRewriteMethod(), context);
                } else {
                    posQuery = newTermQuery(new Term(field, plist.get(0)));
                }
--- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java
@ -21,6 +21,8 @@ package org.elasticsearch.index.mapper;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.PrefixQuery;
@ -40,6 +42,7 @@ import java.util.Arrays;
 import java.util.List;

 import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
+import static org.hamcrest.Matchers.equalTo;

 public class TextFieldTypeTests extends FieldTypeTestCase {
    @Override
@ -90,7 +93,7 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
                TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType)ft;
                TextFieldMapper.PrefixFieldType pft = tft.getPrefixFieldType();
                if (pft == null) {
-                    tft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft.name(), 3, 3));
+                    tft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft.name(), ft.name() + "._index_prefix", 3, 3));
                }
                else {
                    tft.setPrefixFieldType(null);
@ -156,7 +159,7 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
    public void testIndexPrefixes() {
        TextFieldMapper.TextFieldType ft = new TextFieldMapper.TextFieldType();
        ft.setName("field");
-        ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field._index_prefix", 2, 10));
+        ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType("field", "field._index_prefix", 2, 10));

        Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, null);
        assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field._index_prefix", "goin"))), q);
@ -167,6 +170,12 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
        q = ft.prefixQuery("g", CONSTANT_SCORE_REWRITE, null);
        Automaton automaton
            = Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
-        assertEquals(new ConstantScoreQuery(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton)), q);
+
+        Query expected = new ConstantScoreQuery(new BooleanQuery.Builder()
+            .add(new AutomatonQuery(new Term("field._index_prefix", "g*"), automaton), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("field", "g")), BooleanClause.Occur.SHOULD)
+            .build());
+
+        assertThat(q, equalTo(expected));
    }
 }
--- a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java
@ -22,6 +22,7 @@ package org.elasticsearch.index.query;
 import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.BlendedTermQuery;
+import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
@ -45,6 +46,9 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.Operations;
 import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
 import org.elasticsearch.Version;
 import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
@ -69,6 +73,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;

+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder;
 import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertBooleanSubQuery;
@ -78,6 +83,20 @@ import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.instanceOf;

 public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStringQueryBuilder> {
+
+    @Override
+    protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
+        XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
+            .startObject("prefix_field")
+            .field("type", "text")
+            .startObject("index_prefixes").endObject()
+            .endObject()
+            .endObject().endObject().endObject();
+
+        mapperService.merge("_doc",
+            new CompressedXContent(Strings.toString(mapping)), MapperService.MergeReason.MAPPING_UPDATE);
+    }
+
    @Override
    protected QueryStringQueryBuilder doCreateTestQueryBuilder() {
        int numTerms = randomIntBetween(0, 5);
@ -535,6 +554,21 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
        }
    }

+    public void testToQueryWildcardWithIndexedPrefixes() throws Exception {
+        QueryStringQueryParser queryParser = new QueryStringQueryParser(createShardContext(), "prefix_field");
+        Query query = queryParser.parse("foo*");
+        Query expectedQuery = new ConstantScoreQuery(new TermQuery(new Term("prefix_field._index_prefix", "foo")));
+        assertThat(query, equalTo(expectedQuery));
+
+        query = queryParser.parse("g*");
+        Automaton a = Operations.concatenate(Arrays.asList(Automata.makeChar('g'), Automata.makeAnyChar()));
+        expectedQuery = new ConstantScoreQuery(new BooleanQuery.Builder()
+            .add(new AutomatonQuery(new Term("prefix_field._index_prefix", "g*"), a), Occur.SHOULD)
+            .add(new TermQuery(new Term("prefix_field", "g")), Occur.SHOULD)
+            .build());
+        assertThat(query, equalTo(expectedQuery));
+    }
+
    public void testToQueryWilcardQueryWithSynonyms() throws Exception {
        for (Operator op : Operator.values()) {
            BooleanClause.Occur defaultOp = op.toBooleanClauseOccur();