Handle leniency for phrase query on a field indexed without positions (#26388)

This change rewrite phrase query built on a field indexed without positions to match_no_docs query when the `lenient` option is set to true. This change affects all full text queries.
2017-08-25 16:41:01 +02:00 · 2017-08-25 16:41:01 +02:00 · 74cd32942a
parent ad8f359deb
commit 74cd32942a
4 changed files with 54 additions and 9 deletions
--- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
+++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
@ -22,6 +22,7 @@ package org.elasticsearch.index.search;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.ExtendedCommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
@ -239,6 +240,10 @@ public class MatchQuery {
        }
    }

+    private boolean hasPositions(MappedFieldType fieldType) {
+        return fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
    public Query parse(Type type, String fieldName, Object value) throws IOException {
        MappedFieldType fieldType = context.fieldMapper(fieldName);
        if (fieldType == null) {
@ -263,7 +268,11 @@ public class MatchQuery {
        assert analyzer != null;
        MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType);
        builder.setEnablePositionIncrements(this.enablePositionIncrements);
+        if (hasPositions(fieldType)) {
            builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery);
+        } else {
+            builder.setAutoGenerateMultiTermSynonymsPhraseQuery(false);
+        }

        Query query = null;
        switch (type) {
@ -331,6 +340,20 @@ public class MatchQuery {
            return blendTermsQuery(terms, mapper);
        }

+        @Override
+        protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
+            if (hasPositions(mapper) == false) {
+                IllegalStateException exc =
+                    new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery");
+                if (lenient) {
+                    return newLenientFieldQuery(field, exc);
+                } else {
+                    throw exc;
+                }
+            }
+            return super.analyzePhrase(field, stream, slop);
+        }
+
        /**
         * Checks if graph analysis should be enabled for the field depending
         * on the provided {@link Analyzer}
--- a/core/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
+++ b/core/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java
@ -437,8 +437,12 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil

    @Override
    protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
-        mapperService.merge("doc", new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef("doc",
-            "string_boost", "type=text,boost=4").string()), MapperService.MergeReason.MAPPING_UPDATE, false);
+        mapperService.merge("doc", new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(
+                "doc",
+                "string_boost", "type=text,boost=4", "string_no_pos",
+                "type=text,index_options=docs").string()
+            ),
+            MapperService.MergeReason.MAPPING_UPDATE, false);
    }

    public void testMatchPhrasePrefixWithBoost() throws Exception {
@ -463,6 +467,16 @@ public class MatchQueryBuilderTests extends AbstractQueryTestCase<MatchQueryBuil
            Query query = builder.toQuery(context);
            assertThat(query, instanceOf(MultiPhrasePrefixQuery.class));
        }
+    }

+    public void testLenientPhraseQuery() throws Exception {
+        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
+        QueryShardContext context = createShardContext();
+        MatchQuery b = new MatchQuery(context);
+        b.setLenient(true);
+        Query query = b.parse(Type.PHRASE, "string_no_pos", "foo bar");
+        assertThat(query, instanceOf(MatchNoDocsQuery.class));
+        assertThat(query.toString(),
+            containsString("field:[string_no_pos] was indexed without position data; cannot run PhraseQuery"));
    }
 }
--- a/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
+++ b/core/src/test/java/org/elasticsearch/search/query/QueryStringIT.java
@ -242,16 +242,24 @@ public class QueryStringIT extends ESIntegTestCase {
    }


-    @LuceneTestCase.AwaitsFix(bugUrl="currently can't perform phrase queries on fields that don't support positions")
    public void testPhraseQueryOnFieldWithNoPositions() throws Exception {
        List<IndexRequestBuilder> reqs = new ArrayList<>();
        reqs.add(client().prepareIndex("test", "doc", "1").setSource("f1", "foo bar", "f4", "eggplant parmesan"));
        reqs.add(client().prepareIndex("test", "doc", "2").setSource("f1", "foo bar", "f4", "chicken parmesan"));
        indexRandom(true, false, reqs);

-        SearchResponse resp = client().prepareSearch("test").setQuery(queryStringQuery("\"eggplant parmesan\"")).get();
-        assertHits(resp.getHits(), "1");
-        assertHitCount(resp, 1L);
+        SearchResponse resp = client().prepareSearch("test")
+            .setQuery(queryStringQuery("\"eggplant parmesan\"").lenient(true)).get();
+        assertHitCount(resp, 0L);
+
+        Exception exc = expectThrows(Exception.class,
+            () -> client().prepareSearch("test").setQuery(
+                queryStringQuery("f4:\"eggplant parmesan\"").lenient(false)
+            ).get()
+        );
+        IllegalStateException ise = (IllegalStateException) ExceptionsHelper.unwrap(exc, IllegalStateException.class);
+        assertNotNull(ise);
+        assertThat(ise.getMessage(), containsString("field:[f4] was indexed without position data; cannot run PhraseQuery"));
    }

    public void testBooleanStrictQuery() throws Exception {
--- a/core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java
+++ b/core/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java
@ -177,8 +177,8 @@ public class SearchQueryIT extends ESIntegTestCase {
        assertHitCount(searchResponse, 1L);

        assertFailures(client().prepareSearch().setQuery(matchQuery("field1", "quick brown").type(Type.PHRASE).slop(0)),
-                    RestStatus.INTERNAL_SERVER_ERROR,
-                    containsString("field \"field1\" was indexed without position data; cannot run PhraseQuery"));
+                    RestStatus.BAD_REQUEST,
+                    containsString("field:[field1] was indexed without position data; cannot run PhraseQuery"));
    }

    // see #3521