Highlighters skip ignored keyword values (#53408) (#53604)

Keyword field values with length more than ignore_above are not indexed. But highlighters still were retrieving these values from _source and were trying to highlight them. This sometimes lead to errors if a field length exceeded max_analyzed_offset. But also this is an overall wrong behaviour to attempt to highlight something that was ignored during indexing. This PR checks if a keyword value was ignored because of its length, and if yes, skips highlighting it. Backport: #53408 Closes #43800
2020-03-16 11:06:25 -04:00 · 2020-03-16 11:06:25 -04:00 · a906f8a0e4
parent 376b2ae735
commit a906f8a0e4
5 changed files with 101 additions and 7 deletions
--- a/docs/reference/migration/migrate_7_7.asciidoc
+++ b/docs/reference/migration/migrate_7_7.asciidoc
@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
 Therefore, these settings have been deprecated. You can safely remove these
 settings from the configuration of your nodes.

+[discrete]
+[[breaking_77_highlighters_changes]]
+=== Highlighters changes
+
+[discrete]
+==== Ignored keyword values are no longer highlighted
+If a keyword value was ignored during indexing because of its length
+(`ignore_above` parameter was applied), {es} doesn't attempt to
+highlight it anymore, which means no highlights are produced for
+ignored values.
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml
@ -0,0 +1,64 @@
+---
+setup:
+  - do:
+      indices.create:
+          index: test-index
+          body:
+            mappings:
+              "properties":
+                "k1":
+                  "type": "keyword"
+                "k2":
+                  "type": "keyword"
+                  "ignore_above": 3
+  - do:
+      bulk:
+        index: test-index
+        refresh: true
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"k1": "123", "k2" : "123"}'
+          - '{"index": {"_id": "2"}}'
+          - '{"k1": "1234", "k2" : "1234"}'
+
+---
+"Plain Highligher should skip highlighting ignored keyword values":
+  - skip:
+      version: " - 7.6.99"
+      reason: "skip highlighting of ignored values was introduced in 7.7"
+  - do:
+      search:
+        index: test-index
+        body:
+          query:
+            prefix:
+              k1: "12"
+          highlight:
+            require_field_match: false
+            fields:
+              k2:
+                type: plain
+
+  - match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
+  - is_false: hits.hits.1.highlight # no highlight for a value that was ignored
+
+---
+"Unified Highligher should skip highlighting ignored keyword values":
+  - skip:
+      version: " - 7.6.99"
+      reason: "skip highlighting of ignored values was introduced in 7.7"
+  - do:
+      search:
+        index: test-index
+        body:
+          query:
+            prefix:
+              k1: "12"
+          highlight:
+            require_field_match: false
+            fields:
+              k2:
+                type: unified
+
+  - match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
+  - is_false: hits.hits.1.highlight # no highlight for a value that was ignored
--- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@ -314,8 +314,7 @@ public final class KeywordFieldMapper extends FieldMapper {

    /** Values that have more chars than the return value of this method will
     *  be skipped at parsing time. */
-    // pkg-private for testing
-    int ignoreAbove() {
+    public int ignoreAbove() {
        return ignoreAbove;
    }

--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@ -36,6 +36,7 @@ import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@ -102,6 +103,12 @@ public class PlainHighlighter implements Highlighter {
        ArrayList<TextFragment> fragsList = new ArrayList<>();
        List<Object> textsToHighlight;
        Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
+        Integer keywordIgnoreAbove = null;
+        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
+                .mappers().getMapper(highlighterContext.fieldName);
+            keywordIgnoreAbove = mapper.ignoreAbove();
+        };
        final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();

        try {
@ -110,7 +117,11 @@ public class PlainHighlighter implements Highlighter {

            for (Object textToHighlight : textsToHighlight) {
                String text = convertFieldValue(fieldType, textToHighlight);
-                if (text.length() > maxAnalyzedOffset) {
+                int textLength = text.length();
+                if (keywordIgnoreAbove != null  && textLength > keywordIgnoreAbove) {
+                    continue; // skip highlighting keyword terms that were ignored during indexing
+                }
+                if (textLength > maxAnalyzedOffset) {
                    throw new IllegalArgumentException(
                        "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
                            "] doc of [" + context.index().getName() + "] index " +
--- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
+++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
@ -36,6 +36,7 @@ import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@ -65,11 +66,16 @@ public class UnifiedHighlighter implements Highlighter {
        FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
        Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
        final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
+        Integer keywordIgnoreAbove = null;
+        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
+                .mappers().getMapper(highlighterContext.fieldName);
+            keywordIgnoreAbove = mapper.ignoreAbove();
+        }

        List<Snippet> snippets = new ArrayList<>();
        int numberOfFragments = field.fieldOptions().numberOfFragments();
        try {
-
            final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
                    hitContext);
            List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
@ -82,7 +88,11 @@ public class UnifiedHighlighter implements Highlighter {
            final CustomUnifiedHighlighter highlighter;
            final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
            final OffsetSource offsetSource = getOffsetSource(fieldType);
-            if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
+            int fieldValueLength = fieldValue.length();
+            if (keywordIgnoreAbove != null  && fieldValueLength > keywordIgnoreAbove) {
+                return null; // skip highlighting keyword terms that were ignored during indexing
+            }
+            if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
                throw new IllegalArgumentException(
                    "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
                        "] doc of [" + context.index().getName() + "] index " + "has exceeded [" +