Highlighters skip ignored keyword values (#53408) (#53604)

Keyword field values with length more than ignore_above are not
indexed. But highlighters still were retrieving these values
from _source and were trying to highlight them. This sometimes lead to
errors if a field length exceeded  max_analyzed_offset. But also this
is an overall wrong behaviour to attempt to highlight something that was
ignored during indexing.

This PR checks if a keyword value was ignored because of its length,
and if yes, skips highlighting it.

Backport: #53408
Closes #43800
This commit is contained in:
Mayya Sharipova 2020-03-16 11:06:25 -04:00 committed by GitHub
parent 376b2ae735
commit a906f8a0e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 101 additions and 7 deletions

View File

@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
Therefore, these settings have been deprecated. You can safely remove these
settings from the configuration of your nodes.
[discrete]
[[breaking_77_highlighters_changes]]
=== Highlighters changes
[discrete]
==== Ignored keyword values are no longer highlighted
If a keyword value was ignored during indexing because of its length
(`ignore_above` parameter was applied), {es} doesn't attempt to
highlight it anymore, which means no highlights are produced for
ignored values.

View File

@ -0,0 +1,64 @@
---
setup:
- do:
indices.create:
index: test-index
body:
mappings:
"properties":
"k1":
"type": "keyword"
"k2":
"type": "keyword"
"ignore_above": 3
- do:
bulk:
index: test-index
refresh: true
body:
- '{"index": {"_id": "1"}}'
- '{"k1": "123", "k2" : "123"}'
- '{"index": {"_id": "2"}}'
- '{"k1": "1234", "k2" : "1234"}'
---
"Plain Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.6.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: plain
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
---
"Unified Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.6.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: unified
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored

View File

@ -314,8 +314,7 @@ public final class KeywordFieldMapper extends FieldMapper {
/** Values that have more chars than the return value of this method will
* be skipped at parsing time. */
// pkg-private for testing
int ignoreAbove() {
public int ignoreAbove() {
return ignoreAbove;
}

View File

@ -36,6 +36,7 @@ import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@ -102,6 +103,12 @@ public class PlainHighlighter implements Highlighter {
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
};
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
try {
@ -110,7 +117,11 @@ public class PlainHighlighter implements Highlighter {
for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
if (text.length() > maxAnalyzedOffset) {
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " +

View File

@ -36,6 +36,7 @@ import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@ -65,11 +66,16 @@ public class UnifiedHighlighter implements Highlighter {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}
List<Snippet> snippets = new ArrayList<>();
int numberOfFragments = field.fieldOptions().numberOfFragments();
try {
final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
@ -82,7 +88,11 @@ public class UnifiedHighlighter implements Highlighter {
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
final OffsetSource offsetSource = getOffsetSource(fieldType);
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
int fieldValueLength = fieldValue.length();
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +