Keyword field values with length more than ignore_above are not indexed. But highlighters still were retrieving these values from _source and were trying to highlight them. This sometimes lead to errors if a field length exceeded max_analyzed_offset. But also this is an overall wrong behaviour to attempt to highlight something that was ignored during indexing. This PR checks if a keyword value was ignored because of its length, and if yes, skips highlighting it. Backport: #53408 Closes #43800
This commit is contained in:
parent
376b2ae735
commit
a906f8a0e4
|
@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
|
|||
Therefore, these settings have been deprecated. You can safely remove these
|
||||
settings from the configuration of your nodes.
|
||||
|
||||
[discrete]
|
||||
[[breaking_77_highlighters_changes]]
|
||||
=== Highlighters changes
|
||||
|
||||
[discrete]
|
||||
==== Ignored keyword values are no longer highlighted
|
||||
If a keyword value was ignored during indexing because of its length
|
||||
(`ignore_above` parameter was applied), {es} doesn't attempt to
|
||||
highlight it anymore, which means no highlights are produced for
|
||||
ignored values.
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
---
|
||||
setup:
|
||||
- do:
|
||||
indices.create:
|
||||
index: test-index
|
||||
body:
|
||||
mappings:
|
||||
"properties":
|
||||
"k1":
|
||||
"type": "keyword"
|
||||
"k2":
|
||||
"type": "keyword"
|
||||
"ignore_above": 3
|
||||
- do:
|
||||
bulk:
|
||||
index: test-index
|
||||
refresh: true
|
||||
body:
|
||||
- '{"index": {"_id": "1"}}'
|
||||
- '{"k1": "123", "k2" : "123"}'
|
||||
- '{"index": {"_id": "2"}}'
|
||||
- '{"k1": "1234", "k2" : "1234"}'
|
||||
|
||||
---
|
||||
"Plain Highligher should skip highlighting ignored keyword values":
|
||||
- skip:
|
||||
version: " - 7.6.99"
|
||||
reason: "skip highlighting of ignored values was introduced in 7.7"
|
||||
- do:
|
||||
search:
|
||||
index: test-index
|
||||
body:
|
||||
query:
|
||||
prefix:
|
||||
k1: "12"
|
||||
highlight:
|
||||
require_field_match: false
|
||||
fields:
|
||||
k2:
|
||||
type: plain
|
||||
|
||||
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
|
||||
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
|
||||
|
||||
---
|
||||
"Unified Highligher should skip highlighting ignored keyword values":
|
||||
- skip:
|
||||
version: " - 7.6.99"
|
||||
reason: "skip highlighting of ignored values was introduced in 7.7"
|
||||
- do:
|
||||
search:
|
||||
index: test-index
|
||||
body:
|
||||
query:
|
||||
prefix:
|
||||
k1: "12"
|
||||
highlight:
|
||||
require_field_match: false
|
||||
fields:
|
||||
k2:
|
||||
type: unified
|
||||
|
||||
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
|
||||
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
|
|
@ -314,8 +314,7 @@ public final class KeywordFieldMapper extends FieldMapper {
|
|||
|
||||
/** Values that have more chars than the return value of this method will
|
||||
* be skipped at parsing time. */
|
||||
// pkg-private for testing
|
||||
int ignoreAbove() {
|
||||
public int ignoreAbove() {
|
||||
return ignoreAbove;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.lucene.util.CollectionUtil;
|
|||
import org.elasticsearch.ExceptionsHelper;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
||||
|
@ -102,6 +103,12 @@ public class PlainHighlighter implements Highlighter {
|
|||
ArrayList<TextFragment> fragsList = new ArrayList<>();
|
||||
List<Object> textsToHighlight;
|
||||
Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
|
||||
Integer keywordIgnoreAbove = null;
|
||||
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
|
||||
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
|
||||
.mappers().getMapper(highlighterContext.fieldName);
|
||||
keywordIgnoreAbove = mapper.ignoreAbove();
|
||||
};
|
||||
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
|
||||
|
||||
try {
|
||||
|
@ -110,7 +117,11 @@ public class PlainHighlighter implements Highlighter {
|
|||
|
||||
for (Object textToHighlight : textsToHighlight) {
|
||||
String text = convertFieldValue(fieldType, textToHighlight);
|
||||
if (text.length() > maxAnalyzedOffset) {
|
||||
int textLength = text.length();
|
||||
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
|
||||
continue; // skip highlighting keyword terms that were ignored during indexing
|
||||
}
|
||||
if (textLength > maxAnalyzedOffset) {
|
||||
throw new IllegalArgumentException(
|
||||
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
|
||||
"] doc of [" + context.index().getName() + "] index " +
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.elasticsearch.common.text.Text;
|
|||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.IdFieldMapper;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
||||
|
@ -65,11 +66,16 @@ public class UnifiedHighlighter implements Highlighter {
|
|||
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
|
||||
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
|
||||
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
|
||||
Integer keywordIgnoreAbove = null;
|
||||
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
|
||||
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
|
||||
.mappers().getMapper(highlighterContext.fieldName);
|
||||
keywordIgnoreAbove = mapper.ignoreAbove();
|
||||
}
|
||||
|
||||
List<Snippet> snippets = new ArrayList<>();
|
||||
int numberOfFragments = field.fieldOptions().numberOfFragments();
|
||||
try {
|
||||
|
||||
final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
|
||||
hitContext);
|
||||
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
|
||||
|
@ -82,7 +88,11 @@ public class UnifiedHighlighter implements Highlighter {
|
|||
final CustomUnifiedHighlighter highlighter;
|
||||
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
|
||||
final OffsetSource offsetSource = getOffsetSource(fieldType);
|
||||
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
|
||||
int fieldValueLength = fieldValue.length();
|
||||
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
|
||||
return null; // skip highlighting keyword terms that were ignored during indexing
|
||||
}
|
||||
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
|
||||
throw new IllegalArgumentException(
|
||||
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
|
||||
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +
|
||||
|
|
Loading…
Reference in New Issue