Unified highlighter should ignore terms that target the _id field (#41275)

The `_id` field uses a binary encoding to index terms that is not compatible with
the utf8 automaton that the unified highlighter creates to reanalyze the input.
For this reason, this commit ignores terms that target the `_id` field when
`require_field_match` is set to false.

Closes #37525
This commit is contained in:
Jim Ferenczi 2019-04-18 22:30:51 +02:00 committed by jimczi
parent 068f8ba223
commit 754037b71e
2 changed files with 27 additions and 1 deletions

View File

@ -35,6 +35,7 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
@ -109,7 +110,9 @@ public class UnifiedHighlighter implements Highlighter {
final String fieldName = highlighterContext.fieldName;
highlighter.setFieldMatcher((name) -> fieldName.equals(name));
} else {
highlighter.setFieldMatcher((name) -> true);
// ignore terms that target the _id field since they use a different encoding
// that is not compatible with utf8
highlighter.setFieldMatcher(name -> IdFieldMapper.NAME.equals(name) == false);
}
Snippet[] fieldSnippets = highlighter.highlightField(highlighterContext.fieldName,

View File

@ -2947,6 +2947,29 @@ public class HighlighterSearchIT extends ESIntegTestCase {
}
}
/**
 * Verifies that no highlight entry is returned for the {@code _id} field when a
 * query targeting {@code _id} is highlighted with a wildcard field pattern and
 * {@code require_field_match} set to {@code false}.
 *
 * The check is repeated for both the {@code plain} and the {@code unified}
 * highlighter types.
 */
public void testDisableHighlightIdField() throws Exception {
    // The same id is used to index the document and to query it back.
    final String docId = "d33f85bf1e51e84d9ab38948db9f3a068e1fe5294f1d8603914ac8c7bcc39ca1";

    assertAcked(prepareCreate("test").addMapping("doc", "keyword", "type=keyword"));
    ensureGreen();

    client().prepareIndex("test", "doc", docId)
        .setSource("keyword", "Hello World")
        .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
        .get();

    final String[] highlighterTypes = {"plain", "unified"};
    for (String type : highlighterTypes) {
        // Highlight every field ("*") without requiring the query field to match.
        Field wildcardField = new Field("*")
            .highlighterType(type)
            .requireFieldMatch(false);
        HighlightBuilder highlightBuilder = new HighlightBuilder().field(wildcardField);

        SearchResponse response = client().prepareSearch()
            .setQuery(matchQuery("_id", docId))
            .highlighter(highlightBuilder)
            .get();

        assertHitCount(response, 1);
        // The _id field must not appear among the highlighted fields.
        assertNull(response.getHits().getAt(0).getHighlightFields().get("_id"));
    }
}
public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {
public final class MockSnowBall extends TokenFilter {