Fix highlighting on a stored keyword field (#21645)

* Fix highlighting on a stored keyword field

The highlighter converts stored keyword fields using toString().
Since the keyword fields are stored as utf8 bytes the conversion is broken.
This change uses BytesRef.utf8toString() to convert the field value in a valid string.

Fixes #21636

* Replace BytesRef#utf8ToString with MappedFieldType#valueForDisplay
This commit is contained in:
Jim Ferenczi 2016-11-21 10:29:30 +01:00 committed by GitHub
parent 6daeb56969
commit 90247446aa
2 changed files with 30 additions and 3 deletions

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ExceptionsHelper;
@ -106,7 +107,12 @@ public class PlainHighlighter implements Highlighter {
textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
for (Object textToHighlight : textsToHighlight) {
String text = textToHighlight.toString();
String text;
if (textToHighlight instanceof BytesRef) {
text = mapper.fieldType().valueForDisplay(textToHighlight).toString();
} else {
text = textToHighlight.toString();
}
try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {

View File

@ -24,7 +24,6 @@ import org.elasticsearch.Version;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.geo.GeoPoint;
@ -41,7 +40,6 @@ import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.indices.IndicesRequestCache;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
@ -106,6 +104,29 @@ public class HighlighterSearchIT extends ESIntegTestCase {
return Collections.singletonList(InternalSettingsPlugin.class);
}
public void testHighlightingWithStoredKeyword() throws IOException {
XContentBuilder mappings = jsonBuilder();
mappings.startObject();
mappings.startObject("type")
.startObject("properties")
.startObject("text")
.field("type", "keyword")
.field("store", true)
.endObject()
.endObject()
.endObject();
mappings.endObject();
assertAcked(prepareCreate("test")
.addMapping("type", mappings));
client().prepareIndex("test", "type", "1")
.setSource(jsonBuilder().startObject().field("text", "foo").endObject())
.get();
refresh();
SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "foo"))
.highlighter(new HighlightBuilder().field(new Field("text"))).get();
assertHighlight(search, 0, "text", 0, equalTo("<em>foo</em>"));
}
public void testHighlightingWithWildcardName() throws IOException {
// test the kibana case with * as fieldname that will try highlight all fields including meta fields
XContentBuilder mappings = jsonBuilder();