Fix highlighting on a keyword field that defines a normalizer (#27604)
* Fix highlighting on a keyword field that defines a normalizer. The `plain` and sometimes the `unified` highlighters need to re-analyze the content to highlight a field. This change makes sure that we don't ignore the normalizer defined on the keyword field for this analysis.
This commit is contained in:
parent
17a2d574de
commit
da50fa4540
|
@ -31,7 +31,6 @@ import org.apache.lucene.search.highlight.SimpleFragmenter;
|
||||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
||||||
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
|
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
|
||||||
import org.apache.lucene.search.highlight.TextFragment;
|
import org.apache.lucene.search.highlight.TextFragment;
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.BytesRefHash;
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
import org.apache.lucene.util.CollectionUtil;
|
import org.apache.lucene.util.CollectionUtil;
|
||||||
import org.elasticsearch.ExceptionsHelper;
|
import org.elasticsearch.ExceptionsHelper;
|
||||||
|
@ -48,6 +47,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter.convertFieldValue;
|
||||||
|
import static org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter.getAnalyzer;
|
||||||
|
|
||||||
public class PlainHighlighter implements Highlighter {
|
public class PlainHighlighter implements Highlighter {
|
||||||
private static final String CACHE_KEY = "highlight-plain";
|
private static final String CACHE_KEY = "highlight-plain";
|
||||||
|
|
||||||
|
@ -100,18 +102,12 @@ public class PlainHighlighter implements Highlighter {
|
||||||
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
|
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
|
||||||
ArrayList<TextFragment> fragsList = new ArrayList<>();
|
ArrayList<TextFragment> fragsList = new ArrayList<>();
|
||||||
List<Object> textsToHighlight;
|
List<Object> textsToHighlight;
|
||||||
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
|
Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), mapper.fieldType());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
|
textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
|
||||||
|
|
||||||
for (Object textToHighlight : textsToHighlight) {
|
for (Object textToHighlight : textsToHighlight) {
|
||||||
String text;
|
String text = convertFieldValue(mapper.fieldType(), textToHighlight);
|
||||||
if (textToHighlight instanceof BytesRef) {
|
|
||||||
text = mapper.fieldType().valueForDisplay(textToHighlight).toString();
|
|
||||||
} else {
|
|
||||||
text = textToHighlight.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
|
try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
|
||||||
if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
|
if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
|
||||||
|
|
|
@ -32,8 +32,11 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CollectionUtil;
|
import org.apache.lucene.util.CollectionUtil;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.text.Text;
|
import org.elasticsearch.common.text.Text;
|
||||||
|
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||||
import org.elasticsearch.index.mapper.FieldMapper;
|
import org.elasticsearch.index.mapper.FieldMapper;
|
||||||
|
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
||||||
import org.elasticsearch.search.fetch.FetchSubPhase;
|
import org.elasticsearch.search.fetch.FetchSubPhase;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
@ -50,8 +53,6 @@ import java.util.stream.Collectors;
|
||||||
import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
|
import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
|
||||||
|
|
||||||
public class UnifiedHighlighter implements Highlighter {
|
public class UnifiedHighlighter implements Highlighter {
|
||||||
private static final String CACHE_KEY = "highlight-unified";
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean canHighlight(FieldMapper fieldMapper) {
|
public boolean canHighlight(FieldMapper fieldMapper) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -63,36 +64,20 @@ public class UnifiedHighlighter implements Highlighter {
|
||||||
SearchContextHighlight.Field field = highlighterContext.field;
|
SearchContextHighlight.Field field = highlighterContext.field;
|
||||||
SearchContext context = highlighterContext.context;
|
SearchContext context = highlighterContext.context;
|
||||||
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
|
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
|
||||||
|
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
|
||||||
if (!hitContext.cache().containsKey(CACHE_KEY)) {
|
CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
|
||||||
hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
|
field.fieldOptions().postTags()[0], encoder);
|
||||||
}
|
|
||||||
|
|
||||||
HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
|
|
||||||
MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);
|
|
||||||
|
|
||||||
if (mapperHighlighterEntry == null) {
|
|
||||||
Encoder encoder = field.fieldOptions().encoder().equals("html") ?
|
|
||||||
HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
|
|
||||||
CustomPassageFormatter passageFormatter =
|
|
||||||
new CustomPassageFormatter(field.fieldOptions().preTags()[0],
|
|
||||||
field.fieldOptions().postTags()[0], encoder);
|
|
||||||
mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
|
|
||||||
}
|
|
||||||
|
|
||||||
List<Snippet> snippets = new ArrayList<>();
|
List<Snippet> snippets = new ArrayList<>();
|
||||||
int numberOfFragments;
|
int numberOfFragments;
|
||||||
try {
|
try {
|
||||||
Analyzer analyzer =
|
|
||||||
context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
|
final Analyzer analyzer =
|
||||||
|
getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldMapper.fieldType());
|
||||||
List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
|
List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
|
||||||
fieldValues = fieldValues.stream().map(obj -> {
|
fieldValues = fieldValues.stream()
|
||||||
if (obj instanceof BytesRef) {
|
.map((s) -> convertFieldValue(fieldMapper.fieldType(), s))
|
||||||
return fieldMapper.fieldType().valueForDisplay(obj).toString();
|
.collect(Collectors.toList());
|
||||||
} else {
|
|
||||||
return obj;
|
|
||||||
}
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
|
final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
|
||||||
final CustomUnifiedHighlighter highlighter;
|
final CustomUnifiedHighlighter highlighter;
|
||||||
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
|
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
|
||||||
|
@ -102,15 +87,14 @@ public class UnifiedHighlighter implements Highlighter {
|
||||||
// breaks the text on, so we don't lose the distinction between the different values of a field and we
|
// breaks the text on, so we don't lose the distinction between the different values of a field and we
|
||||||
// get back a snippet per value
|
// get back a snippet per value
|
||||||
CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
|
CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
|
||||||
highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource,
|
highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
|
||||||
mapperHighlighterEntry.passageFormatter, field.fieldOptions().boundaryScannerLocale(),
|
field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize());
|
||||||
breakIterator, fieldValue, field.fieldOptions().noMatchSize());
|
|
||||||
numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
|
numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
|
||||||
} else {
|
} else {
|
||||||
//using paragraph separator we make sure that each field value holds a discrete passage for highlighting
|
//using paragraph separator we make sure that each field value holds a discrete passage for highlighting
|
||||||
BreakIterator bi = getBreakIterator(field);
|
BreakIterator bi = getBreakIterator(field);
|
||||||
highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource,
|
highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
|
||||||
mapperHighlighterEntry.passageFormatter, field.fieldOptions().boundaryScannerLocale(), bi,
|
field.fieldOptions().boundaryScannerLocale(), bi,
|
||||||
fieldValue, field.fieldOptions().noMatchSize());
|
fieldValue, field.fieldOptions().noMatchSize());
|
||||||
numberOfFragments = field.fieldOptions().numberOfFragments();
|
numberOfFragments = field.fieldOptions().numberOfFragments();
|
||||||
}
|
}
|
||||||
|
@ -210,6 +194,24 @@ public class UnifiedHighlighter implements Highlighter {
|
||||||
return filteredSnippets;
|
return filteredSnippets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
|
||||||
|
if (type instanceof KeywordFieldMapper.KeywordFieldType) {
|
||||||
|
KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) type;
|
||||||
|
if (keywordFieldType.normalizer() != null) {
|
||||||
|
return keywordFieldType.normalizer();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return docMapper.mappers().indexAnalyzer();
|
||||||
|
}
|
||||||
|
|
||||||
|
static String convertFieldValue(MappedFieldType type, Object value) {
|
||||||
|
if (value instanceof BytesRef) {
|
||||||
|
return type.valueForDisplay(value).toString();
|
||||||
|
} else {
|
||||||
|
return value.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
|
private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
|
||||||
//postings highlighter accepts all values in a single string, as offsets etc. need to match with content
|
//postings highlighter accepts all values in a single string, as offsets etc. need to match with content
|
||||||
//loaded from stored fields, we merge all values using a proper separator
|
//loaded from stored fields, we merge all values using a proper separator
|
||||||
|
@ -226,17 +228,4 @@ public class UnifiedHighlighter implements Highlighter {
|
||||||
}
|
}
|
||||||
return OffsetSource.ANALYSIS;
|
return OffsetSource.ANALYSIS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static class HighlighterEntry {
|
|
||||||
Map<FieldMapper, MapperHighlighterEntry> mappers = new HashMap<>();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static class MapperHighlighterEntry {
|
|
||||||
final CustomPassageFormatter passageFormatter;
|
|
||||||
|
|
||||||
private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) {
|
|
||||||
this.passageFormatter = passageFormatter;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2915,4 +2915,32 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
assertThat(field.getFragments()[0].string(), equalTo("<em>brown</em>"));
|
assertThat(field.getFragments()[0].string(), equalTo("<em>brown</em>"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWithNormalizer() throws Exception {
|
||||||
|
Builder builder = Settings.builder()
|
||||||
|
.put(indexSettings())
|
||||||
|
.putList("index.analysis.normalizer.my_normalizer.filter", "lowercase");
|
||||||
|
|
||||||
|
assertAcked(prepareCreate("test").setSettings(builder.build())
|
||||||
|
.addMapping("doc", "keyword",
|
||||||
|
"type=keyword,normalizer=my_normalizer"));
|
||||||
|
ensureGreen();
|
||||||
|
|
||||||
|
client().prepareIndex("test", "doc", "0")
|
||||||
|
.setSource("keyword", "Hello World")
|
||||||
|
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
|
||||||
|
.get();
|
||||||
|
|
||||||
|
for (String highlighterType : new String[] {"unified", "plain"}) {
|
||||||
|
SearchResponse searchResponse = client().prepareSearch()
|
||||||
|
.setQuery(matchQuery("keyword", "hello world"))
|
||||||
|
.highlighter(new HighlightBuilder()
|
||||||
|
.field(new Field("keyword").highlighterType(highlighterType)))
|
||||||
|
.get();
|
||||||
|
assertHitCount(searchResponse, 1);
|
||||||
|
HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("keyword");
|
||||||
|
assertThat(field.getFragments().length, equalTo(1));
|
||||||
|
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue