Unified highlighter should respect no_match_size with number_of_fragments set to 0 (#41069)
The unified highlighter returns the first sentence of the text when number_of_fragments is set to 0 (full highlighting). This is a legacy of the removed postings highlighter that was based on sentence break only. This commit changes this behavior in order to respect the provided no_match_size value when number_of_fragments is set to 0. This means that the behavior will be consistent for any value of the number_of_fragments option. Closes #41066
This commit is contained in:
parent
9bf8bd40ae
commit
043c1f5d42
|
@ -124,8 +124,6 @@ public class UnifiedHighlighter implements Highlighter {
|
|||
"Failed to highlight field [" + highlighterContext.fieldName + "]", e);
|
||||
}
|
||||
|
||||
snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
|
||||
|
||||
if (field.fieldOptions().scoreOrdered()) {
|
||||
//let's sort the snippets by score if needed
|
||||
CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
|
||||
|
@ -185,41 +183,6 @@ public class UnifiedHighlighter implements Highlighter {
|
|||
}
|
||||
}
|
||||
|
||||
protected static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
|
||||
|
||||
//We need to filter the snippets as due to no_match_size we could have
|
||||
//either highlighted snippets or non highlighted ones and we don't want to mix those up
|
||||
List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
|
||||
for (Snippet snippet : snippets) {
|
||||
if (snippet.isHighlighted()) {
|
||||
filteredSnippets.add(snippet);
|
||||
}
|
||||
}
|
||||
|
||||
//if there's at least one highlighted snippet, we return all the highlighted ones
|
||||
//otherwise we return the first non highlighted one if available
|
||||
if (filteredSnippets.size() == 0) {
|
||||
if (snippets.size() > 0) {
|
||||
Snippet snippet = snippets.get(0);
|
||||
//if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0)
|
||||
//we need to return the first sentence of the content rather than the whole content
|
||||
if (numberOfFragments == 0) {
|
||||
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
String text = snippet.getText();
|
||||
bi.setText(text);
|
||||
int next = bi.next();
|
||||
if (next != BreakIterator.DONE) {
|
||||
String newText = text.substring(0, next).trim();
|
||||
snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted());
|
||||
}
|
||||
}
|
||||
filteredSnippets.add(snippet);
|
||||
}
|
||||
}
|
||||
|
||||
return filteredSnippets;
|
||||
}
|
||||
|
||||
protected static String convertFieldValue(MappedFieldType type, Object value) {
|
||||
if (value instanceof BytesRef) {
|
||||
return type.valueForDisplay(value).toString();
|
||||
|
|
|
@ -1715,9 +1715,11 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
|||
assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some"));
|
||||
|
||||
// We can also ask for a fragment longer than the input string and get the whole string
|
||||
field.highlighterType("plain").noMatchSize(text.length() * 2);
|
||||
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
||||
assertHighlight(response, 0, "text", 0, 1, equalTo(text));
|
||||
for (String type : new String[] { "plain", "unified" }) {
|
||||
field.highlighterType(type).noMatchSize(text.length() * 2).numOfFragments(0);
|
||||
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
||||
assertHighlight(response, 0, "text", 0, 1, equalTo(text));
|
||||
}
|
||||
|
||||
field.highlighterType("fvh");
|
||||
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
||||
|
|
Loading…
Reference in New Issue