Unified highlighter should respect no_match_size with number_of_fragments set to 0 (#41069)

The unified highlighter returns the first sentence of the text when number_of_fragments
is set to 0 (full highlighting). This is a legacy of the removed postings highlighter
that was based on sentence break only. This commit changes this behavior in order
to respect the provided no_match_size value when number_of_fragments is set to 0.
This means that the behavior will be consistent for any value of the number_of_fragments option.

Closes #41066
This commit is contained in:
Jim Ferenczi 2019-04-16 18:16:55 +02:00 committed by jimczi
parent 9bf8bd40ae
commit 043c1f5d42
2 changed files with 5 additions and 40 deletions

View File

@ -124,8 +124,6 @@ public class UnifiedHighlighter implements Highlighter {
"Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
if (field.fieldOptions().scoreOrdered()) {
//let's sort the snippets by score if needed
CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
@ -185,41 +183,6 @@ public class UnifiedHighlighter implements Highlighter {
}
}
protected static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
//We need to filter the snippets as due to no_match_size we could have
//either highlighted snippets or non highlighted ones and we don't want to mix those up
List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
for (Snippet snippet : snippets) {
if (snippet.isHighlighted()) {
filteredSnippets.add(snippet);
}
}
//if there's at least one highlighted snippet, we return all the highlighted ones
//otherwise we return the first non highlighted one if available
if (filteredSnippets.size() == 0) {
if (snippets.size() > 0) {
Snippet snippet = snippets.get(0);
//if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0)
//we need to return the first sentence of the content rather than the whole content
if (numberOfFragments == 0) {
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
String text = snippet.getText();
bi.setText(text);
int next = bi.next();
if (next != BreakIterator.DONE) {
String newText = text.substring(0, next).trim();
snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted());
}
}
filteredSnippets.add(snippet);
}
}
return filteredSnippets;
}
protected static String convertFieldValue(MappedFieldType type, Object value) {
if (value instanceof BytesRef) {
return type.valueForDisplay(value).toString();

View File

@ -1715,9 +1715,11 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some"));
// We can also ask for a fragment longer than the input string and get the whole string
field.highlighterType("plain").noMatchSize(text.length() * 2);
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
assertHighlight(response, 0, "text", 0, 1, equalTo(text));
for (String type : new String[] { "plain", "unified" }) {
field.highlighterType(type).noMatchSize(text.length() * 2).numOfFragments(0);
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
assertHighlight(response, 0, "text", 0, 1, equalTo(text));
}
field.highlighterType("fvh");
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();