From 043c1f5d428f11f9303e43058d5a61685a638608 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 16 Apr 2019 18:16:55 +0200 Subject: [PATCH] Unified highlighter should respect no_match_size with number_of_fragments set to 0 (#41069) The unified highlighter returns the first sentence of the text when number_of_fragments is set to 0 (full highlighting). This is a legacy of the removed postings highlighter that was based on sentence break only. This commit changes this behavior in order to respect the provided no_match_size value when number_of_fragments is set to 0. This means that the behavior will be consistent for any value of the number_of_fragments option. Closes #41066 --- .../highlight/UnifiedHighlighter.java | 37 ------------------- .../highlight/HighlighterSearchIT.java | 8 ++-- 2 files changed, 5 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index af37fa4edab..2d570d2b7c7 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -124,8 +124,6 @@ public class UnifiedHighlighter implements Highlighter { "Failed to highlight field [" + highlighterContext.fieldName + "]", e); } - snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments()); - if (field.fieldOptions().scoreOrdered()) { //let's sort the snippets by score if needed CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore())); @@ -185,41 +183,6 @@ public class UnifiedHighlighter implements Highlighter { } } - protected static List filterSnippets(List snippets, int numberOfFragments) { - - //We need to filter the snippets as due to no_match_size we could have - //either highlighted snippets or non highlighted ones and we don't want to mix those up - List filteredSnippets = new ArrayList<>(snippets.size()); - for (Snippet snippet : snippets) { - if (snippet.isHighlighted()) { - filteredSnippets.add(snippet); - } - } - - //if there's at least one highlighted snippet, we return all the highlighted ones - //otherwise we return the first non highlighted one if available - if (filteredSnippets.size() == 0) { - if (snippets.size() > 0) { - Snippet snippet = snippets.get(0); - //if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0) - //we need to return the first sentence of the content rather than the whole content - if (numberOfFragments == 0) { - BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT); - String text = snippet.getText(); - bi.setText(text); - int next = bi.next(); - if (next != BreakIterator.DONE) { - String newText = text.substring(0, next).trim(); - snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted()); - } - } - filteredSnippets.add(snippet); - } - } - - return filteredSnippets; - } - protected static String convertFieldValue(MappedFieldType type, Object value) { if (value instanceof BytesRef) { return type.valueForDisplay(value).toString(); diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index e111abe0d51..d1a66969531 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -1715,9 +1715,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some")); // We can also ask for a fragment longer than the input string and get the whole string - field.highlighterType("plain").noMatchSize(text.length() * 2); - response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); - assertHighlight(response, 0, "text", 0, 1, equalTo(text)); + for (String type : new String[] { "plain", "unified" }) { + field.highlighterType(type).noMatchSize(text.length() * 2).numOfFragments(0); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo(text)); + } field.highlighterType("fvh"); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();