Include all sentences smaller than fragment_size in the unified highlighter (#28132)
The unified highlighter selects a single sentence per fragment, starting from the offset of the first highlighted term. This change modifies this selection and allows more than one sentence in a single fragment. The expansion is done forward only (to the right of the matching offset): sentences are added to the current fragment if and only if the overall size of the fragment stays smaller than the maximum length (fragment_size). We should also add a way to expand the left context with the surrounding sentences, but this is currently avoided because the unified highlighter in Lucene uses only the first offset that matches the query to derive the start and end offsets of the next fragment. If we expanded on the left, we could split multiple terms that would otherwise be grouped. Removing this limitation requires some changes in the core of the unified highlighter. Closes #28089
This commit is contained in:
parent
3c032f84f4
commit
87c841d178
|
@ -23,15 +23,23 @@ import java.text.CharacterIterator;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A custom break iterator that scans text to find break-delimited passages bounded by
|
* A custom break iterator that is used to find break-delimited passages bounded by
|
||||||
* a provided maximum length. This class delegates the boundary search to a first level
|
* a provided maximum length in the {@link UnifiedHighlighter} context.
|
||||||
* break iterator. When this break iterator finds a passage greater than the maximum length
|
* This class uses a {@link BreakIterator} to find the last break after the provided offset
|
||||||
|
* that would create a passage smaller than <code>maxLen</code>.
|
||||||
|
* If the {@link BreakIterator} cannot find a passage smaller than the maximum length,
|
||||||
* a secondary break iterator is used to re-split the passage at the first boundary after
|
* a secondary break iterator is used to re-split the passage at the first boundary after
|
||||||
* maximum length.
|
* maximum length.
|
||||||
|
*
|
||||||
* This is useful to split passages created by {@link BreakIterator}s like `sentence` that
|
* This is useful to split passages created by {@link BreakIterator}s like `sentence` that
|
||||||
* can create big outliers on semi-structured text.
|
* can create big outliers on semi-structured text.
|
||||||
*
|
*
|
||||||
|
*
|
||||||
* WARNING: This break iterator is designed to work with the {@link UnifiedHighlighter}.
|
* WARNING: This break iterator is designed to work with the {@link UnifiedHighlighter}.
|
||||||
|
*
|
||||||
|
* TODO: We should be able to create passages incrementally, starting from the offset of the first match and expanding or not
|
||||||
|
* depending on the offsets of subsequent matches. This is currently impossible because {@link FieldHighlighter} uses
|
||||||
|
* only the first matching offset to derive the start and end of each passage.
|
||||||
**/
|
**/
|
||||||
public class BoundedBreakIteratorScanner extends BreakIterator {
|
public class BoundedBreakIteratorScanner extends BreakIterator {
|
||||||
private final BreakIterator mainBreak;
|
private final BreakIterator mainBreak;
|
||||||
|
@ -93,7 +101,15 @@ public class BoundedBreakIteratorScanner extends BreakIterator {
|
||||||
innerEnd = windowEnd;
|
innerEnd = windowEnd;
|
||||||
} else {
|
} else {
|
||||||
windowStart = innerStart = mainBreak.preceding(offset);
|
windowStart = innerStart = mainBreak.preceding(offset);
|
||||||
windowEnd = innerEnd = mainBreak.following(offset-1);
|
windowEnd = innerEnd = mainBreak.following(offset - 1);
|
||||||
|
// expand to next break until we reach maxLen
|
||||||
|
while (innerEnd - innerStart < maxLen) {
|
||||||
|
int newEnd = mainBreak.following(innerEnd);
|
||||||
|
if (newEnd == DONE || (newEnd - innerStart) > maxLen) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
windowEnd = innerEnd = newEnd;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (innerEnd - innerStart > maxLen) {
|
if (innerEnd - innerStart > maxLen) {
|
||||||
|
|
|
@ -184,6 +184,20 @@ public class CustomUnifiedHighlighterTests extends ESTestCase {
|
||||||
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs);
|
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSmallSentenceBoundedBreakIterator() throws Exception {
|
||||||
|
final String[] inputs = {
|
||||||
|
"A short sentence. Followed by a bigger sentence that should be truncated. And a last short sentence."
|
||||||
|
};
|
||||||
|
final String[] outputs = {
|
||||||
|
"A short <b>sentence</b>.",
|
||||||
|
"Followed by a bigger <b>sentence</b>",
|
||||||
|
"And a last short <b>sentence</b>"
|
||||||
|
};
|
||||||
|
TermQuery query = new TermQuery(new Term("text", "sentence"));
|
||||||
|
assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT,
|
||||||
|
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 20), 0, outputs);
|
||||||
|
}
|
||||||
|
|
||||||
public void testRepeat() throws Exception {
|
public void testRepeat() throws Exception {
|
||||||
final String[] inputs = {
|
final String[] inputs = {
|
||||||
"Fun fun fun fun fun fun fun fun fun fun"
|
"Fun fun fun fun fun fun fun fun fun fun"
|
||||||
|
@ -205,4 +219,25 @@ public class CustomUnifiedHighlighterTests extends ESTestCase {
|
||||||
assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT,
|
assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT,
|
||||||
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs);
|
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGroupSentences() throws Exception {
|
||||||
|
final String[] inputs = {
|
||||||
|
"Two words. Followed by many words in a big sentence. One. Two. Three. And more words."
|
||||||
|
};
|
||||||
|
final String[] outputs = {
|
||||||
|
"<b>Two</b> <b>words</b>.",
|
||||||
|
"Followed by many <b>words</b>",
|
||||||
|
"<b>One</b>. <b>Two</b>. <b>Three</b>.",
|
||||||
|
"And more <b>words</b>.",
|
||||||
|
};
|
||||||
|
BooleanQuery query = new BooleanQuery.Builder()
|
||||||
|
.add(new TermQuery(new Term("text", "one")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term("text", "two")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term("text", "three")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term("text", "words")), BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT,
|
||||||
|
BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 20), 0, outputs);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -397,7 +397,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
for (int i = 0; i < indexRequestBuilders.length; i++) {
|
for (int i = 0; i < indexRequestBuilders.length; i++) {
|
||||||
assertHighlight(search, i, "title", 0,
|
assertHighlight(search, i, "title", 0,
|
||||||
equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch."));
|
equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch. Hopefully it works."));
|
||||||
assertHighlight(search, i, "title", 1, 2,
|
assertHighlight(search, i, "title", 1, 2,
|
||||||
equalTo("This is the second <em>bug</em> to perform highlighting on."));
|
equalTo("This is the second <em>bug</em> to perform highlighting on."));
|
||||||
}
|
}
|
||||||
|
@ -491,7 +491,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field-postings", 0, 1, equalTo("This is the first <xxx>test</xxx> sentence."));
|
assertHighlight(searchResponse, 0, "field-postings", 0, 1,
|
||||||
|
equalTo("This is the first <xxx>test</xxx> sentence. Here is the second one."));
|
||||||
assertHighlight(searchResponse, 0, "field-fvh", 0, 1, equalTo("This is the <xxx>test</xxx> with term_vectors"));
|
assertHighlight(searchResponse, 0, "field-fvh", 0, 1, equalTo("This is the <xxx>test</xxx> with term_vectors"));
|
||||||
assertHighlight(searchResponse, 0, "field-plain", 0, 1, equalTo("This is the <xxx>test</xxx> for the plain highlighter"));
|
assertHighlight(searchResponse, 0, "field-plain", 0, 1, equalTo("This is the <xxx>test</xxx> for the plain highlighter"));
|
||||||
}
|
}
|
||||||
|
@ -1386,7 +1387,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
|
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
|
||||||
|
|
||||||
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
|
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
|
||||||
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
assertHighlight(searchResponse, 0, "field0", 0, 1,
|
||||||
|
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
||||||
|
|
||||||
logger.info("--> highlighting and searching on field1");
|
logger.info("--> highlighting and searching on field1");
|
||||||
source = searchSource()
|
source = searchSource()
|
||||||
|
@ -1438,7 +1440,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
|
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
assertHighlight(searchResponse, 0, "field3", 0, 1,
|
||||||
|
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
||||||
|
|
||||||
logger.info("--> highlighting and searching on field4");
|
logger.info("--> highlighting and searching on field4");
|
||||||
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro"))
|
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro"))
|
||||||
|
@ -1453,7 +1456,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
|
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
|
||||||
|
|
||||||
logger.info("--> highlighting and searching on field4");
|
logger.info("--> highlighting and searching on field4");
|
||||||
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca"))
|
source = searchSource().postFilter(termQuery("type", "type2"))
|
||||||
|
.query(matchPhrasePrefixQuery("field4", "a fast quick blue ca"))
|
||||||
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
|
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
|
||||||
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
|
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
|
||||||
|
|
||||||
|
@ -1887,33 +1891,42 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
.highlighterType("plain")
|
.highlighterType("plain")
|
||||||
.noMatchSize(20);
|
.noMatchSize(20);
|
||||||
SearchResponse response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
SearchResponse response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
||||||
assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first"));
|
assertHighlight(response, 0, "text", 0, 1,
|
||||||
|
equalTo("This is the first"));
|
||||||
|
|
||||||
field.highlighterType("fvh");
|
field.highlighterType("fvh");
|
||||||
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
||||||
assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first sentence"));
|
assertHighlight(response, 0, "text", 0, 1,
|
||||||
|
equalTo("This is the first sentence"));
|
||||||
|
|
||||||
field.highlighterType("unified");
|
field.highlighterType("unified");
|
||||||
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
|
||||||
assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first sentence"));
|
assertHighlight(response, 0, "text", 0, 1,
|
||||||
|
equalTo("This is the first sentence"));
|
||||||
|
|
||||||
|
|
||||||
//if there's a match we only return the values with matches (whole value as number_of_fragments == 0)
|
//if there's a match we only return the values with matches (whole value as number_of_fragments == 0)
|
||||||
MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("text", "third fifth");
|
MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("text", "third fifth");
|
||||||
field.highlighterType("plain");
|
field.highlighterType("plain");
|
||||||
response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get();
|
response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get();
|
||||||
assertHighlight(response, 0, "text", 0, 2, equalTo("This is the <em>third</em> sentence. This is the fourth sentence."));
|
assertHighlight(response, 0, "text", 0, 2,
|
||||||
assertHighlight(response, 0, "text", 1, 2, equalTo("This is the <em>fifth</em> sentence"));
|
equalTo("This is the <em>third</em> sentence. This is the fourth sentence."));
|
||||||
|
assertHighlight(response, 0, "text", 1, 2,
|
||||||
|
equalTo("This is the <em>fifth</em> sentence"));
|
||||||
|
|
||||||
field.highlighterType("fvh");
|
field.highlighterType("fvh");
|
||||||
response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get();
|
response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get();
|
||||||
assertHighlight(response, 0, "text", 0, 2, equalTo("This is the <em>third</em> sentence. This is the fourth sentence."));
|
assertHighlight(response, 0, "text", 0, 2,
|
||||||
assertHighlight(response, 0, "text", 1, 2, equalTo("This is the <em>fifth</em> sentence"));
|
equalTo("This is the <em>third</em> sentence. This is the fourth sentence."));
|
||||||
|
assertHighlight(response, 0, "text", 1, 2,
|
||||||
|
equalTo("This is the <em>fifth</em> sentence"));
|
||||||
|
|
||||||
field.highlighterType("unified");
|
field.highlighterType("unified");
|
||||||
response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get();
|
response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get();
|
||||||
assertHighlight(response, 0, "text", 0, 2, equalTo("This is the <em>third</em> sentence. This is the fourth sentence."));
|
assertHighlight(response, 0, "text", 0, 2,
|
||||||
assertHighlight(response, 0, "text", 1, 2, equalTo("This is the <em>fifth</em> sentence"));
|
equalTo("This is the <em>third</em> sentence. This is the fourth sentence."));
|
||||||
|
assertHighlight(response, 0, "text", 1, 2,
|
||||||
|
equalTo("This is the <em>fifth</em> sentence"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighter() throws Exception {
|
public void testPostingsHighlighter() throws Exception {
|
||||||
|
@ -1989,7 +2002,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
new HighlightBuilder().field(new Field("field1").preTags("<1>").postTags("</1>")
|
new HighlightBuilder().field(new Field("field1").preTags("<1>").postTags("</1>")
|
||||||
.requireFieldMatch(true)))
|
.requireFieldMatch(true)))
|
||||||
.get();
|
.get();
|
||||||
assertHighlight(response, 0, "field1", 0, 1, equalTo("The <b>quick<b> brown <1>fox</1>."));
|
assertHighlight(response, 0, "field1", 0, 1,
|
||||||
|
equalTo("The <b>quick<b> brown <1>fox</1>. Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterNumberOfFragments() throws Exception {
|
public void testPostingsHighlighterNumberOfFragments() throws Exception {
|
||||||
|
@ -2012,9 +2026,12 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog."));
|
assertThat(searchResponse.getHits().getHits().length, equalTo(1));
|
||||||
assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
|
assertHighlight(searchResponse, 0, "field1", 0, 2,
|
||||||
assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
|
equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog." +
|
||||||
|
" The lazy red <field1>fox</field1> jumps over the quick dog."));
|
||||||
|
assertHighlight(searchResponse, 0, "field1", 1, 2,
|
||||||
|
equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
|
||||||
|
|
||||||
client().prepareIndex("test", "type1", "2")
|
client().prepareIndex("test", "type1", "2")
|
||||||
.setSource("field1", new String[]{
|
.setSource("field1", new String[]{
|
||||||
|
@ -2033,14 +2050,17 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
for (SearchHit searchHit : searchResponse.getHits()) {
|
for (SearchHit searchHit : searchResponse.getHits()) {
|
||||||
if ("1".equals(searchHit.getId())) {
|
if ("1".equals(searchHit.getId())) {
|
||||||
assertHighlight(searchHit, "field1", 0, 1, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. "
|
assertHighlight(searchHit, "field1", 0, 1,
|
||||||
|
equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. "
|
||||||
+ "The lazy red <field1>fox</field1> jumps over the quick dog. "
|
+ "The lazy red <field1>fox</field1> jumps over the quick dog. "
|
||||||
+ "The quick brown dog jumps over the lazy <field1>fox</field1>."));
|
+ "The quick brown dog jumps over the lazy <field1>fox</field1>."));
|
||||||
} else if ("2".equals(searchHit.getId())) {
|
} else if ("2".equals(searchHit.getId())) {
|
||||||
assertHighlight(searchHit, "field1", 0, 3,
|
assertHighlight(searchHit, "field1", 0, 3,
|
||||||
equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. Second sentence not finished"));
|
equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. Second sentence not finished"));
|
||||||
assertHighlight(searchHit, "field1", 1, 3, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
|
assertHighlight(searchHit, "field1", 1, 3,
|
||||||
assertHighlight(searchHit, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
|
equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
|
||||||
|
assertHighlight(searchHit, "field1", 2, 3,
|
||||||
|
equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
|
||||||
} else {
|
} else {
|
||||||
fail("Only hits with id 1 and 2 are returned");
|
fail("Only hits with id 1 and 2 are returned");
|
||||||
}
|
}
|
||||||
|
@ -2083,7 +2103,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
logger.info("Running multi-match type: [{}] highlight with type: [{}]", matchQueryType, highlighterType);
|
logger.info("Running multi-match type: [{}] highlight with type: [{}]", matchQueryType, highlighterType);
|
||||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||||
assertHitCount(searchResponse, 1L);
|
assertHitCount(searchResponse, 1L);
|
||||||
assertHighlight(searchResponse, 0, "field1", 0, anyOf(equalTo("<field1>The quick brown fox</field1> jumps over"),
|
assertHighlight(searchResponse, 0, "field1", 0,
|
||||||
|
anyOf(equalTo("<field1>The quick brown fox</field1> jumps over"),
|
||||||
equalTo("<field1>The</field1> <field1>quick</field1> <field1>brown</field1> <field1>fox</field1> jumps over")));
|
equalTo("<field1>The</field1> <field1>quick</field1> <field1>brown</field1> <field1>fox</field1> jumps over")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2112,13 +2133,15 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
Map<String, HighlightField> highlightFieldMap = searchResponse.getHits().getAt(0).getHighlightFields();
|
Map<String, HighlightField> highlightFieldMap = searchResponse.getHits().getAt(0).getHighlightFields();
|
||||||
assertThat(highlightFieldMap.size(), equalTo(1));
|
assertThat(highlightFieldMap.size(), equalTo(1));
|
||||||
HighlightField field1 = highlightFieldMap.get("field1");
|
HighlightField field1 = highlightFieldMap.get("field1");
|
||||||
assertThat(field1.fragments().length, equalTo(5));
|
assertThat(field1.fragments().length, equalTo(4));
|
||||||
assertThat(field1.fragments()[0].string(),
|
assertThat(field1.fragments()[0].string(),
|
||||||
equalTo("This <em>sentence</em> contains three <em>sentence</em> occurrences (<em>sentence</em>)."));
|
equalTo("This <em>sentence</em> contains three <em>sentence</em> occurrences (<em>sentence</em>)."));
|
||||||
assertThat(field1.fragments()[1].string(), equalTo("This <em>sentence</em> contains two <em>sentence</em> matches."));
|
assertThat(field1.fragments()[1].string(),
|
||||||
assertThat(field1.fragments()[2].string(), equalTo("This is the second value's first <em>sentence</em>."));
|
equalTo("This <em>sentence</em> contains one match, not that short. " +
|
||||||
assertThat(field1.fragments()[3].string(), equalTo("This <em>sentence</em> contains one match, not that short."));
|
"This <em>sentence</em> contains two <em>sentence</em> matches."));
|
||||||
assertThat(field1.fragments()[4].string(),
|
assertThat(field1.fragments()[2].string(),
|
||||||
|
equalTo("This is the second value's first <em>sentence</em>. This one contains no matches."));
|
||||||
|
assertThat(field1.fragments()[3].string(),
|
||||||
equalTo("One <em>sentence</em> match here and scored lower since the text is quite long, not that appealing."));
|
equalTo("One <em>sentence</em> match here and scored lower since the text is quite long, not that appealing."));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2139,7 +2162,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
for (int i = 0; i < indexRequestBuilders.length; i++) {
|
for (int i = 0; i < indexRequestBuilders.length; i++) {
|
||||||
assertHighlight(searchResponse, i, "title", 0, 1,
|
assertHighlight(searchResponse, i, "title", 0, 1,
|
||||||
equalTo("This is a html escaping highlighting <em>test</em> for *&?"));
|
equalTo("This is a html escaping highlighting <em>test</em> for *&? elasticsearch"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2173,7 +2196,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
assertHitCount(searchResponse, 1L);
|
assertHitCount(searchResponse, 1L);
|
||||||
SearchHit hit = searchResponse.getHits().getAt(0);
|
SearchHit hit = searchResponse.getHits().getAt(0);
|
||||||
//stopwords are not highlighted since not indexed
|
//stopwords are not highlighted since not indexed
|
||||||
assertHighlight(hit, "title", 0, 1, equalTo("this is a <em>test</em> ."));
|
assertHighlight(hit, "title", 0, 1, equalTo("this is a <em>test</em> . Second sentence."));
|
||||||
|
|
||||||
// search on title.key and highlight on title
|
// search on title.key and highlight on title
|
||||||
searchResponse = client().prepareSearch()
|
searchResponse = client().prepareSearch()
|
||||||
|
@ -2183,7 +2206,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
|
|
||||||
//stopwords are now highlighted since we used only whitespace analyzer here
|
//stopwords are now highlighted since we used only whitespace analyzer here
|
||||||
assertHighlight(searchResponse, 0, "title.key", 0, 1,
|
assertHighlight(searchResponse, 0, "title.key", 0, 1,
|
||||||
equalTo("<em>this</em> <em>is</em> <em>a</em> <em>test</em> ."));
|
equalTo("<em>this</em> <em>is</em> <em>a</em> <em>test</em> . Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterMultiMapperFromSource() throws Exception {
|
public void testPostingsHighlighterMultiMapperFromSource() throws Exception {
|
||||||
|
@ -2258,7 +2281,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
.highlighter(highlight().field("field2").preTags("<x>").postTags("</x>"));
|
.highlighter(highlight().field("field2").preTags("<x>").postTags("</x>"));
|
||||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The quick <x>brown</x> fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterCommonTermsQuery() throws IOException {
|
public void testPostingsHighlighterCommonTermsQuery() throws IOException {
|
||||||
|
@ -2275,7 +2299,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||||
assertHitCount(searchResponse, 1L);
|
assertHitCount(searchResponse, 1L);
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static XContentBuilder type1PostingsffsetsMapping() throws IOException {
|
private static XContentBuilder type1PostingsffsetsMapping() throws IOException {
|
||||||
|
@ -2299,7 +2324,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui"))
|
SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui"))
|
||||||
.highlighter(highlight().field("field2"));
|
.highlighter(highlight().field("field2"));
|
||||||
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <em>quick</em> brown fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterFuzzyQuery() throws Exception {
|
public void testPostingsHighlighterFuzzyQuery() throws Exception {
|
||||||
|
@ -2315,7 +2341,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
.highlighter(highlight().field("field2"));
|
.highlighter(highlight().field("field2"));
|
||||||
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <em>quick</em> brown fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterRegexpQuery() throws Exception {
|
public void testPostingsHighlighterRegexpQuery() throws Exception {
|
||||||
|
@ -2331,7 +2358,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
.highlighter(highlight().field("field2"));
|
.highlighter(highlight().field("field2"));
|
||||||
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <em>quick</em> brown fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterWildcardQuery() throws Exception {
|
public void testPostingsHighlighterWildcardQuery() throws Exception {
|
||||||
|
@ -2347,14 +2375,16 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
.highlighter(highlight().field("field2"));
|
.highlighter(highlight().field("field2"));
|
||||||
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <em>quick</em> brown fox jumps over the lazy dog! Second sentence."));
|
||||||
|
|
||||||
source = searchSource().query(wildcardQuery("field2", "qu*k"))
|
source = searchSource().query(wildcardQuery("field2", "qu*k"))
|
||||||
.highlighter(highlight().field("field2"));
|
.highlighter(highlight().field("field2"));
|
||||||
searchResponse = client().prepareSearch("test").setSource(source).get();
|
searchResponse = client().prepareSearch("test").setSource(source).get();
|
||||||
assertHitCount(searchResponse, 1L);
|
assertHitCount(searchResponse, 1L);
|
||||||
|
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <em>quick</em> brown fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterTermRangeQuery() throws Exception {
|
public void testPostingsHighlighterTermRangeQuery() throws Exception {
|
||||||
|
@ -2384,7 +2414,8 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2"))
|
SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2"))
|
||||||
.highlighter(highlight().field("field2"));
|
.highlighter(highlight().field("field2"));
|
||||||
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get();
|
||||||
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
assertHighlight(searchResponse, 0, "field2", 0, 1,
|
||||||
|
equalTo("The <em>quick</em> brown fox jumps over the lazy dog! Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception {
|
public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception {
|
||||||
|
@ -2479,7 +2510,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
|
||||||
assertThat(searchResponse.getHits().getHits().length, equalTo(COUNT));
|
assertThat(searchResponse.getHits().getHits().length, equalTo(COUNT));
|
||||||
for (SearchHit hit : searchResponse.getHits()) {
|
for (SearchHit hit : searchResponse.getHits()) {
|
||||||
String prefix = prefixes.get(hit.getId());
|
String prefix = prefixes.get(hit.getId());
|
||||||
assertHighlight(hit, "field1", 0, 1, equalTo("Sentence " + prefix + " <em>test</em>."));
|
assertHighlight(hit, "field1", 0, 1, equalTo("Sentence " + prefix + " <em>test</em>. Sentence two."));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue