Added support for highlighting multi term queries using the postings highlighter
Closes #4042
This commit is contained in:
parent
123bc98d81
commit
5474cffe8f
|
@ -21,10 +21,11 @@ package org.elasticsearch.search.highlight;
|
|||
import com.google.common.collect.Maps;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoringRewrite;
|
||||
import org.apache.lucene.search.TopTermsRewrite;
|
||||
import org.apache.lucene.search.highlight.Encoder;
|
||||
import org.apache.lucene.search.postingshighlight.CustomPassageFormatter;
|
||||
import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter;
|
||||
|
@ -67,9 +68,10 @@ public class PostingsHighlighter implements Highlighter {
|
|||
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
|
||||
|
||||
if (!hitContext.cache().containsKey(CACHE_KEY)) {
|
||||
//get the non rewritten query and rewrite it
|
||||
Query query;
|
||||
try {
|
||||
query = rewrite(context.query());
|
||||
query = rewrite(context, hitContext.topLevelReader());
|
||||
} catch (IOException e) {
|
||||
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
|
||||
}
|
||||
|
@ -107,7 +109,7 @@ public class PostingsHighlighter implements Highlighter {
|
|||
//we highlight every value separately, calling the highlight method multiple times, only if we need to get back a snippet per value (whole value)
|
||||
int values = mergeValues ? 1 : textsToHighlight.size();
|
||||
for (int i = 0; i < values; i++) {
|
||||
Snippet[] fieldSnippets = highlighter.highlightDoc(highlighterContext.fieldName, mapperHighlighterEntry.filteredQueryTerms, new IndexSearcher(hitContext.reader()), hitContext.docId(), numberOfFragments);
|
||||
Snippet[] fieldSnippets = highlighter.highlightDoc(highlighterContext.fieldName, mapperHighlighterEntry.filteredQueryTerms, context.searcher(), hitContext.docId(), numberOfFragments);
|
||||
if (fieldSnippets != null) {
|
||||
for (Snippet fieldSnippet : fieldSnippets) {
|
||||
if (Strings.hasText(fieldSnippet.getText())) {
|
||||
|
@ -144,17 +146,49 @@ public class PostingsHighlighter implements Highlighter {
|
|||
return null;
|
||||
}
|
||||
|
||||
private static final IndexReader EMPTY_INDEXREADER = new MultiReader();
|
||||
/**
 * Returns the query whose terms will be highlighted, rewriting it against {@code reader} only when needed.
 * <p>
 * A rewrite is performed when {@code searchContext.queryRewritten()} is false, or when the parsed query is itself a
 * {@link MultiTermQuery} whose rewrite method is rejected by {@code allowsForTermExtraction}. In the latter case the
 * rewrite method is temporarily replaced with {@code TopTermsScoringBooleanQueryRewrite(50)} and set back once the
 * rewrite loop has finished. Only the top-level parsed query is checked with {@code instanceof}.
 * <p>
 * When no rewrite is needed, {@code searchContext.query()} is returned unchanged.
 *
 * @param searchContext source of the parsed query, the already rewritten query and the "query rewritten" flag
 * @param reader        reader passed to every {@code Query#rewrite} call
 * @return {@code searchContext.query()} when no rewrite is needed, otherwise the result of the rewrite loop
 * @throws IOException declared by this method; presumably propagated from the {@code Query#rewrite} calls
 */
private static Query rewrite(SearchContext searchContext, IndexReader reader) throws IOException {
|
||||
//rewrite is expensive: if the query was already rewritten we try not to rewrite
|
||||
boolean mustRewrite = !searchContext.queryRewritten();
|
||||
|
||||
Query original = searchContext.parsedQuery().query();
|
||||
|
||||
MultiTermQuery originalMultiTermQuery = null;
|
||||
MultiTermQuery.RewriteMethod originalRewriteMethod = null;
|
||||
if (original instanceof MultiTermQuery) {
|
||||
originalMultiTermQuery = (MultiTermQuery) original;
|
||||
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
|
||||
originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
|
||||
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
|
||||
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
|
||||
mustRewrite = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mustRewrite) {
|
||||
//return the rewritten query
|
||||
return searchContext.query();
|
||||
}
|
||||
|
||||
// NOTE(review): the next signature and the two EMPTY_INDEXREADER loop lines look like the pre-change side of this
// diff hunk (the reader-based loop lines follow them) - confirm before treating this span as compilable code
private static Query rewrite(Query original) throws IOException {
|
||||
Query query = original;
|
||||
for (Query rewrittenQuery = query.rewrite(EMPTY_INDEXREADER); rewrittenQuery != query;
|
||||
rewrittenQuery = query.rewrite(EMPTY_INDEXREADER)) {
|
||||
//keep rewriting until a rewrite call returns the very same instance it was invoked on
for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query;
|
||||
rewrittenQuery = query.rewrite(reader)) {
|
||||
query = rewrittenQuery;
|
||||
}
|
||||
|
||||
// NOTE(review): this restore is not in a finally block, so an IOException thrown by the rewrite loop would skip it
// and leave the temporary rewrite method set on the query object - confirm whether that object is reused elsewhere
if (originalMultiTermQuery != null) {
|
||||
if (originalRewriteMethod != null) {
|
||||
//set back the original rewrite method after the rewrite is done
|
||||
originalMultiTermQuery.setRewriteMethod(originalRewriteMethod);
|
||||
}
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
/**
 * Tells whether the given rewrite method is one of the two kinds accepted here for term extraction.
 *
 * @param rewriteMethod the rewrite method to check
 * @return {@code true} if it is an instance of {@link TopTermsRewrite} or {@link ScoringRewrite}, {@code false} otherwise
 */
private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
|
||||
return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite;
|
||||
}
|
||||
|
||||
private static SortedSet<Term> extractTerms(Query query) {
|
||||
SortedSet<Term> queryTerms = new TreeSet<Term>();
|
||||
query.extractTerms(queryTerms);
|
||||
|
|
|
@ -2019,7 +2019,7 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
|
|||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1")
|
||||
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").setRefresh(true).get();
|
||||
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy quick dog").setRefresh(true).get();
|
||||
|
||||
logger.info("--> highlighting and searching on field1");
|
||||
SearchSourceBuilder source = searchSource()
|
||||
|
@ -2049,28 +2049,28 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
|
|||
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy <xxx>quick</xxx> dog"));
|
||||
|
||||
logger.info("--> searching on _all, highlighting on field2");
|
||||
source = searchSource()
|
||||
.query(prefixQuery("_all", "qui"))
|
||||
.query(matchPhraseQuery("_all", "quick brown"))
|
||||
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>"));
|
||||
|
||||
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
//no snippets produced for prefix query, not supported by postings highlighter
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().size(), equalTo(0));
|
||||
//phrase query results in highlighting all different terms regardless of their positions
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy <xxx>quick</xxx> dog"));
|
||||
|
||||
//lets fall back to the standard highlighter then, what people would do with unsupported queries
|
||||
//let's fall back to the standard highlighter then, which is what people would do to highlight query matches
|
||||
logger.info("--> searching on _all, highlighting on field2, falling back to the plain highlighter");
|
||||
source = searchSource()
|
||||
.query(prefixQuery("_all", "qui"))
|
||||
.query(matchPhraseQuery("_all", "quick brown"))
|
||||
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>").highlighterType("highlighter"));
|
||||
|
||||
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(), equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy quick dog"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -2486,6 +2486,136 @@ public class HighlighterSearchTests extends AbstractIntegrationTest {
|
|||
.endObject().endObject();
|
||||
}
|
||||
|
||||
// Rewrite method names that the multi term query tests in this class loop over, passing each one to rewrite(...)
private static final String[] REWRITE_METHODS = new String[]{"constant_score_auto", "scoring_boolean", "constant_score_boolean",
|
||||
"constant_score_filter", "top_terms_boost_50", "top_terms_50"};
|
||||
|
||||
/**
 * Indexes a single document and, for every entry of {@code REWRITE_METHODS}, runs a prefix query ("qui") on field2
 * with highlighting on field2. Each search must return one hit whose first fragment has "quick" wrapped in
 * {@code <em>} tags and ends at the first sentence.
 * The mapping comes from type1PostingsffsetsMapping(), defined outside this view - presumably it enables the
 * postings highlighter; confirm against that helper.
 */
@Test
|
||||
public void testPostingsHighlighterPrefixQuery() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
|
||||
refresh();
|
||||
logger.info("--> highlighting and searching on field2");
|
||||
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui").rewrite(rewriteMethod))
|
||||
.highlight(highlight().field("field2"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Indexes a single document and runs a fuzzy query ("quck") on field2 with highlighting on field2, using a randomly
 * chosen search type. The search must return one hit whose first fragment has "quick" wrapped in {@code <em>} tags
 * and ends at the first sentence. No explicit rewrite method is set on the query in this test.
 */
@Test
|
||||
public void testPostingsHighlighterFuzzyQuery() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
|
||||
refresh();
|
||||
logger.info("--> highlighting and searching on field2");
|
||||
SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck"))
|
||||
.highlight(highlight().field("field2"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
||||
}
|
||||
|
||||
/**
 * Indexes a single document and, for every entry of {@code REWRITE_METHODS}, runs a regexp query ("qu[a-l]+k") on
 * field2 with highlighting on field2. Each search must return one hit whose first fragment has "quick" wrapped in
 * {@code <em>} tags and ends at the first sentence.
 */
@Test
|
||||
public void testPostingsHighlighterRegexpQuery() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
|
||||
refresh();
|
||||
logger.info("--> highlighting and searching on field2");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k").rewrite(rewriteMethod))
|
||||
.highlight(highlight().field("field2"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Indexes a single document and, for every entry of {@code REWRITE_METHODS}, runs two wildcard queries on field2
 * with highlighting on field2: a trailing wildcard ("qui*") and an inner wildcard ("qu*k"). Each search must return
 * one hit whose first fragment has "quick" wrapped in {@code <em>} tags and ends at the first sentence.
 */
@Test
|
||||
public void testPostingsHighlighterWildcardQuery() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
|
||||
refresh();
|
||||
logger.info("--> highlighting and searching on field2");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
//first pattern: wildcard at the end of the term
SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*").rewrite(rewriteMethod))
|
||||
.highlight(highlight().field("field2"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
||||
|
||||
//second pattern: wildcard in the middle of the term, same expected fragment
source = searchSource().query(wildcardQuery("field2", "qu*k").rewrite(rewriteMethod))
|
||||
.highlight(highlight().field("field2"));
|
||||
searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Indexes a single document whose field2 value is "aaab" and runs a range query on field2 (from "aaaa" inclusive to
 * "zzzz" exclusive) with highlighting on field2, using a randomly chosen search type. The search must return one hit
 * whose first fragment is the whole value wrapped in {@code <em>} tags.
 */
@Test
|
||||
public void testPostingsHighlighterTermRangeQuery() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "aaab").get();
|
||||
refresh();
|
||||
logger.info("--> highlighting and searching on field2");
|
||||
SearchSourceBuilder source = searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz"))
|
||||
.highlight(highlight().field("field2"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("<em>aaab</em>"));
|
||||
}
|
||||
|
||||
/**
 * Indexes a single document and, for every entry of {@code REWRITE_METHODS}, runs a query string query ("qui*")
 * with field2 as default field and highlighting on field2. Each search must return one hit whose first fragment has
 * "quick" wrapped in {@code <em>} tags and ends at the first sentence.
 */
@Test
|
||||
public void testPostingsHighlighterQueryString() throws Exception {
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
|
||||
refresh();
|
||||
logger.info("--> highlighting and searching on field2");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(queryString("qui*").defaultField("field2").rewrite(rewriteMethod))
|
||||
.highlight(highlight().field("field2"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHitCount(searchResponse, 1l);
|
||||
|
||||
assertThat(searchResponse.getHits().getAt(0).highlightFields().get("field2").fragments()[0].string(),
|
||||
equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Slow
|
||||
public void testPostingsHighlighterManyDocs() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue