diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index 2b85e358a7f..00de5bc5e75 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -57,12 +57,20 @@ highlighting using the postings highlighter on it: } -------------------------------------------------- +[NOTE] Note that the postings highlighter is meant to perform simple query terms highlighting, regardless of their positions. That means that when used for instance in combination with a phrase query, it will highlight all the terms that the query is composed of, regardless of whether they are actually part of a query match, effectively ignoring their positions. +[WARNING] +The postings highlighter does support highlighting of multi term queries, like +prefix queries, wildcard queries and so on. On the other hand, this requires +the queries to be rewritten using a proper +<> that supports multi term +extraction, which is a potentially expensive operation. + ==== Fast vector highlighter diff --git a/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java index 80d203e7de0..5c2fca64456 100644 --- a/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/PostingsHighlighter.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.search.highlight; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; @@ -33,6 +34,8 @@ import org.apache.lucene.util.CollectionUtil; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.lucene.search.XFilteredQuery; import org.elasticsearch.common.text.StringText; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; @@ -144,25 +147,17 @@ public class PostingsHighlighter implements Highlighter { } private static Query rewrite(HighlighterContext highlighterContext, IndexReader reader) throws IOException { - //rewrite is expensive: if the query was already rewritten we try not to rewrite - boolean mustRewrite = !highlighterContext.query.queryRewritten(); Query original = highlighterContext.query.originalQuery(); - MultiTermQuery originalMultiTermQuery = null; - MultiTermQuery.RewriteMethod originalRewriteMethod = null; - if (original instanceof MultiTermQuery) { - originalMultiTermQuery = (MultiTermQuery) original; - if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) { - originalRewriteMethod = originalMultiTermQuery.getRewriteMethod(); - originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)); - //we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method - mustRewrite = true; - } - } + //we walk the query tree and when we encounter multi term queries we need to make sure the rewrite method + //supports multi term extraction. If not we temporarily override it (and restore it after the rewrite). + List> modifiedMultiTermQueries = Lists.newArrayList(); + overrideMultiTermRewriteMethod(original, modifiedMultiTermQueries); - if (!mustRewrite) { - //return the rewritten query + //rewrite is expensive: if the query was already rewritten we try not to rewrite it again + if (highlighterContext.query.queryRewritten() && modifiedMultiTermQueries.size() == 0) { + //return the already rewritten query return highlighterContext.query.query(); } @@ -172,16 +167,46 @@ public class PostingsHighlighter implements Highlighter { query = rewrittenQuery; } - if (originalMultiTermQuery != null) { - if (originalRewriteMethod != null) { - //set back the original rewrite method after the rewrite is done - originalMultiTermQuery.setRewriteMethod(originalRewriteMethod); - } + //set back the original rewrite method after the rewrite is done + for (Tuple modifiedMultiTermQuery : modifiedMultiTermQueries) { + modifiedMultiTermQuery.v1().setRewriteMethod(modifiedMultiTermQuery.v2()); } return query; } + private static void overrideMultiTermRewriteMethod(Query query, List> modifiedMultiTermQueries) { + + if (query instanceof MultiTermQuery) { + MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query; + if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) { + MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod(); + originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)); + //we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method + modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod)); + } + } + + if (query instanceof BooleanQuery) { + BooleanQuery booleanQuery = (BooleanQuery) query; + for (BooleanClause booleanClause : booleanQuery) { + overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries); + } + } + + if (query instanceof XFilteredQuery) { + overrideMultiTermRewriteMethod(((XFilteredQuery) query).getQuery(), modifiedMultiTermQueries); + } + + if (query instanceof FilteredQuery) { + overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries); + } + + if (query instanceof ConstantScoreQuery) { + overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries); + } + } + private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) { return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite; } diff --git a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java index c832b68d198..bc8dea730f0 100644 --- a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java +++ b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java @@ -48,6 +48,7 @@ import java.util.Map; import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH; import static org.elasticsearch.client.Requests.searchRequest; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.index.query.FilterBuilders.*; import static org.elasticsearch.index.query.QueryBuilders.*; import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight; import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource; @@ -1393,7 +1394,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest { .setSource("field4", "a quick fast blue car").get(); refresh(); - source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")).from(0).size(60).explain(true) + source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")).from(0).size(60).explain(true) .highlight(highlight().field("field3").order("score").preTags("").postTags("")); searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet(); @@ -1401,7 +1402,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest { assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")).from(0).size(60).explain(true) + source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")).from(0).size(60).explain(true) .highlight(highlight().field("field4").order("score").preTags("").postTags("")); searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet(); @@ -1409,7 +1410,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest { assertHighlight(searchResponse, 1, "field4", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")).from(0).size(60).explain(true) + source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")).from(0).size(60).explain(true) .highlight(highlight().field("field4").order("score").preTags("").postTags("")); searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet(); @@ -2419,6 +2420,85 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest { } } + @Test + public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception { + + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping())); + ensureGreen(); + + client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); + refresh(); + + logger.info("--> highlighting and searching on field1"); + for (String rewriteMethod : REWRITE_METHODS) { + SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+").rewrite(rewriteMethod))) + .highlight(highlight().field("field1")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source) + .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } + } + + @Test + public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Exception { + + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping())); + ensureGreen(); + + client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); + refresh(); + + logger.info("--> highlighting and searching on field1"); + for (String rewriteMethod : REWRITE_METHODS) { + SearchSourceBuilder source = searchSource().query(boolQuery() + .should(constantScoreQuery(FilterBuilders.missingFilter("field1"))) + .should(matchQuery("field1", "test")) + .should(filteredQuery(queryString("field1:photo*").rewrite(rewriteMethod), null))) + .highlight(highlight().field("field1")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source) + .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } + } + + @Test + public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Exception { + + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping())); + ensureGreen(); + + client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); + refresh(); + + logger.info("--> highlighting and searching on field1"); + for (String rewriteMethod : REWRITE_METHODS) { + SearchSourceBuilder source = searchSource().query(boolQuery().must(prefixQuery("field1", "photo").rewrite(rewriteMethod)).should(matchQuery("field1", "test").minimumShouldMatch("0"))) + .highlight(highlight().field("field1")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source) + .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } + } + + @Test + public void testPostingsHighlighterQueryStringWithinFilteredQuery() throws Exception { + + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping())); + ensureGreen(); + + client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); + refresh(); + + logger.info("--> highlighting and searching on field1"); + for (String rewriteMethod : REWRITE_METHODS) { + SearchSourceBuilder source = searchSource().query(filteredQuery(queryString("field1:photo*").rewrite(rewriteMethod), missingFilter("field_null"))) + .highlight(highlight().field("field1")); + SearchResponse searchResponse = client().search(searchRequest("test").source(source) + .searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } + } + @Test @Slow public void testPostingsHighlighterManyDocs() throws Exception {