Fixed multi term queries support in postings highlighter for non top-level queries
In #4052 we added support for highlighting multi term queries using the postings highlighter. That worked only for top-level queries though, and not for multi term queries that are nested for instance within a bool query, or filtered query, or a constant score query. The way we make this work is by walking the query structure and temporarily overriding the query rewrite method with a method that allows for multi terms extraction. Closes #5102
This commit is contained in:
parent
e913b6626f
commit
4e6610a798
|
@ -57,12 +57,20 @@ highlighting using the postings highlighter on it:
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
[NOTE]
|
||||
Note that the postings highlighter is meant to perform simple query terms
|
||||
highlighting, regardless of their positions. That means that when used for
|
||||
instance in combination with a phrase query, it will highlight all the terms
|
||||
that the query is composed of, regardless of whether they are actually part of
|
||||
a query match, effectively ignoring their positions.
|
||||
|
||||
[WARNING]
|
||||
The postings highlighter does support highlighting of multi term queries, like
|
||||
prefix queries, wildcard queries and so on. On the other hand, this requires
|
||||
the queries to be rewritten using a proper
|
||||
<<query-dsl-multi-term-rewrite,rewrite method>> that supports multi term
|
||||
extraction, which is a potentially expensive operation.
|
||||
|
||||
|
||||
==== Fast vector highlighter
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.highlight;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -33,6 +34,8 @@ import org.apache.lucene.util.CollectionUtil;
|
|||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.lucene.search.XFilteredQuery;
|
||||
import org.elasticsearch.common.text.StringText;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
|
||||
|
@ -144,25 +147,17 @@ public class PostingsHighlighter implements Highlighter {
|
|||
}
|
||||
|
||||
private static Query rewrite(HighlighterContext highlighterContext, IndexReader reader) throws IOException {
|
||||
//rewrite is expensive: if the query was already rewritten we try not to rewrite
|
||||
boolean mustRewrite = !highlighterContext.query.queryRewritten();
|
||||
|
||||
Query original = highlighterContext.query.originalQuery();
|
||||
|
||||
MultiTermQuery originalMultiTermQuery = null;
|
||||
MultiTermQuery.RewriteMethod originalRewriteMethod = null;
|
||||
if (original instanceof MultiTermQuery) {
|
||||
originalMultiTermQuery = (MultiTermQuery) original;
|
||||
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
|
||||
originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
|
||||
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
|
||||
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
|
||||
mustRewrite = true;
|
||||
}
|
||||
}
|
||||
//we walk the query tree and when we encounter multi term queries we need to make sure the rewrite method
|
||||
//supports multi term extraction. If not we temporarily override it (and restore it after the rewrite).
|
||||
List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries = Lists.newArrayList();
|
||||
overrideMultiTermRewriteMethod(original, modifiedMultiTermQueries);
|
||||
|
||||
if (!mustRewrite) {
|
||||
//return the rewritten query
|
||||
//rewrite is expensive: if the query was already rewritten we try not to rewrite it again
|
||||
if (highlighterContext.query.queryRewritten() && modifiedMultiTermQueries.size() == 0) {
|
||||
//return the already rewritten query
|
||||
return highlighterContext.query.query();
|
||||
}
|
||||
|
||||
|
@ -172,16 +167,46 @@ public class PostingsHighlighter implements Highlighter {
|
|||
query = rewrittenQuery;
|
||||
}
|
||||
|
||||
if (originalMultiTermQuery != null) {
|
||||
if (originalRewriteMethod != null) {
|
||||
//set back the original rewrite method after the rewrite is done
|
||||
originalMultiTermQuery.setRewriteMethod(originalRewriteMethod);
|
||||
}
|
||||
//set back the original rewrite method after the rewrite is done
|
||||
for (Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod> modifiedMultiTermQuery : modifiedMultiTermQueries) {
|
||||
modifiedMultiTermQuery.v1().setRewriteMethod(modifiedMultiTermQuery.v2());
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
private static void overrideMultiTermRewriteMethod(Query query, List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) {
|
||||
|
||||
if (query instanceof MultiTermQuery) {
|
||||
MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query;
|
||||
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
|
||||
MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
|
||||
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
|
||||
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
|
||||
modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod));
|
||||
}
|
||||
}
|
||||
|
||||
if (query instanceof BooleanQuery) {
|
||||
BooleanQuery booleanQuery = (BooleanQuery) query;
|
||||
for (BooleanClause booleanClause : booleanQuery) {
|
||||
overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries);
|
||||
}
|
||||
}
|
||||
|
||||
if (query instanceof XFilteredQuery) {
|
||||
overrideMultiTermRewriteMethod(((XFilteredQuery) query).getQuery(), modifiedMultiTermQueries);
|
||||
}
|
||||
|
||||
if (query instanceof FilteredQuery) {
|
||||
overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries);
|
||||
}
|
||||
|
||||
if (query instanceof ConstantScoreQuery) {
|
||||
overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
|
||||
return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite;
|
||||
}
|
||||
|
|
|
@ -48,6 +48,7 @@ import java.util.Map;
|
|||
import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH;
|
||||
import static org.elasticsearch.client.Requests.searchRequest;
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.elasticsearch.index.query.FilterBuilders.*;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.*;
|
||||
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
|
||||
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
|
||||
|
@ -1393,7 +1394,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
|||
.setSource("field4", "a quick fast blue car").get();
|
||||
refresh();
|
||||
|
||||
source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")).from(0).size(60).explain(true)
|
||||
source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")).from(0).size(60).explain(true)
|
||||
.highlight(highlight().field("field3").order("score").preTags("<x>").postTags("</x>"));
|
||||
|
||||
searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet();
|
||||
|
@ -1401,7 +1402,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
|||
assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
|
||||
|
||||
logger.info("--> highlighting and searching on field4");
|
||||
source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")).from(0).size(60).explain(true)
|
||||
source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")).from(0).size(60).explain(true)
|
||||
.highlight(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
|
||||
searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet();
|
||||
|
||||
|
@ -1409,7 +1410,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
|||
assertHighlight(searchResponse, 1, "field4", 0, 1, equalTo("<x>The quick brown</x> fox jumps over the lazy dog"));
|
||||
|
||||
logger.info("--> highlighting and searching on field4");
|
||||
source = searchSource().postFilter(FilterBuilders.typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")).from(0).size(60).explain(true)
|
||||
source = searchSource().postFilter(typeFilter("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")).from(0).size(60).explain(true)
|
||||
.highlight(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
|
||||
searchResponse = client().search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH)).actionGet();
|
||||
|
||||
|
@ -2419,6 +2420,85 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception {
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
|
||||
refresh();
|
||||
|
||||
logger.info("--> highlighting and searching on field1");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+").rewrite(rewriteMethod)))
|
||||
.highlight(highlight().field("field1"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Exception {
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
|
||||
refresh();
|
||||
|
||||
logger.info("--> highlighting and searching on field1");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(boolQuery()
|
||||
.should(constantScoreQuery(FilterBuilders.missingFilter("field1")))
|
||||
.should(matchQuery("field1", "test"))
|
||||
.should(filteredQuery(queryString("field1:photo*").rewrite(rewriteMethod), null)))
|
||||
.highlight(highlight().field("field1"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Exception {
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
|
||||
refresh();
|
||||
|
||||
logger.info("--> highlighting and searching on field1");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(boolQuery().must(prefixQuery("field1", "photo").rewrite(rewriteMethod)).should(matchQuery("field1", "test").minimumShouldMatch("0")))
|
||||
.highlight(highlight().field("field1"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostingsHighlighterQueryStringWithinFilteredQuery() throws Exception {
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||
ensureGreen();
|
||||
|
||||
client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get();
|
||||
refresh();
|
||||
|
||||
logger.info("--> highlighting and searching on field1");
|
||||
for (String rewriteMethod : REWRITE_METHODS) {
|
||||
SearchSourceBuilder source = searchSource().query(filteredQuery(queryString("field1:photo*").rewrite(rewriteMethod), missingFilter("field_null")))
|
||||
.highlight(highlight().field("field1"));
|
||||
SearchResponse searchResponse = client().search(searchRequest("test").source(source)
|
||||
.searchType(randomBoolean() ? SearchType.DFS_QUERY_THEN_FETCH : SearchType.QUERY_THEN_FETCH)).get();
|
||||
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Slow
|
||||
public void testPostingsHighlighterManyDocs() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue