diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index 173d1b6977f..6df2272a024 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -77,6 +77,9 @@ will be used instead of the plain highlighter. The fast vector highlighter: increases the size of the index * Can combine matches from multiple fields into one result. See `matched_fields` +* Can assign different weights to matches at different positions, allowing + for things like phrase matches being sorted above term matches when + highlighting a Boosting Query that boosts phrase matches over term matches Here is an example of setting the `content` field to allow for highlighting using the fast vector highlighter on it (this will cause diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java index 49a375191f1..8bf5ac58bea 100644 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java @@ -25,6 +25,8 @@ import org.apache.lucene.queries.FilterClause; import org.apache.lucene.queries.TermFilter; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.Version; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.XBooleanFilter; import org.elasticsearch.common.lucene.search.XFilteredQuery; @@ -66,10 +68,36 @@ public class CustomFieldQuery extends FieldQuery { @Override void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException { - if (sourceQuery instanceof DisjunctionMaxQuery) { + assert Lucene.VERSION == Version.LUCENE_46 : "LUCENE-5361"; + if( sourceQuery 
instanceof BooleanQuery ){ + BooleanQuery bq = (BooleanQuery)sourceQuery; + if (bq.getBoost() == 1) { + for( BooleanClause clause : bq.getClauses() ) { + if(!clause.isProhibited()) { + flatten(clause.getQuery(), reader, flatQueries); + } + } + } else { + for( BooleanClause clause : bq.getClauses() ) { + if(!clause.isProhibited()) { + Query cloned = clause.getQuery().clone(); + cloned.setBoost(cloned.getBoost() * bq.getBoost()); + flatten(cloned, reader, flatQueries); + } + } + } + } else if (sourceQuery instanceof DisjunctionMaxQuery) { DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery; - for (Query query : dmq) { - flatten(query, reader, flatQueries); + if (dmq.getBoost() == 1) { + for (Query query : dmq) { + flatten(query, reader, flatQueries); + } + } else { + for (Query query : dmq) { + Query clone = query.clone(); + clone.setBoost(clone.getBoost() * dmq.getBoost()); + flatten(clone, reader, flatQueries); + } } } else if (sourceQuery instanceof SpanTermQuery) { TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm()); diff --git a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java index 7974fbadaef..942461b9a27 100644 --- a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java +++ b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java @@ -2494,4 +2494,82 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest { .addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get(); assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("First sentence. 
Second sentence.")); } + + @Test + public void testFastVectorHighlighterPhraseBoost() throws Exception { + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1TermVectorMapping())); + phraseBoostTestCase("fvh"); + } + + @Test + public void testPostingsHighlighterPhraseBoost() throws Exception { + assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping())); + phraseBoostTestCase("postings"); + } + + /** + * Test phrase boosting over normal term matches. Note that this will never pass with the plain highlighter + * because it doesn't support the concept of terms having a different weight based on position. + * @param highlighterType highlighter to test + */ + private void phraseBoostTestCase(String highlighterType) { + ensureGreen(); + StringBuilder text = new StringBuilder(); + text.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk\n"); + for (int i = 0; i<10; i++) { + text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n"); + } + text.append("highlight words together\n"); + for (int i = 0; i<10; i++) { + text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n"); + } + index("test", "type1", "1", "field1", text.toString()); + refresh(); + + // Match queries + phraseBoostTestCaseForClauses(highlighterType, 100f, + matchQuery("field1", "highlight words together"), + matchPhraseQuery("field1", "highlight words together")); + + // Query string with a single field + phraseBoostTestCaseForClauses(highlighterType, 100f, + queryString("highlight words together").field("field1"), + queryString("\"highlight words together\"").field("field1").autoGeneratePhraseQueries(true)); + + // Query string with a single field without dismax + phraseBoostTestCaseForClauses(highlighterType, 100f, + queryString("highlight words 
together").field("field1").useDisMax(false), + queryString("\"highlight words together\"").field("field1").useDisMax(false).autoGeneratePhraseQueries(true)); + + // Query string with more than one field + phraseBoostTestCaseForClauses(highlighterType, 100f, + queryString("highlight words together").field("field1").field("field2"), + queryString("\"highlight words together\"").field("field1").field("field2").autoGeneratePhraseQueries(true)); + + // Query string boosting the field + phraseBoostTestCaseForClauses(highlighterType, 1f, + queryString("highlight words together").field("field1"), + queryString("\"highlight words together\"").field("field1^100").autoGeneratePhraseQueries(true)); + } + + private

<P extends QueryBuilder & BoostableQueryBuilder> void + phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) { + Matcher<String> highlightedMatcher = either(containsString("<em>highlight words together</em>")).or( + containsString("<em>highlight</em> <em>words</em> <em>together</em>")); + SearchRequestBuilder search = client().prepareSearch("test").setHighlighterRequireFieldMatch(true) + .setHighlighterOrder("score").setHighlighterType(highlighterType) + .addHighlightedField("field1", 100, 1); + + // Try with a bool query + phrase.boost(boost); + SearchResponse response = search.setQuery(boolQuery().must(terms).should(phrase)).get(); + assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher); + phrase.boost(1); + // Try with a boosting query + response = search.setQuery(boostingQuery().positive(phrase).negative(terms).boost(boost).negativeBoost(1)).get(); + assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher); + // Try with a boosting query using a negative boost + response = search.setQuery(boostingQuery().positive(phrase).negative(terms).boost(1).negativeBoost(1/boost)).get(); + assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher); + } }