Stop FVH from throwing away some query boosts

The FVH was throwing away some boosts on queries, which stopped a number of ways of boosting phrase matches to the top of the list of fragments from working. The plain highlighter also doesn't work for this, but that is because it doesn't support the concept of the same term having a different score at different positions. Also update the documentation to claim that the FVH is nicer for weighting terms found by query combinations. Closes #4351
2013-12-05 12:56:39 -05:00 · 2013-12-05 12:56:39 -05:00 · 8bd9e34e39
parent 522d620eb6
commit 8bd9e34e39
3 changed files with 112 additions and 3 deletions
--- a/docs/reference/search/request/highlighting.asciidoc
+++ b/docs/reference/search/request/highlighting.asciidoc
@ -77,6 +77,9 @@ will be used instead of the plain highlighter.  The fast vector highlighter:
  increases the size of the index
 * Can combine matches from multiple fields into one result.  See
  `matched_fields`
 * Can assign different weights to matches at different positions allowing
  for things like phrase matches being sorted above term matches when
  highlighting a Boosting Query that boosts phrase matches over term matches
 Here is an example of setting the `content` field to allow for
 highlighting using the fast vector highlighter on it (this will cause
--- a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
+++ b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
@ -25,6 +25,8 @@ import org.apache.lucene.queries.FilterClause;
 import org.apache.lucene.queries.TermFilter;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.Version;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.elasticsearch.common.lucene.search.XBooleanFilter;
 import org.elasticsearch.common.lucene.search.XFilteredQuery;
@ -66,10 +68,36 @@ public class CustomFieldQuery extends FieldQuery {
    @Override
    void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
-        if (sourceQuery instanceof DisjunctionMaxQuery) {
+        assert Lucene.VERSION == Version.LUCENE_46 : "LUCENE-5361";
        if( sourceQuery instanceof BooleanQuery ){
            BooleanQuery bq = (BooleanQuery)sourceQuery;
            if (bq.getBoost() == 1) {
                for( BooleanClause clause : bq.getClauses() ) {
                    if(!clause.isProhibited()) {
                        flatten(clause.getQuery(), reader, flatQueries);
                    }
                }
            } else {
                for( BooleanClause clause : bq.getClauses() ) {
                    if(!clause.isProhibited()) {
                        Query cloned = clause.getQuery().clone();
                        cloned.setBoost(cloned.getBoost() * bq.getBoost());
                        flatten(cloned, reader, flatQueries);
                    }
                }
            }
        } else if (sourceQuery instanceof DisjunctionMaxQuery) {
            DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
-            for (Query query : dmq) {
+            if (dmq.getBoost() == 1) {
-                flatten(query, reader, flatQueries);
+                for (Query query : dmq) {
                    flatten(query, reader, flatQueries);
                }
            } else {
                for (Query query : dmq) {
                    Query clone = query.clone();
                    clone.setBoost(clone.getBoost() * dmq.getBoost());
                    flatten(clone, reader, flatQueries);
                }
            }
        } else if (sourceQuery instanceof SpanTermQuery) {
            TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
--- a/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java
+++ b/src/test/java/org/elasticsearch/search/highlight/HighlighterSearchTests.java
@ -2494,4 +2494,82 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
                .addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get();
        assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<em>First</em> sentence. Second sentence."));
    }
    /**
     * Runs the shared phrase boost scenario with the {@code fvh} highlighter against a
     * {@code test} index whose {@code type1} mapping comes from {@code type1TermVectorMapping()}.
     */
    @Test
    public void testFastVectorHighlighterPhraseBoost() throws Exception {
        assertAcked(
                client().admin().indices()
                        .prepareCreate("test")
                        .addMapping("type1", type1TermVectorMapping()));
        phraseBoostTestCase("fvh");
    }
    /**
     * Runs the shared phrase boost scenario with the {@code postings} highlighter against a
     * {@code test} index whose {@code type1} mapping comes from {@code type1PostingsffsetsMapping()}.
     */
    @Test
    public void testPostingsHighlighterPhraseBoost() throws Exception {
        assertAcked(
                client().admin().indices()
                        .prepareCreate("test")
                        .addMapping("type1", type1PostingsffsetsMapping()));
        phraseBoostTestCase("postings");
    }
    /**
     * Shared scenario: a boosted phrase match must be highlighted ahead of plain term matches.
     * The plain highlighter is deliberately not exercised here; it has no notion of a term carrying
     * a different weight depending on its position, so it can never pass.
     *
     * @param highlighterType highlighter to test
     */
    private void phraseBoostTestCase(String highlighterType) {
        ensureGreen();

        final String field = "field1";
        final String filler = "junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n";
        final int fillerLines = 10;

        StringBuilder body = new StringBuilder();
        // Opening line contains all three terms, but scattered between filler words
        body.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk\n");
        for (int line = 0; line < fillerLines; line++) {
            body.append(filler);
        }
        // The exact phrase sits in the middle of the document, surrounded by filler
        body.append("highlight words together\n");
        for (int line = 0; line < fillerLines; line++) {
            body.append(filler);
        }
        index("test", "type1", "1", field, body.toString());
        refresh();

        final String looseText = "highlight words together";
        final String quotedText = "\"highlight words together\"";

        // Match queries
        phraseBoostTestCaseForClauses(highlighterType, 100f,
                matchQuery(field, looseText),
                matchPhraseQuery(field, looseText));

        // Query string with a single field
        phraseBoostTestCaseForClauses(highlighterType, 100f,
                queryString(looseText).field(field),
                queryString(quotedText).field(field).autoGeneratePhraseQueries(true));

        // Query string with a single field without dismax
        phraseBoostTestCaseForClauses(highlighterType, 100f,
                queryString(looseText).field(field).useDisMax(false),
                queryString(quotedText).field(field).useDisMax(false).autoGeneratePhraseQueries(true));

        // Query string with more than one field
        phraseBoostTestCaseForClauses(highlighterType, 100f,
                queryString(looseText).field(field).field("field2"),
                queryString(quotedText).field(field).field("field2").autoGeneratePhraseQueries(true));

        // Query string boosting the field
        phraseBoostTestCaseForClauses(highlighterType, 1f,
                queryString(looseText).field(field),
                queryString(quotedText).field("field1^100").autoGeneratePhraseQueries(true));
    }
    /**
     * Combines a term clause and a phrase clause in three different ways and checks each time that
     * the first fragment returned for {@code field1} contains the highlighted phrase.
     *
     * @param highlighterType highlighter to test
     * @param boost boost applied to the phrase clause (or, inverted, as the negative boost)
     * @param terms clause matching the individual terms
     * @param phrase clause matching the phrase; its boost is modified by this method and left at 1
     */
    private <P extends QueryBuilder & BoostableQueryBuilder> void
            phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) {
        // Accept the phrase highlighted either as a single span or word by word
        Matcher<String> expectedFragment =
                either(containsString("<em>highlight words together</em>"))
                        .or(containsString("<em>highlight</em> <em>words</em> <em>together</em>"));

        SearchRequestBuilder request = client().prepareSearch("test")
                .setHighlighterRequireFieldMatch(true)
                .setHighlighterOrder("score")
                .setHighlighterType(highlighterType)
                .addHighlightedField("field1", 100, 1);

        // Try with a bool query: the phrase carries the boost only while this search runs
        phrase.boost(boost);
        QueryBuilder boolCombination = boolQuery().must(terms).should(phrase);
        SearchResponse boolResponse = request.setQuery(boolCombination).get();
        assertHighlight(boolResponse, 0, "field1", 0, 1, expectedFragment);
        phrase.boost(1);

        // Try with a boosting query
        QueryBuilder boostedPositive =
                boostingQuery().positive(phrase).negative(terms).boost(boost).negativeBoost(1);
        SearchResponse boostedResponse = request.setQuery(boostedPositive).get();
        assertHighlight(boostedResponse, 0, "field1", 0, 1, expectedFragment);

        // Try with a boosting query using a negative boost
        QueryBuilder demotedNegative =
                boostingQuery().positive(phrase).negative(terms).boost(1).negativeBoost(1/boost);
        SearchResponse demotedResponse = request.setQuery(demotedNegative).get();
        assertHighlight(demotedResponse, 0, "field1", 0, 1, expectedFragment);
    }
 }