Stop FVH from throwing away some query boosts

The FVH was throwing away some boosts on queries, which stopped a number
of ways to boost phrase matches to the top of the list of fragments from
working.

The plain highlighter also doesn't work for this but that is because it
doesn't support the concept of the same term having a different score at
different positions.

Also update the documentation to note that the FVH is better at weighting
terms found by query combinations.

Closes #4351
This commit is contained in:
Nik Everett 2013-12-05 12:56:39 -05:00 committed by Adrien Grand
parent 522d620eb6
commit 8bd9e34e39
3 changed files with 112 additions and 3 deletions

View File

@ -77,6 +77,9 @@ will be used instead of the plain highlighter. The fast vector highlighter:
increases the size of the index increases the size of the index
* Can combine matches from multiple fields into one result. See * Can combine matches from multiple fields into one result. See
`matched_fields` `matched_fields`
* Can assign different weights to matches at different positions, allowing
for things like phrase matches being sorted above term matches when
highlighting a Boosting Query that boosts phrase matches over term matches.
Here is an example of setting the `content` field to allow for Here is an example of setting the `content` field to allow for
highlighting using the fast vector highlighter on it (this will cause highlighting using the fast vector highlighter on it (this will cause

View File

@ -25,6 +25,8 @@ import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermFilter; import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.XBooleanFilter; import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.XFilteredQuery; import org.elasticsearch.common.lucene.search.XFilteredQuery;
@ -66,10 +68,36 @@ public class CustomFieldQuery extends FieldQuery {
@Override @Override
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException { void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
if (sourceQuery instanceof DisjunctionMaxQuery) { assert Lucene.VERSION == Version.LUCENE_46 : "LUCENE-5361";
if( sourceQuery instanceof BooleanQuery ){
BooleanQuery bq = (BooleanQuery)sourceQuery;
if (bq.getBoost() == 1) {
for( BooleanClause clause : bq.getClauses() ) {
if(!clause.isProhibited()) {
flatten(clause.getQuery(), reader, flatQueries);
}
}
} else {
for( BooleanClause clause : bq.getClauses() ) {
if(!clause.isProhibited()) {
Query cloned = clause.getQuery().clone();
cloned.setBoost(cloned.getBoost() * bq.getBoost());
flatten(cloned, reader, flatQueries);
}
}
}
} else if (sourceQuery instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery; DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
for (Query query : dmq) { if (dmq.getBoost() == 1) {
flatten(query, reader, flatQueries); for (Query query : dmq) {
flatten(query, reader, flatQueries);
}
} else {
for (Query query : dmq) {
Query clone = query.clone();
clone.setBoost(clone.getBoost() * dmq.getBoost());
flatten(clone, reader, flatQueries);
}
} }
} else if (sourceQuery instanceof SpanTermQuery) { } else if (sourceQuery instanceof SpanTermQuery) {
TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm()); TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());

View File

@ -2494,4 +2494,82 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
.addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get(); .addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<em>First</em> sentence. Second sentence.")); assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<em>First</em> sentence. Second sentence."));
} }
/**
 * Runs the shared phrase boost scenario with the "fvh" highlighter type.
 * The "test" index is created with the mapping returned by
 * type1TermVectorMapping() for "type1" before the scenario runs.
 */
@Test
public void testFastVectorHighlighterPhraseBoost() throws Exception {
// The index must exist (and the creation be acknowledged) before phraseBoostTestCase indexes into it.
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
phraseBoostTestCase("fvh");
}
/**
 * Runs the shared phrase boost scenario with the "postings" highlighter type.
 * The "test" index is created with the mapping returned by
 * type1PostingsffsetsMapping() for "type1" before the scenario runs.
 */
@Test
public void testPostingsHighlighterPhraseBoost() throws Exception {
// The index must exist (and the creation be acknowledged) before phraseBoostTestCase indexes into it.
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
phraseBoostTestCase("postings");
}
/**
 * Verifies that a boosted phrase match is ranked above scattered term matches when
 * highlighting. The plain highlighter can never pass this because it has no notion of
 * the same term carrying a different weight at different positions.
 *
 * The indexed document starts with a line in which the three search terms appear far
 * apart, followed by filler, then a line holding the exact phrase, then more filler.
 *
 * @param highlighterType highlighter to test
 */
private void phraseBoostTestCase(String highlighterType) {
    ensureGreen();

    // Filler line repeated on both sides of the exact phrase.
    final String filler =
            "junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n";
    final int fillerLines = 10;

    // First line: every term is present, but never adjacent to the others.
    StringBuilder document = new StringBuilder(
            "words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk\n");
    for (int line = 0; line < fillerLines; line++) {
        document.append(filler);
    }
    // The exact phrase, which is the fragment expected to win.
    document.append("highlight words together\n");
    for (int line = 0; line < fillerLines; line++) {
        document.append(filler);
    }

    index("test", "type1", "1", "field1", document.toString());
    refresh();

    // Match queries
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            matchQuery("field1", "highlight words together"),
            matchPhraseQuery("field1", "highlight words together"));

    // Query string with a single field
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            queryString("highlight words together").field("field1"),
            queryString("\"highlight words together\"").field("field1").autoGeneratePhraseQueries(true));

    // Query string with a single field without dismax
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            queryString("highlight words together").field("field1").useDisMax(false),
            queryString("\"highlight words together\"").field("field1").useDisMax(false).autoGeneratePhraseQueries(true));

    // Query string with more than one field
    phraseBoostTestCaseForClauses(
            highlighterType,
            100f,
            queryString("highlight words together").field("field1").field("field2"),
            queryString("\"highlight words together\"").field("field1").field("field2").autoGeneratePhraseQueries(true));

    // Query string boosting the field: the weight comes from "field1^100", so the clause boost stays at 1
    phraseBoostTestCaseForClauses(
            highlighterType,
            1f,
            queryString("highlight words together").field("field1"),
            queryString("\"highlight words together\"").field("field1^100").autoGeneratePhraseQueries(true));
}
/**
 * Runs three searches that combine a terms query with a phrase query and, for each one,
 * asserts that the highlight checked on "field1" contains the whole phrase
 * "highlight words together", emphasised either as one span or as three adjacent spans.
 *
 * The three combinations are: a bool query with the boost set on the phrase clause, a
 * boosting query with the boost set on the boosting query itself, and a boosting query
 * that uses a negative boost of 1/boost instead.
 *
 * Note that {@code phrase} is mutated: its boost is set to {@code boost} and then reset to 1.
 * The same search request builder is reused for all three searches, so the statement order matters.
 *
 * @param highlighterType highlighter type passed to the search request
 * @param boost weight used to favour the phrase query over the terms query
 * @param terms query matching the individual terms
 * @param phrase query matching the phrase; must be boostable
 */
private <P extends QueryBuilder & BoostableQueryBuilder> void
phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) {
// Accept both renderings of a full phrase hit: one <em> around the phrase or one <em> per word.
Matcher<String> highlightedMatcher = either(containsString("<em>highlight words together</em>")).or(
containsString("<em>highlight</em> <em>words</em> <em>together</em>"));
// Fragments are ordered by score; the (100, 1) arguments are presumably fragment size and fragment count - TODO confirm.
SearchRequestBuilder search = client().prepareSearch("test").setHighlighterRequireFieldMatch(true)
.setHighlighterOrder("score").setHighlighterType(highlighterType)
.addHighlightedField("field1", 100, 1);
// Try with a bool query
phrase.boost(boost);
SearchResponse response = search.setQuery(boolQuery().must(terms).should(phrase)).get();
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
// Reset the phrase clause so the remaining searches carry the weight on the boosting query only.
phrase.boost(1);
// Try with a boosting query
response = search.setQuery(boostingQuery().positive(phrase).negative(terms).boost(boost).negativeBoost(1)).get();
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
// Try with a boosting query using a negative boost
response = search.setQuery(boostingQuery().positive(phrase).negative(terms).boost(1).negativeBoost(1/boost)).get();
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
}
} }