Stop FVH from throwing away some query boosts
The FVH was throwing away some boosts on queries, which stopped a number of ways of boosting phrase matches to the top of the list of fragments from working. The plain highlighter also doesn't work for this, but that is because it doesn't support the concept of the same term having a different score at different positions. Also update the documentation to state that the FVH is nicer for weighting terms found by query combinations. Closes #4351
This commit is contained in:
parent
522d620eb6
commit
8bd9e34e39
|
@ -77,6 +77,9 @@ will be used instead of the plain highlighter. The fast vector highlighter:
|
||||||
increases the size of the index
|
increases the size of the index
|
||||||
* Can combine matches from multiple fields into one result. See
|
* Can combine matches from multiple fields into one result. See
|
||||||
`matched_fields`
|
`matched_fields`
|
||||||
|
* Can assign different weights to matches at different positions, allowing
|
||||||
|
for things like phrase matches being sorted above term matches when
|
||||||
|
highlighting a Boosting Query that boosts phrase matches over term matches
|
||||||
|
|
||||||
Here is an example of setting the `content` field to allow for
|
Here is an example of setting the `content` field to allow for
|
||||||
highlighting using the fast vector highlighter on it (this will cause
|
highlighting using the fast vector highlighter on it (this will cause
|
||||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.queries.FilterClause;
|
||||||
import org.apache.lucene.queries.TermFilter;
|
import org.apache.lucene.queries.TermFilter;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
|
||||||
import org.elasticsearch.common.lucene.search.XBooleanFilter;
|
import org.elasticsearch.common.lucene.search.XBooleanFilter;
|
||||||
import org.elasticsearch.common.lucene.search.XFilteredQuery;
|
import org.elasticsearch.common.lucene.search.XFilteredQuery;
|
||||||
|
@ -66,10 +68,36 @@ public class CustomFieldQuery extends FieldQuery {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
|
void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries) throws IOException {
|
||||||
if (sourceQuery instanceof DisjunctionMaxQuery) {
|
assert Lucene.VERSION == Version.LUCENE_46 : "LUCENE-5361";
|
||||||
|
if( sourceQuery instanceof BooleanQuery ){
|
||||||
|
BooleanQuery bq = (BooleanQuery)sourceQuery;
|
||||||
|
if (bq.getBoost() == 1) {
|
||||||
|
for( BooleanClause clause : bq.getClauses() ) {
|
||||||
|
if(!clause.isProhibited()) {
|
||||||
|
flatten(clause.getQuery(), reader, flatQueries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for( BooleanClause clause : bq.getClauses() ) {
|
||||||
|
if(!clause.isProhibited()) {
|
||||||
|
Query cloned = clause.getQuery().clone();
|
||||||
|
cloned.setBoost(cloned.getBoost() * bq.getBoost());
|
||||||
|
flatten(cloned, reader, flatQueries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (sourceQuery instanceof DisjunctionMaxQuery) {
|
||||||
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
|
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
|
||||||
for (Query query : dmq) {
|
if (dmq.getBoost() == 1) {
|
||||||
flatten(query, reader, flatQueries);
|
for (Query query : dmq) {
|
||||||
|
flatten(query, reader, flatQueries);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (Query query : dmq) {
|
||||||
|
Query clone = query.clone();
|
||||||
|
clone.setBoost(clone.getBoost() * dmq.getBoost());
|
||||||
|
flatten(clone, reader, flatQueries);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (sourceQuery instanceof SpanTermQuery) {
|
} else if (sourceQuery instanceof SpanTermQuery) {
|
||||||
TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
|
TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
|
||||||
|
|
|
@ -2494,4 +2494,82 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
|
||||||
.addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get();
|
.addHighlightedField("field1").setHighlighterRequireFieldMatch(true).get();
|
||||||
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<em>First</em> sentence. Second sentence."));
|
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("<em>First</em> sentence. Second sentence."));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFastVectorHighlighterPhraseBoost() throws Exception {
|
||||||
|
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1TermVectorMapping()));
|
||||||
|
phraseBoostTestCase("fvh");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPostingsHighlighterPhraseBoost() throws Exception {
|
||||||
|
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
|
||||||
|
phraseBoostTestCase("postings");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test phrase boosting over normal term matches. Note that this will never pass with the plain highlighter
|
||||||
|
* because it doesn't support the concept of terms having a different weight based on position.
|
||||||
|
* @param highlighterType highlighter to test
|
||||||
|
*/
|
||||||
|
private void phraseBoostTestCase(String highlighterType) {
|
||||||
|
ensureGreen();
|
||||||
|
StringBuilder text = new StringBuilder();
|
||||||
|
text.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk\n");
|
||||||
|
for (int i = 0; i<10; i++) {
|
||||||
|
text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n");
|
||||||
|
}
|
||||||
|
text.append("highlight words together\n");
|
||||||
|
for (int i = 0; i<10; i++) {
|
||||||
|
text.append("junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk junk\n");
|
||||||
|
}
|
||||||
|
index("test", "type1", "1", "field1", text.toString());
|
||||||
|
refresh();
|
||||||
|
|
||||||
|
// Match queries
|
||||||
|
phraseBoostTestCaseForClauses(highlighterType, 100f,
|
||||||
|
matchQuery("field1", "highlight words together"),
|
||||||
|
matchPhraseQuery("field1", "highlight words together"));
|
||||||
|
|
||||||
|
// Query string with a single field
|
||||||
|
phraseBoostTestCaseForClauses(highlighterType, 100f,
|
||||||
|
queryString("highlight words together").field("field1"),
|
||||||
|
queryString("\"highlight words together\"").field("field1").autoGeneratePhraseQueries(true));
|
||||||
|
|
||||||
|
// Query string with a single field without dismax
|
||||||
|
phraseBoostTestCaseForClauses(highlighterType, 100f,
|
||||||
|
queryString("highlight words together").field("field1").useDisMax(false),
|
||||||
|
queryString("\"highlight words together\"").field("field1").useDisMax(false).autoGeneratePhraseQueries(true));
|
||||||
|
|
||||||
|
// Query string with more than one field
|
||||||
|
phraseBoostTestCaseForClauses(highlighterType, 100f,
|
||||||
|
queryString("highlight words together").field("field1").field("field2"),
|
||||||
|
queryString("\"highlight words together\"").field("field1").field("field2").autoGeneratePhraseQueries(true));
|
||||||
|
|
||||||
|
// Query string boosting the field
|
||||||
|
phraseBoostTestCaseForClauses(highlighterType, 1f,
|
||||||
|
queryString("highlight words together").field("field1"),
|
||||||
|
queryString("\"highlight words together\"").field("field1^100").autoGeneratePhraseQueries(true));
|
||||||
|
}
|
||||||
|
|
||||||
|
private <P extends QueryBuilder & BoostableQueryBuilder> void
|
||||||
|
phraseBoostTestCaseForClauses(String highlighterType, float boost, QueryBuilder terms, P phrase) {
|
||||||
|
Matcher<String> highlightedMatcher = either(containsString("<em>highlight words together</em>")).or(
|
||||||
|
containsString("<em>highlight</em> <em>words</em> <em>together</em>"));
|
||||||
|
SearchRequestBuilder search = client().prepareSearch("test").setHighlighterRequireFieldMatch(true)
|
||||||
|
.setHighlighterOrder("score").setHighlighterType(highlighterType)
|
||||||
|
.addHighlightedField("field1", 100, 1);
|
||||||
|
|
||||||
|
// Try with a bool query
|
||||||
|
phrase.boost(boost);
|
||||||
|
SearchResponse response = search.setQuery(boolQuery().must(terms).should(phrase)).get();
|
||||||
|
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
|
||||||
|
phrase.boost(1);
|
||||||
|
// Try with a boosting query
|
||||||
|
response = search.setQuery(boostingQuery().positive(phrase).negative(terms).boost(boost).negativeBoost(1)).get();
|
||||||
|
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
|
||||||
|
// Try with a boosting query using a negative boost
|
||||||
|
response = search.setQuery(boostingQuery().positive(phrase).negative(terms).boost(1).negativeBoost(1/boost)).get();
|
||||||
|
assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue