Merge pull request #12294 from jpountz/fix/multi_match_boost

`multi_match` query applies boosts too many times.
This commit is contained in:
Adrien Grand 2015-08-06 19:07:44 +02:00
commit 00093a21dc
4 changed files with 60 additions and 14 deletions

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Arrays;
@ -62,13 +63,17 @@ import java.util.List;
public abstract class BlendedTermQuery extends Query {
private final Term[] terms;
private final float[] boosts;
public BlendedTermQuery(Term[] terms) {
public BlendedTermQuery(Term[] terms, float[] boosts) {
if (terms == null || terms.length == 0) {
throw new IllegalArgumentException("terms must not be null or empty");
}
if (boosts != null && boosts.length != terms.length) {
throw new IllegalArgumentException("boosts must have the same size as terms");
}
this.terms = terms;
this.boosts = boosts;
}
@Override
@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {
@Override
public String toString(String field) {
return "blended(terms: " + Arrays.toString(terms) + ")";
StringBuilder builder = new StringBuilder("blended(terms:[");
for (int i = 0; i < terms.length; ++i) {
builder.append(terms[i]);
float boost = 1f;
if (boosts != null) {
boost = boosts[i];
}
builder.append(ToStringUtils.boost(boost));
builder.append(", ");
}
if (terms.length > 0) {
builder.setLength(builder.length() - 2);
}
builder.append("])");
builder.append(ToStringUtils.boost(getBoost()));
return builder.toString();
}
private volatile Term[] equalTerms = null;
@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
}
public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
return new BlendedTermQuery(terms) {
return new BlendedTermQuery(terms, boosts) {
@Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
BooleanQuery query = new BooleanQuery(disableCoord);
@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
}
public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
return new BlendedTermQuery(terms) {
return new BlendedTermQuery(terms, boosts) {
@Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
BooleanQuery query = new BooleanQuery(true);
@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
}
public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
return new BlendedTermQuery(terms) {
return new BlendedTermQuery(terms, boosts) {
@Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);

View File

@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
throw new IllegalStateException("No such type: " + type);
}
final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
return queryBuilder.conbineGrouped(queries);
return queryBuilder.combineGrouped(queries);
}
private QueryBuilder queryBuilder;
@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
}
public Query conbineGrouped(List<? extends Query> groupQuery) {
public Query combineGrouped(List<? extends Query> groupQuery) {
if (groupQuery == null || groupQuery.isEmpty()) {
return null;
}
@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
blendedFields = null;
}
final FieldAndFieldType fieldAndFieldType = group.get(0);
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch);
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
if (q != null) {
queries.add(q);
}

View File

@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
private IndexQueryParserService queryParser;
private IndexService indexService;
@Before
public void setup() throws IOException {
@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
assertNotNull(doc.dynamicMappingsUpdate());
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
this.indexService = indexService;
queryParser = indexService.queryParserService();
}
@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
}
public void testCrossFieldMultiMatchQuery() throws IOException {
IndexQueryParserService queryParser = queryParser();
Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query();
try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) {
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
BooleanQuery expected = new BooleanQuery();
expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
TermQuery tq1 = new TermQuery(new Term("name.first", "banon"));
tq1.setBoost(2);
TermQuery tq2 = new TermQuery(new Term("name.last", "banon"));
tq2.setBoost(3);
expected.add(new DisjunctionMaxQuery(Arrays.<Query>asList(tq1, tq2), 0f), Occur.SHOULD);
assertEquals(expected, rewrittenQuery);
}
}
@Test
public void testSimpleQueryString() throws Exception {
IndexQueryParserService queryParser = queryParser();

View File

@ -302,10 +302,17 @@ document to match. (Compare this to
That solves one of the two problems. The problem of differing term frequencies
is solved by _blending_ the term frequencies for all fields in order to even
out the differences. In other words, `first_name:smith` will be treated as
though it has the same weight as `last_name:smith`. (Actually,
`last_name:smith` is given a tiny advantage over `first_name:smith`, just to
make the order of results more stable.)
out the differences.
In practice, `first_name:smith` will be treated as though it has the same
frequencies as `last_name:smith`, plus one. This will make matches on
`first_name` and `last_name` have comparable scores, with a tiny advantage
for `last_name` since it is the most likely field that contains `smith`.
Note that `cross_fields` is usually only useful on short string fields
that all have a `boost` of `1`. Otherwise boosts, term freqs and length
normalization contribute to the score in such a way that the blending of term
statistics is not meaningful anymore.
If you run the above query through the <<search-validate>>, it returns this
explanation: