Merge pull request #12294 from jpountz/fix/multi_match_boost

`multi_match` query applies boosts too many times.
This commit is contained in:
Adrien Grand 2015-08-06 19:07:44 +02:00
commit 00093a21dc
4 changed files with 60 additions and 14 deletions

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
@ -62,13 +63,17 @@ import java.util.List;
public abstract class BlendedTermQuery extends Query { public abstract class BlendedTermQuery extends Query {
private final Term[] terms; private final Term[] terms;
private final float[] boosts;
public BlendedTermQuery(Term[] terms, float[] boosts) {
public BlendedTermQuery(Term[] terms) {
if (terms == null || terms.length == 0) { if (terms == null || terms.length == 0) {
throw new IllegalArgumentException("terms must not be null or empty"); throw new IllegalArgumentException("terms must not be null or empty");
} }
if (boosts != null && boosts.length != terms.length) {
throw new IllegalArgumentException("boosts must have the same size as terms");
}
this.terms = terms; this.terms = terms;
this.boosts = boosts;
} }
@Override @Override
@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {
@Override @Override
public String toString(String field) { public String toString(String field) {
return "blended(terms: " + Arrays.toString(terms) + ")"; StringBuilder builder = new StringBuilder("blended(terms:[");
for (int i = 0; i < terms.length; ++i) {
builder.append(terms[i]);
float boost = 1f;
if (boosts != null) {
boost = boosts[i];
}
builder.append(ToStringUtils.boost(boost));
builder.append(", ");
}
if (terms.length > 0) {
builder.setLength(builder.length() - 2);
}
builder.append("])");
builder.append(ToStringUtils.boost(getBoost()));
return builder.toString();
} }
private volatile Term[] equalTerms = null; private volatile Term[] equalTerms = null;
@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
} }
public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) { public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
return new BlendedTermQuery(terms) { return new BlendedTermQuery(terms, boosts) {
@Override @Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) { protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
BooleanQuery query = new BooleanQuery(disableCoord); BooleanQuery query = new BooleanQuery(disableCoord);
@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
} }
public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) { public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
return new BlendedTermQuery(terms) { return new BlendedTermQuery(terms, boosts) {
@Override @Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) { protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
BooleanQuery query = new BooleanQuery(true); BooleanQuery query = new BooleanQuery(true);
@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
} }
public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) { public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
return new BlendedTermQuery(terms) { return new BlendedTermQuery(terms, boosts) {
@Override @Override
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) { protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier); DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);

View File

@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
throw new IllegalStateException("No such type: " + type); throw new IllegalStateException("No such type: " + type);
} }
final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch); final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
return queryBuilder.conbineGrouped(queries); return queryBuilder.combineGrouped(queries);
} }
private QueryBuilder queryBuilder; private QueryBuilder queryBuilder;
@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
return parseAndApply(type, field, value, minimumShouldMatch, boostValue); return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
} }
public Query conbineGrouped(List<? extends Query> groupQuery) { public Query combineGrouped(List<? extends Query> groupQuery) {
if (groupQuery == null || groupQuery.isEmpty()) { if (groupQuery == null || groupQuery.isEmpty()) {
return null; return null;
} }
@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
blendedFields = null; blendedFields = null;
} }
final FieldAndFieldType fieldAndFieldType = group.get(0); final FieldAndFieldType fieldAndFieldType = group.get(0);
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch); Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
if (q != null) { if (q != null) {
queries.add(q); queries.add(q);
} }

View File

@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.core.NumberFieldMapper; import org.elasticsearch.index.mapper.core.NumberFieldMapper;
@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase { public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
private IndexQueryParserService queryParser; private IndexQueryParserService queryParser;
private IndexService indexService;
@Before @Before
public void setup() throws IOException { public void setup() throws IOException {
@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
assertNotNull(doc.dynamicMappingsUpdate()); assertNotNull(doc.dynamicMappingsUpdate());
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get(); client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
this.indexService = indexService;
queryParser = indexService.queryParserService(); queryParser = indexService.queryParserService();
} }
@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
assertThat(parsedQuery, instanceOf(BooleanQuery.class)); assertThat(parsedQuery, instanceOf(BooleanQuery.class));
} }
/**
 * Parses a {@code cross_fields} multi_match query over two boosted fields
 * ({@code name.first^2}, {@code name.last^3}) and one unboosted field
 * ({@code foobar}), rewrites it against the test shard's searcher, and checks
 * that the rewritten query equals a boolean query holding the plain
 * {@code foobar} term plus a dismax of the two boosted {@code name.*} terms.
 */
public void testCrossFieldMultiMatchQuery() throws IOException {
    IndexQueryParserService parser = queryParser();
    Query parsed = parser.parse(
            multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar")
                    .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS))
            .query();

    try (Engine.Searcher engineSearcher = indexService.shardSafe(0).acquireSearcher("test")) {
        Query rewritten = engineSearcher.searcher().rewrite(parsed);

        // Boosted per-field term queries; each boost is set once, matching the field's ^N suffix.
        TermQuery firstNameTerm = new TermQuery(new Term("name.first", "banon"));
        firstNameTerm.setBoost(2);
        TermQuery lastNameTerm = new TermQuery(new Term("name.last", "banon"));
        lastNameTerm.setBoost(3);
        DisjunctionMaxQuery blendedNames =
                new DisjunctionMaxQuery(Arrays.<Query>asList(firstNameTerm, lastNameTerm), 0f);

        // Clause order matters for equality: the unboosted field first, then the dismax group.
        BooleanQuery expected = new BooleanQuery();
        expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
        expected.add(blendedNames, Occur.SHOULD);

        assertEquals(expected, rewritten);
    }
}
@Test @Test
public void testSimpleQueryString() throws Exception { public void testSimpleQueryString() throws Exception {
IndexQueryParserService queryParser = queryParser(); IndexQueryParserService queryParser = queryParser();

View File

@ -302,10 +302,17 @@ document to match. (Compare this to
That solves one of the two problems. The problem of differing term frequencies That solves one of the two problems. The problem of differing term frequencies
is solved by _blending_ the term frequencies for all fields in order to even is solved by _blending_ the term frequencies for all fields in order to even
out the differences. In other words, `first_name:smith` will be treated as out the differences.
though it has the same weight as `last_name:smith`. (Actually,
`last_name:smith` is given a tiny advantage over `first_name:smith`, just to In practice, `first_name:smith` will be treated as though it has the same
make the order of results more stable.) frequencies as `last_name:smith`, plus one. This will make matches on
`first_name` and `last_name` have comparable scores, with a tiny advantage
for `last_name` since it is the field most likely to contain `smith`.
Note that `cross_fields` is usually only useful on short string fields
that all have a `boost` of `1`. Otherwise boosts, term frequencies and length
normalization contribute to the score in such a way that the blending of term
statistics is no longer meaningful.
If you run the above query through the <<search-validate>>, it returns this If you run the above query through the <<search-validate>>, it returns this
explanation: explanation: