Merge pull request #12294 from jpountz/fix/multi_match_boost
`multi_match` query applies boosts too many times.
This commit is contained in:
commit
00093a21dc
|
@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
@ -62,13 +63,17 @@ import java.util.List;
|
|||
public abstract class BlendedTermQuery extends Query {
|
||||
|
||||
private final Term[] terms;
|
||||
private final float[] boosts;
|
||||
|
||||
|
||||
public BlendedTermQuery(Term[] terms) {
|
||||
public BlendedTermQuery(Term[] terms, float[] boosts) {
|
||||
if (terms == null || terms.length == 0) {
|
||||
throw new IllegalArgumentException("terms must not be null or empty");
|
||||
}
|
||||
if (boosts != null && boosts.length != terms.length) {
|
||||
throw new IllegalArgumentException("boosts must have the same size as terms");
|
||||
}
|
||||
this.terms = terms;
|
||||
this.boosts = boosts;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {
|
|||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return "blended(terms: " + Arrays.toString(terms) + ")";
|
||||
|
||||
StringBuilder builder = new StringBuilder("blended(terms:[");
|
||||
for (int i = 0; i < terms.length; ++i) {
|
||||
builder.append(terms[i]);
|
||||
float boost = 1f;
|
||||
if (boosts != null) {
|
||||
boost = boosts[i];
|
||||
}
|
||||
builder.append(ToStringUtils.boost(boost));
|
||||
builder.append(", ");
|
||||
}
|
||||
if (terms.length > 0) {
|
||||
builder.setLength(builder.length() - 2);
|
||||
}
|
||||
builder.append("])");
|
||||
builder.append(ToStringUtils.boost(getBoost()));
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private volatile Term[] equalTerms = null;
|
||||
|
@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
|
|||
}
|
||||
|
||||
public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
|
||||
return new BlendedTermQuery(terms) {
|
||||
return new BlendedTermQuery(terms, boosts) {
|
||||
@Override
|
||||
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
||||
BooleanQuery query = new BooleanQuery(disableCoord);
|
||||
|
@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
|
|||
}
|
||||
|
||||
public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
|
||||
return new BlendedTermQuery(terms) {
|
||||
return new BlendedTermQuery(terms, boosts) {
|
||||
@Override
|
||||
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
||||
BooleanQuery query = new BooleanQuery(true);
|
||||
|
@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
|
|||
}
|
||||
|
||||
public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
|
||||
return new BlendedTermQuery(terms) {
|
||||
return new BlendedTermQuery(terms, boosts) {
|
||||
@Override
|
||||
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
||||
DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
|
||||
|
|
|
@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
throw new IllegalStateException("No such type: " + type);
|
||||
}
|
||||
final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
|
||||
return queryBuilder.conbineGrouped(queries);
|
||||
return queryBuilder.combineGrouped(queries);
|
||||
}
|
||||
|
||||
private QueryBuilder queryBuilder;
|
||||
|
@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
|
||||
}
|
||||
|
||||
public Query conbineGrouped(List<? extends Query> groupQuery) {
|
||||
public Query combineGrouped(List<? extends Query> groupQuery) {
|
||||
if (groupQuery == null || groupQuery.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
|
|||
blendedFields = null;
|
||||
}
|
||||
final FieldAndFieldType fieldAndFieldType = group.get(0);
|
||||
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch);
|
||||
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
|
||||
if (q != null) {
|
||||
queries.add(q);
|
||||
}
|
||||
|
|
|
@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
|||
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.index.engine.Engine;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.mapper.ParsedDocument;
|
||||
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
||||
|
@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
|
|||
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||
|
||||
private IndexQueryParserService queryParser;
|
||||
private IndexService indexService;
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
|
@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
|||
assertNotNull(doc.dynamicMappingsUpdate());
|
||||
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
|
||||
|
||||
this.indexService = indexService;
|
||||
queryParser = indexService.queryParserService();
|
||||
}
|
||||
|
||||
|
@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
|||
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
||||
}
|
||||
|
||||
public void testCrossFieldMultiMatchQuery() throws IOException {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query();
|
||||
try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) {
|
||||
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
|
||||
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
|
||||
TermQuery tq1 = new TermQuery(new Term("name.first", "banon"));
|
||||
tq1.setBoost(2);
|
||||
TermQuery tq2 = new TermQuery(new Term("name.last", "banon"));
|
||||
tq2.setBoost(3);
|
||||
expected.add(new DisjunctionMaxQuery(Arrays.<Query>asList(tq1, tq2), 0f), Occur.SHOULD);
|
||||
assertEquals(expected, rewrittenQuery);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleQueryString() throws Exception {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
|
|
|
@ -302,10 +302,17 @@ document to match. (Compare this to
|
|||
|
||||
That solves one of the two problems. The problem of differing term frequencies
|
||||
is solved by _blending_ the term frequencies for all fields in order to even
|
||||
out the differences. In other words, `first_name:smith` will be treated as
|
||||
though it has the same weight as `last_name:smith`. (Actually,
|
||||
`last_name:smith` is given a tiny advantage over `first_name:smith`, just to
|
||||
make the order of results more stable.)
|
||||
out the differences.
|
||||
|
||||
In practice, `first_name:smith` will be treated as though it has the same
|
||||
frequencies as `last_name:smith`, plus one. This will make matches on
|
||||
`first_name` and `last_name` have comparable scores, with a tiny advantage
|
||||
for `last_name` since it is the most likely field that contains `smith`.
|
||||
|
||||
Note that `cross_fields` is usually only useful on short string fields
|
||||
that all have a `boost` of `1`. Otherwise boosts, term freqs and length
|
||||
normalization contribute to the score in such a way that the blending of term
|
||||
statistics is not meaningful anymore.
|
||||
|
||||
If you run the above query through the <<search-validate>>, it returns this
|
||||
explanation:
|
||||
|
|
Loading…
Reference in New Issue