Merge pull request #12294 from jpountz/fix/multi_match_boost
`multi_match` query applies boosts too many times.
This commit is contained in:
commit
00093a21dc
|
@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||||
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -62,13 +63,17 @@ import java.util.List;
|
||||||
public abstract class BlendedTermQuery extends Query {
|
public abstract class BlendedTermQuery extends Query {
|
||||||
|
|
||||||
private final Term[] terms;
|
private final Term[] terms;
|
||||||
|
private final float[] boosts;
|
||||||
|
|
||||||
|
public BlendedTermQuery(Term[] terms, float[] boosts) {
|
||||||
public BlendedTermQuery(Term[] terms) {
|
|
||||||
if (terms == null || terms.length == 0) {
|
if (terms == null || terms.length == 0) {
|
||||||
throw new IllegalArgumentException("terms must not be null or empty");
|
throw new IllegalArgumentException("terms must not be null or empty");
|
||||||
}
|
}
|
||||||
|
if (boosts != null && boosts.length != terms.length) {
|
||||||
|
throw new IllegalArgumentException("boosts must have the same size as terms");
|
||||||
|
}
|
||||||
this.terms = terms;
|
this.terms = terms;
|
||||||
|
this.boosts = boosts;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString(String field) {
|
public String toString(String field) {
|
||||||
return "blended(terms: " + Arrays.toString(terms) + ")";
|
StringBuilder builder = new StringBuilder("blended(terms:[");
|
||||||
|
for (int i = 0; i < terms.length; ++i) {
|
||||||
|
builder.append(terms[i]);
|
||||||
|
float boost = 1f;
|
||||||
|
if (boosts != null) {
|
||||||
|
boost = boosts[i];
|
||||||
|
}
|
||||||
|
builder.append(ToStringUtils.boost(boost));
|
||||||
|
builder.append(", ");
|
||||||
|
}
|
||||||
|
if (terms.length > 0) {
|
||||||
|
builder.setLength(builder.length() - 2);
|
||||||
|
}
|
||||||
|
builder.append("])");
|
||||||
|
builder.append(ToStringUtils.boost(getBoost()));
|
||||||
|
return builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private volatile Term[] equalTerms = null;
|
private volatile Term[] equalTerms = null;
|
||||||
|
@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
|
public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) {
|
||||||
return new BlendedTermQuery(terms) {
|
return new BlendedTermQuery(terms, boosts) {
|
||||||
@Override
|
@Override
|
||||||
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
||||||
BooleanQuery query = new BooleanQuery(disableCoord);
|
BooleanQuery query = new BooleanQuery(disableCoord);
|
||||||
|
@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
|
public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) {
|
||||||
return new BlendedTermQuery(terms) {
|
return new BlendedTermQuery(terms, boosts) {
|
||||||
@Override
|
@Override
|
||||||
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
||||||
BooleanQuery query = new BooleanQuery(true);
|
BooleanQuery query = new BooleanQuery(true);
|
||||||
|
@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
|
public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) {
|
||||||
return new BlendedTermQuery(terms) {
|
return new BlendedTermQuery(terms, boosts) {
|
||||||
@Override
|
@Override
|
||||||
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) {
|
||||||
DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
|
DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
|
||||||
|
|
|
@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
throw new IllegalStateException("No such type: " + type);
|
throw new IllegalStateException("No such type: " + type);
|
||||||
}
|
}
|
||||||
final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
|
final List<? extends Query> queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch);
|
||||||
return queryBuilder.conbineGrouped(queries);
|
return queryBuilder.combineGrouped(queries);
|
||||||
}
|
}
|
||||||
|
|
||||||
private QueryBuilder queryBuilder;
|
private QueryBuilder queryBuilder;
|
||||||
|
@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
|
return parseAndApply(type, field, value, minimumShouldMatch, boostValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query conbineGrouped(List<? extends Query> groupQuery) {
|
public Query combineGrouped(List<? extends Query> groupQuery) {
|
||||||
if (groupQuery == null || groupQuery.isEmpty()) {
|
if (groupQuery == null || groupQuery.isEmpty()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery {
|
||||||
blendedFields = null;
|
blendedFields = null;
|
||||||
}
|
}
|
||||||
final FieldAndFieldType fieldAndFieldType = group.get(0);
|
final FieldAndFieldType fieldAndFieldType = group.get(0);
|
||||||
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch);
|
Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch);
|
||||||
if (q != null) {
|
if (q != null) {
|
||||||
queries.add(q);
|
queries.add(q);
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.common.xcontent.XContentHelper;
|
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
|
import org.elasticsearch.index.engine.Engine;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.index.mapper.ParsedDocument;
|
import org.elasticsearch.index.mapper.ParsedDocument;
|
||||||
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
||||||
|
@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*;
|
||||||
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
private IndexQueryParserService queryParser;
|
private IndexQueryParserService queryParser;
|
||||||
|
private IndexService indexService;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
|
@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||||
assertNotNull(doc.dynamicMappingsUpdate());
|
assertNotNull(doc.dynamicMappingsUpdate());
|
||||||
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
|
client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get();
|
||||||
|
|
||||||
|
this.indexService = indexService;
|
||||||
queryParser = indexService.queryParserService();
|
queryParser = indexService.queryParserService();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
|
||||||
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCrossFieldMultiMatchQuery() throws IOException {
|
||||||
|
IndexQueryParserService queryParser = queryParser();
|
||||||
|
Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query();
|
||||||
|
try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) {
|
||||||
|
Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery);
|
||||||
|
|
||||||
|
BooleanQuery expected = new BooleanQuery();
|
||||||
|
expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD);
|
||||||
|
TermQuery tq1 = new TermQuery(new Term("name.first", "banon"));
|
||||||
|
tq1.setBoost(2);
|
||||||
|
TermQuery tq2 = new TermQuery(new Term("name.last", "banon"));
|
||||||
|
tq2.setBoost(3);
|
||||||
|
expected.add(new DisjunctionMaxQuery(Arrays.<Query>asList(tq1, tq2), 0f), Occur.SHOULD);
|
||||||
|
assertEquals(expected, rewrittenQuery);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimpleQueryString() throws Exception {
|
public void testSimpleQueryString() throws Exception {
|
||||||
IndexQueryParserService queryParser = queryParser();
|
IndexQueryParserService queryParser = queryParser();
|
||||||
|
|
|
@ -302,10 +302,17 @@ document to match. (Compare this to
|
||||||
|
|
||||||
That solves one of the two problems. The problem of differing term frequencies
|
That solves one of the two problems. The problem of differing term frequencies
|
||||||
is solved by _blending_ the term frequencies for all fields in order to even
|
is solved by _blending_ the term frequencies for all fields in order to even
|
||||||
out the differences. In other words, `first_name:smith` will be treated as
|
out the differences.
|
||||||
though it has the same weight as `last_name:smith`. (Actually,
|
|
||||||
`last_name:smith` is given a tiny advantage over `first_name:smith`, just to
|
In practice, `first_name:smith` will be treated as though it has the same
|
||||||
make the order of results more stable.)
|
frequencies as `last_name:smith`, plus one. This will make matches on
|
||||||
|
`first_name` and `last_name` have comparable scores, with a tiny advantage
|
||||||
|
for `last_name` since it is the most likely field that contains `smith`.
|
||||||
|
|
||||||
|
Note that `cross_fields` is usually only useful on short string fields
|
||||||
|
that all have a `boost` of `1`. Otherwise boosts, term freqs and length
|
||||||
|
normalization contribute to the score in such a way that the blending of term
|
||||||
|
statistics is not meaningful anymore.
|
||||||
|
|
||||||
If you run the above query through the <<search-validate>>, it returns this
|
If you run the above query through the <<search-validate>>, it returns this
|
||||||
explanation:
|
explanation:
|
||||||
|
|
Loading…
Reference in New Issue