diff --git a/core/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java b/core/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java index dd68ba16983..17485d024ae 100644 --- a/core/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java +++ b/core/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java @@ -33,6 +33,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.InPlaceMergeSorter; +import org.apache.lucene.util.ToStringUtils; import java.io.IOException; import java.util.Arrays; @@ -62,13 +63,17 @@ import java.util.List; public abstract class BlendedTermQuery extends Query { private final Term[] terms; + private final float[] boosts; - - public BlendedTermQuery(Term[] terms) { + public BlendedTermQuery(Term[] terms, float[] boosts) { if (terms == null || terms.length == 0) { throw new IllegalArgumentException("terms must not be null or empty"); } + if (boosts != null && boosts.length != terms.length) { + throw new IllegalArgumentException("boosts must have the same size as terms"); + } this.terms = terms; + this.boosts = boosts; } @Override @@ -231,8 +236,22 @@ public abstract class BlendedTermQuery extends Query { @Override public String toString(String field) { - return "blended(terms: " + Arrays.toString(terms) + ")"; - + StringBuilder builder = new StringBuilder("blended(terms:["); + for (int i = 0; i < terms.length; ++i) { + builder.append(terms[i]); + float boost = 1f; + if (boosts != null) { + boost = boosts[i]; + } + builder.append(ToStringUtils.boost(boost)); + builder.append(", "); + } + if (terms.length > 0) { + builder.setLength(builder.length() - 2); + } + builder.append("])"); + builder.append(ToStringUtils.boost(getBoost())); + return builder.toString(); } private volatile Term[] equalTerms = null; @@ -277,7 +296,7 @@ public abstract class BlendedTermQuery extends Query { } public static BlendedTermQuery booleanBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord) { - return new BlendedTermQuery(terms) { + return new BlendedTermQuery(terms, boosts) { @Override protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) { BooleanQuery query = new BooleanQuery(disableCoord); @@ -294,7 +313,7 @@ public abstract class BlendedTermQuery extends Query { } public static BlendedTermQuery commonTermsBlendedQuery(Term[] terms, final float[] boosts, final boolean disableCoord, final float maxTermFrequency) { - return new BlendedTermQuery(terms) { + return new BlendedTermQuery(terms, boosts) { @Override protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) { BooleanQuery query = new BooleanQuery(true); @@ -334,7 +353,7 @@ public abstract class BlendedTermQuery extends Query { } public static BlendedTermQuery dismaxBlendedQuery(Term[] terms, final float[] boosts, final float tieBreakerMultiplier) { - return new BlendedTermQuery(terms) { + return new BlendedTermQuery(terms, boosts) { @Override protected Query topLevelQuery(Term[] terms, TermContext[] ctx, int[] docFreqs, int maxDoc) { DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier); diff --git a/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 878b0502972..621e7d0afca 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -85,7 +85,7 @@ public class MultiMatchQuery extends MatchQuery { throw new IllegalStateException("No such type: " + type); } final List queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch); - return queryBuilder.conbineGrouped(queries); + return queryBuilder.combineGrouped(queries); } private QueryBuilder queryBuilder; @@ -119,7 +119,7 @@ public class MultiMatchQuery extends MatchQuery { return parseAndApply(type, field, value, minimumShouldMatch, boostValue); } - public Query conbineGrouped(List groupQuery) { + public Query combineGrouped(List groupQuery) { if (groupQuery == null || groupQuery.isEmpty()) { return null; } @@ -196,7 +196,7 @@ public class MultiMatchQuery extends MatchQuery { blendedFields = null; } final FieldAndFieldType fieldAndFieldType = group.get(0); - Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, fieldAndFieldType.boost, value, minimumShouldMatch); + Query q = parseGroup(type.matchQueryType(), fieldAndFieldType.field, 1f, value, minimumShouldMatch); if (q != null) { queries.add(q); } diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index b164c448b8a..5381c92a3f9 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -54,6 +54,7 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.core.NumberFieldMapper; @@ -83,6 +84,7 @@ import static org.hamcrest.Matchers.*; public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase { private IndexQueryParserService queryParser; + private IndexService indexService; @Before public void setup() throws IOException { @@ -99,6 +101,7 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase { assertNotNull(doc.dynamicMappingsUpdate()); client().admin().indices().preparePutMapping("test").setType("person").setSource(doc.dynamicMappingsUpdate().toString()).get(); + this.indexService = indexService; queryParser = indexService.queryParserService(); } @@ -2269,6 +2272,23 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase { assertThat(parsedQuery, instanceOf(BooleanQuery.class)); } + public void testCrossFieldMultiMatchQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + Query parsedQuery = queryParser.parse(multiMatchQuery("banon", "name.first^2", "name.last^3", "foobar").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)).query(); + try (Engine.Searcher searcher = indexService.shardSafe(0).acquireSearcher("test")) { + Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery); + + BooleanQuery expected = new BooleanQuery(); + expected.add(new TermQuery(new Term("foobar", "banon")), Occur.SHOULD); + TermQuery tq1 = new TermQuery(new Term("name.first", "banon")); + tq1.setBoost(2); + TermQuery tq2 = new TermQuery(new Term("name.last", "banon")); + tq2.setBoost(3); + expected.add(new DisjunctionMaxQuery(Arrays.asList(tq1, tq2), 0f), Occur.SHOULD); + assertEquals(expected, rewrittenQuery); + } + } + @Test public void testSimpleQueryString() throws Exception { IndexQueryParserService queryParser = queryParser(); diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index ecfaad76d6d..fe1a5f6b974 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -302,10 +302,17 @@ document to match. (Compare this to That solves one of the two problems. The problem of differing term frequencies is solved by _blending_ the term frequencies for all fields in order to even -out the differences. In other words, `first_name:smith` will be treated as -though it has the same weight as `last_name:smith`. (Actually, -`last_name:smith` is given a tiny advantage over `first_name:smith`, just to -make the order of results more stable.) +out the differences. + +In practice, `first_name:smith` will be treated as though it has the same +frequencies as `last_name:smith`, plus one. This will make matches on +`first_name` and `last_name` have comparable scores, with a tiny advantage +for `last_name` since it is the most likely field that contains `smith`. + +Note that `cross_fields` is usually only useful on short string fields +that all have a `boost` of `1`. Otherwise boosts, term freqs and length +normalization contribute to the score in such a way that the blending of term +statistics is not meaningful anymore. If you run the above query through the <>, it returns this explanation: