From e43dd4687e4f7b33c60f23f1cd24b6b770b78adf Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 9 Aug 2012 00:59:44 +0200 Subject: [PATCH] - Added support for multi match query. --- .../index/query/MultiMatchQueryBuilder.java | 211 ++++++++++++++++++ .../index/query/MultiMatchQueryParser.java | 180 +++++++++++++++ .../index/query/QueryBuilders.java | 10 + .../index/search/MatchQuery.java | 20 +- .../index/search/MultiMatchQuery.java | 67 ++++++ .../indices/query/IndicesQueriesRegistry.java | 1 + .../search/query/SimpleQueryTests.java | 51 ++++- 7 files changed, 526 insertions(+), 14 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java create mode 100644 src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java create mode 100644 src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java diff --git a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java new file mode 100644 index 00000000000..39bdb072658 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -0,0 +1,211 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; + +/** + * Same as {@link MatchQueryBuilder} but supports multiple fields. + */ +public class MultiMatchQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder<MultiMatchQueryBuilder> { + + private final Object text; + + private final List<String> fields; + + private MatchQueryBuilder.Type type; + + private MatchQueryBuilder.Operator operator; + + private String analyzer; + + private Float boost; + + private Integer slop; + + private String fuzziness; + + private Integer prefixLength; + + private Integer maxExpansions; + + private String minimumShouldMatch; + + private String rewrite = null; + + private String fuzzyRewrite = null; + + private Boolean useDisMax; + + private Float tieBreaker; + + /** + * Constructs a new multi match query for the given text and fields. + */ + public MultiMatchQueryBuilder(Object text, String... fields) { + this.fields = Arrays.asList(fields); + this.text = text; + } + + /** + * Sets the type of the multi match query. + */ + public MultiMatchQueryBuilder type(MatchQueryBuilder.Type type) { + this.type = type; + return this; + } + + /** + * Sets the operator to use when using a boolean query. Defaults to OR. + */ + public MultiMatchQueryBuilder operator(MatchQueryBuilder.Operator operator) { + this.operator = operator; + return this; + } + + /** + * Explicitly set the analyzer to use. Defaults to use explicit mapping config for the field, or, if not + * set, the default search analyzer. + */ + public MultiMatchQueryBuilder analyzer(String analyzer) { + this.analyzer = analyzer; + return this; + } + + /** + * Set the boost to apply to the query. 
+ */ + public MultiMatchQueryBuilder boost(float boost) { + this.boost = boost; + return this; + } + + /** + * Set the phrase slop if evaluated to a phrase query type. + */ + public MultiMatchQueryBuilder slop(int slop) { + this.slop = slop; + return this; + } + + /** + * Sets the minimum similarity used when evaluated to a fuzzy query type. Defaults to "0.5". + */ + public MultiMatchQueryBuilder fuzziness(Object fuzziness) { + this.fuzziness = fuzziness.toString(); + return this; + } + + public MultiMatchQueryBuilder prefixLength(int prefixLength) { + this.prefixLength = prefixLength; + return this; + } + + /** + * When using fuzzy or prefix type query, the number of term expansions to use. Defaults to unbounded + * so its recommended to set it to a reasonable value for faster execution. + */ + public MultiMatchQueryBuilder maxExpansions(int maxExpansions) { + this.maxExpansions = maxExpansions; + return this; + } + + public MultiMatchQueryBuilder minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + return this; + } + + public MultiMatchQueryBuilder rewrite(String rewrite) { + this.rewrite = rewrite; + return this; + } + + public MultiMatchQueryBuilder fuzzyRewrite(String fuzzyRewrite) { + this.fuzzyRewrite = fuzzyRewrite; + return this; + } + + public MultiMatchQueryBuilder useDisMax(Boolean useDisMax) { + this.useDisMax = useDisMax; + return this; + } + + public MultiMatchQueryBuilder setTieBreaker(Number tieBreaker) { + this.tieBreaker = tieBreaker == null ? null : tieBreaker.floatValue(); + return this; + } + + @Override + public void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(MultiMatchQueryParser.NAME); + + builder.field("query", text); + builder.field("fields", fields); + + if (type != null) { + builder.field("type", type.toString().toLowerCase(Locale.ENGLISH)); + } + if (operator != null) { + builder.field("operator", operator.toString()); + } + if (analyzer != null) { + builder.field("analyzer", 
analyzer); + } + if (boost != null) { + builder.field("boost", boost); + } + if (slop != null) { + builder.field("slop", slop); + } + if (fuzziness != null) { + builder.field("fuzziness", fuzziness); + } + if (prefixLength != null) { + builder.field("prefix_length", prefixLength); + } + if (maxExpansions != null) { + builder.field("max_expansions", maxExpansions); + } + if (minimumShouldMatch != null) { + builder.field("minimum_should_match", minimumShouldMatch); + } + if (rewrite != null) { + builder.field("rewrite", rewrite); + } + if (fuzzyRewrite != null) { + builder.field("fuzzy_rewrite", fuzzyRewrite); + } + + if (useDisMax != null) { + builder.field("use_dis_max", useDisMax); + } + + if (tieBreaker != null) { + builder.field("tie_breaker", tieBreaker); + } + + builder.endObject(); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java new file mode 100644 index 00000000000..7d620f90b97 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/MultiMatchQueryParser.java @@ -0,0 +1,180 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.support.QueryParsers; +import org.elasticsearch.index.search.MatchQuery; +import org.elasticsearch.index.search.MultiMatchQuery; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * Same as {@link MatchQueryParser} but has support for multiple fields. + */ +public class MultiMatchQueryParser implements QueryParser { + + public static final String NAME = "multi_match"; + + @Inject + public MultiMatchQueryParser() { + } + + @Override + public String[] names() { + return new String[]{ + NAME, "multiMatch" + }; + } + + @Override + public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException { + XContentParser parser = parseContext.parser(); + + String text = null; + float boost = 1.0f; + MatchQuery.Type type = MatchQuery.Type.BOOLEAN; + MultiMatchQuery multiMatchQuery = new MultiMatchQuery(parseContext); + String minimumShouldMatch = null; + List<String> fieldNames = Lists.newArrayList(); + Map<String, Float> fieldNameToBoost = Maps.newHashMap(); + + XContentParser.Token token; + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_ARRAY) { + if ("fields".equals(currentFieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + String fField = null; + float fBoost = -1; + char[] fieldText = 
parser.textCharacters(); + int end = parser.textOffset() + parser.textLength(); + for (int i = parser.textOffset(); i < end; i++) { + if (fieldText[i] == '^') { + int relativeLocation = i - parser.textOffset(); + fField = new String(fieldText, parser.textOffset(), relativeLocation); + fBoost = Float.parseFloat(new String(fieldText, i + 1, parser.textLength() - relativeLocation - 1)); + break; + } + } + if (fField == null) { + fField = parser.text(); + } + + if (Regex.isSimpleMatchPattern(fField)) { + for (String field : parseContext.mapperService().simpleMatchToIndexNames(fField)) { + fieldNames.add(field); + if (fBoost != -1) { + fieldNameToBoost.put(field, fBoost); + } + } + } else { + fieldNames.add(fField); + if (fBoost != -1) { + fieldNameToBoost.put(fField, fBoost); + } + } + } + } else { + throw new QueryParsingException(parseContext.index(), "[multi_match] query does not support [" + currentFieldName + "]"); + } + } else if (token.isValue()) { + if ("query".equals(currentFieldName)) { + text = parser.text(); + } else if ("type".equals(currentFieldName)) { + String tStr = parser.text(); + if ("boolean".equals(tStr)) { + type = MatchQuery.Type.BOOLEAN; + } else if ("phrase".equals(tStr)) { + type = MatchQuery.Type.PHRASE; + } else if ("phrase_prefix".equals(tStr) || "phrasePrefix".equals(tStr)) { + type = MatchQuery.Type.PHRASE_PREFIX; + } + } else if ("analyzer".equals(currentFieldName)) { + String analyzer = parser.text(); + if (parseContext.analysisService().analyzer(analyzer) == null) { + throw new QueryParsingException(parseContext.index(), "[multi_match] analyzer [" + analyzer + "] not found"); + } + multiMatchQuery.setAnalyzer(analyzer); + } else if ("boost".equals(currentFieldName)) { + boost = parser.floatValue(); + } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) { + multiMatchQuery.setPhraseSlop(parser.intValue()); + } else if 
("fuzziness".equals(currentFieldName)) { + multiMatchQuery.setFuzziness(parser.textOrNull()); + } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) { + multiMatchQuery.setFuzzyPrefixLength(parser.intValue()); + } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) { + multiMatchQuery.setMaxExpansions(parser.intValue()); + } else if ("operator".equals(currentFieldName)) { + String op = parser.text(); + if ("or".equalsIgnoreCase(op)) { + multiMatchQuery.setOccur(BooleanClause.Occur.SHOULD); + } else if ("and".equalsIgnoreCase(op)) { + multiMatchQuery.setOccur(BooleanClause.Occur.MUST); + } else { + throw new QueryParsingException(parseContext.index(), "[multi_match] query requires operator to be either 'and' or 'or', not [" + op + "]"); + } + } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) { + minimumShouldMatch = parser.textOrNull(); + } else if ("rewrite".equals(currentFieldName)) { + multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null)); + } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) { + multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null)); + } else if ("use_dis_max".equals(currentFieldName) || "useDisMax".equals(currentFieldName)) { + multiMatchQuery.setUseDisMax(parser.booleanValue()); + } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) { + multiMatchQuery.setTieBreaker(parser.floatValue()); + } else { + throw new QueryParsingException(parseContext.index(), "[multi_match] query does not support [" + currentFieldName + "]"); + } + } + } + + if (text == null) { + throw new QueryParsingException(parseContext.index(), "No text specified for multi_match query"); + } + + if (fieldNames.isEmpty()) { + throw new QueryParsingException(parseContext.index(), "No fields 
specified for multi_match query"); + } + + Query query = multiMatchQuery.parse(type, fieldNames, text); + + if (query instanceof BooleanQuery) { + Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch); + } + + query.setBoost(boost); + return query; + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index fd70a99f944..98167f18441 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -65,6 +65,16 @@ public abstract class QueryBuilders { return new MatchQueryBuilder(name, text).type(MatchQueryBuilder.Type.BOOLEAN); } + /** + * Creates a multi match query with type "BOOLEAN" for the provided field names and text. + * + * @param text The query text (to be analyzed). + * @param fieldNames The field names. + */ + public static MultiMatchQueryBuilder multiMatchQuery(Object text, String... fieldNames) { + return new MultiMatchQueryBuilder(text, fieldNames); // BOOLEAN is the default + } + /** * Creates a text query with type "PHRASE" for the provided field name and text. 
* diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 15040a28de4..0249fe6c653 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -51,22 +51,22 @@ public class MatchQuery { PHRASE_PREFIX } - private final QueryParseContext parseContext; + protected final QueryParseContext parseContext; - private String analyzer; + protected String analyzer; - private BooleanClause.Occur occur = BooleanClause.Occur.SHOULD; + protected BooleanClause.Occur occur = BooleanClause.Occur.SHOULD; - private boolean enablePositionIncrements = true; + protected boolean enablePositionIncrements = true; - private int phraseSlop = 0; + protected int phraseSlop = 0; - private String fuzziness = null; - private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; - private int maxExpansions = FuzzyQuery.defaultMaxExpansions; + protected String fuzziness = null; + protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + protected int maxExpansions = FuzzyQuery.defaultMaxExpansions; - private MultiTermQuery.RewriteMethod rewriteMethod; - private MultiTermQuery.RewriteMethod fuzzyRewriteMethod; + protected MultiTermQuery.RewriteMethod rewriteMethod; + protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod; public MatchQuery(QueryParseContext parseContext) { this.parseContext = parseContext; diff --git a/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java new file mode 100644 index 00000000000..328c3543232 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -0,0 +1,67 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.search; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.Query; +import org.elasticsearch.index.query.QueryParseContext; + +import java.util.List; + +public class MultiMatchQuery extends MatchQuery { + + private boolean useDisMax = true; + private float tieBreaker; + + public void setUseDisMax(boolean useDisMax) { + this.useDisMax = useDisMax; + } + + public void setTieBreaker(float tieBreaker) { + this.tieBreaker = tieBreaker; + } + + public MultiMatchQuery(QueryParseContext parseContext) { + super(parseContext); + } + + public Query parse(Type type, List<String> fieldNames, String text) { + if (fieldNames.size() == 1) { + return parse(type, fieldNames.get(0), text); + } + + if (useDisMax) { + DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(tieBreaker); + for (String fieldName : fieldNames) { + disMaxQuery.add(parse(type, fieldName, text)); + } + return disMaxQuery; + } else { + BooleanQuery booleanQuery = new BooleanQuery(); + for (String fieldName : fieldNames) { + booleanQuery.add(parse(type, fieldName, text), BooleanClause.Occur.SHOULD); + } + return booleanQuery; + } + } + +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/indices/query/IndicesQueriesRegistry.java 
b/src/main/java/org/elasticsearch/indices/query/IndicesQueriesRegistry.java index 99f8dc68825..85766ea3044 100644 --- a/src/main/java/org/elasticsearch/indices/query/IndicesQueriesRegistry.java +++ b/src/main/java/org/elasticsearch/indices/query/IndicesQueriesRegistry.java @@ -41,6 +41,7 @@ public class IndicesQueriesRegistry { public IndicesQueriesRegistry(Settings settings, @Nullable ClusterService clusterService) { Map<String, QueryParser> queryParsers = Maps.newHashMap(); addQueryParser(queryParsers, new MatchQueryParser()); + addQueryParser(queryParsers, new MultiMatchQueryParser()); addQueryParser(queryParsers, new NestedQueryParser()); addQueryParser(queryParsers, new HasChildQueryParser()); addQueryParser(queryParsers, new TopChildrenQueryParser()); diff --git a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java index 68383d89ad2..84983114e42 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java @@ -22,10 +22,8 @@ package org.elasticsearch.test.integration.search.query; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.TermQueryBuilder; -import org.elasticsearch.index.query.WrapperFilterBuilder; -import org.elasticsearch.index.query.WrapperQueryBuilder; +import org.elasticsearch.index.query.*; +import org.elasticsearch.search.facet.FacetBuilders; import org.elasticsearch.test.integration.AbstractNodesTests; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -349,4 +347,49 @@ public class SimpleQueryTests extends AbstractNodesTests { assertThat("Failures " + 
Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0)); assertThat(searchResponse.hits().totalHits(), equalTo(1l)); } + + @Test + public void testMultiMatchQuery() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + + client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 1)).execute().actionGet(); + + client.prepareIndex("test", "type1", "1").setSource("field1", "value1", "field2", "value4").execute().actionGet(); + client.prepareIndex("test", "type1", "2").setSource("field1", "value2", "field2", "value5").execute().actionGet(); + client.prepareIndex("test", "type1", "3").setSource("field1", "value3", "field2", "value6").execute().actionGet(); + client.admin().indices().prepareRefresh("test").execute().actionGet(); + + MultiMatchQueryBuilder builder = QueryBuilders.multiMatchQuery("value1 value2 value4", "field1", "field2"); + SearchResponse searchResponse = client.prepareSearch() + .setQuery(builder) + .addFacet(FacetBuilders.termsFacet("field1").field("field1")) + .execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(2l)); + assertThat(searchResponse.hits().getAt(0).id(), equalTo("1")); + assertThat(searchResponse.hits().getAt(1).id(), equalTo("2")); + + builder.useDisMax(false); + searchResponse = client.prepareSearch() + .setQuery(builder) + .execute().actionGet(); + + assertThat(searchResponse.hits().totalHits(), equalTo(2l)); + assertThat(searchResponse.hits().getAt(0).id(), equalTo("1")); + assertThat(searchResponse.hits().getAt(1).id(), equalTo("2")); + + client.admin().indices().prepareRefresh("test").execute().actionGet(); + builder = QueryBuilders.multiMatchQuery("value1", "field1", "field2") + .operator(MatchQueryBuilder.Operator.AND); // Operator only applies on terms inside a field! Fields are always OR-ed together. 
+ searchResponse = client.prepareSearch() + .setQuery(builder) + .execute().actionGet(); + assertThat(searchResponse.hits().totalHits(), equalTo(1l)); + assertThat(searchResponse.hits().getAt(0).id(), equalTo("1")); + } + }