- Added support for multi match query.

This commit is contained in:
Martijn van Groningen 2012-08-09 00:59:44 +02:00 committed by Shay Banon
parent 53d29e5d8d
commit e43dd4687e
7 changed files with 526 additions and 14 deletions

View File

@ -0,0 +1,211 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
/**
 * Same as {@link MatchQueryBuilder} but supports multiple fields.
 * <p>
 * Only options that were explicitly set on the builder are written to the generated query; everything
 * else is left out so that the parsing side applies its own defaults.
 */
public class MultiMatchQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder<MultiMatchQueryBuilder> {

    private final Object text;

    private final List<String> fields;

    private MatchQueryBuilder.Type type;

    private MatchQueryBuilder.Operator operator;

    private String analyzer;

    private Float boost;

    private Integer slop;

    private String fuzziness;

    private Integer prefixLength;

    private Integer maxExpansions;

    private String minimumShouldMatch;

    private String rewrite;

    private String fuzzyRewrite;

    private Boolean useDisMax;

    private Integer tieBreaker;

    /**
     * Constructs a new multi match query.
     *
     * @param text   The query text, written as the <tt>query</tt> element.
     * @param fields The names of the fields to run the text against, written as the <tt>fields</tt> element.
     */
    public MultiMatchQueryBuilder(Object text, String... fields) {
        this.fields = Arrays.asList(fields);
        this.text = text;
    }

    /**
     * Sets the type of the multi match query.
     */
    public MultiMatchQueryBuilder type(MatchQueryBuilder.Type type) {
        this.type = type;
        return this;
    }

    /**
     * Sets the operator to use when using a boolean query. Defaults to <tt>OR</tt>.
     */
    public MultiMatchQueryBuilder operator(MatchQueryBuilder.Operator operator) {
        this.operator = operator;
        return this;
    }

    /**
     * Explicitly set the analyzer to use. Defaults to use explicit mapping config for the field, or, if not
     * set, the default search analyzer.
     */
    public MultiMatchQueryBuilder analyzer(String analyzer) {
        this.analyzer = analyzer;
        return this;
    }

    /**
     * Set the boost to apply to the query.
     */
    public MultiMatchQueryBuilder boost(float boost) {
        this.boost = boost;
        return this;
    }

    /**
     * Set the phrase slop if evaluated to a phrase query type.
     */
    public MultiMatchQueryBuilder slop(int slop) {
        this.slop = slop;
        return this;
    }

    /**
     * Sets the minimum similarity used when evaluated to a fuzzy query type.
     *
     * @param fuzziness The fuzziness value; it is stored (and emitted) in its string form. Passing
     *                  <tt>null</tt> leaves the option unset, like the other option setters.
     */
    public MultiMatchQueryBuilder fuzziness(Object fuzziness) {
        // Guard against null: calling toString() unconditionally used to throw a NullPointerException.
        this.fuzziness = fuzziness == null ? null : fuzziness.toString();
        return this;
    }

    /**
     * Sets the prefix length, written as <tt>prefix_length</tt>.
     */
    public MultiMatchQueryBuilder prefixLength(int prefixLength) {
        this.prefixLength = prefixLength;
        return this;
    }

    /**
     * When using fuzzy or prefix type query, the number of term expansions to use. It is recommended to
     * set it to a reasonable value for faster execution.
     */
    public MultiMatchQueryBuilder maxExpansions(int maxExpansions) {
        this.maxExpansions = maxExpansions;
        return this;
    }

    /**
     * Sets the minimum should match specification, written as <tt>minimum_should_match</tt>.
     */
    public MultiMatchQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
        this.minimumShouldMatch = minimumShouldMatch;
        return this;
    }

    /**
     * Sets the rewrite method name, written as <tt>rewrite</tt>.
     */
    public MultiMatchQueryBuilder rewrite(String rewrite) {
        this.rewrite = rewrite;
        return this;
    }

    /**
     * Sets the fuzzy rewrite method name, written as <tt>fuzzy_rewrite</tt>.
     */
    public MultiMatchQueryBuilder fuzzyRewrite(String fuzzyRewrite) {
        this.fuzzyRewrite = fuzzyRewrite;
        return this;
    }

    /**
     * Sets whether the per-field queries should be combined with a dis max query, written as
     * <tt>use_dis_max</tt>.
     */
    public MultiMatchQueryBuilder useDisMax(Boolean useDisMax) {
        this.useDisMax = useDisMax;
        return this;
    }

    /**
     * Sets the tie breaker, written as <tt>tie_breaker</tt>. Named like the other fluent option setters
     * of this builder.
     */
    public MultiMatchQueryBuilder tieBreaker(Integer tieBreaker) {
        this.tieBreaker = tieBreaker;
        return this;
    }

    /**
     * Sets the tie breaker. Kept for callers using the original setter-style name; behaves exactly like
     * {@link #tieBreaker(Integer)}.
     */
    public MultiMatchQueryBuilder setTieBreaker(Integer tieBreaker) {
        return tieBreaker(tieBreaker);
    }

    /**
     * Writes the query as an object named after {@link MultiMatchQueryParser#NAME}. The <tt>query</tt> and
     * <tt>fields</tt> elements are always written, every other option only when it has been set.
     */
    @Override
    public void doXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(MultiMatchQueryParser.NAME);

        builder.field("query", text);
        builder.field("fields", fields);

        if (type != null) {
            // Enum constant names are upper case, the query DSL values are lower case.
            builder.field("type", type.toString().toLowerCase(Locale.ENGLISH));
        }
        if (operator != null) {
            builder.field("operator", operator.toString());
        }
        if (analyzer != null) {
            builder.field("analyzer", analyzer);
        }
        if (boost != null) {
            builder.field("boost", boost);
        }
        if (slop != null) {
            builder.field("slop", slop);
        }
        if (fuzziness != null) {
            builder.field("fuzziness", fuzziness);
        }
        if (prefixLength != null) {
            builder.field("prefix_length", prefixLength);
        }
        if (maxExpansions != null) {
            builder.field("max_expansions", maxExpansions);
        }
        if (minimumShouldMatch != null) {
            builder.field("minimum_should_match", minimumShouldMatch);
        }
        if (rewrite != null) {
            builder.field("rewrite", rewrite);
        }
        if (fuzzyRewrite != null) {
            builder.field("fuzzy_rewrite", fuzzyRewrite);
        }
        if (useDisMax != null) {
            builder.field("use_dis_max", useDisMax);
        }
        if (tieBreaker != null) {
            builder.field("tie_breaker", tieBreaker);
        }

        builder.endObject();
    }
}

View File

@ -0,0 +1,180 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.query.support.QueryParsers;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MultiMatchQuery;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Same as {@link MatchQueryParser} but has support for multiple fields.
 * <p>
 * The query text is taken from the <tt>query</tt> element and the fields from the <tt>fields</tt> array.
 * Each entry of that array may be a plain field name, a simple match pattern that is expanded through
 * the mapper service, and may carry a <tt>^boost</tt> suffix.
 */
public class MultiMatchQueryParser implements QueryParser {

    public static final String NAME = "multi_match";

    @Inject
    public MultiMatchQueryParser() {
    }

    @Override
    public String[] names() {
        return new String[]{
                NAME, "multiMatch"
        };
    }

    /**
     * Parses the body of a multi match query.
     *
     * @param parseContext The context providing the parser, the mapper service and the analysis service.
     * @return The query built by {@link MultiMatchQuery}, with minimum should match (boolean queries only)
     *         and the query level boost applied.
     * @throws QueryParsingException if an unsupported element is found, the analyzer is unknown, the
     *                               operator is neither 'and' nor 'or', or the text / the fields are missing.
     */
    @Override
    public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
        XContentParser parser = parseContext.parser();

        String text = null;
        float boost = 1.0f;
        MatchQuery.Type type = MatchQuery.Type.BOOLEAN;
        MultiMatchQuery multiMatchQuery = new MultiMatchQuery(parseContext);
        String minimumShouldMatch = null;
        List<String> fieldNames = Lists.newArrayList();
        // NOTE(review): the per-field boosts are collected here but never applied, because
        // MultiMatchQuery#parse only accepts the field names. A "field^2" entry therefore currently
        // behaves like "field".
        Map<String, Float> fieldNameToBoost = Maps.newHashMap();

        XContentParser.Token token;
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("fields".equals(currentFieldName)) {
                    parseFields(parseContext, parser, fieldNames, fieldNameToBoost);
                } else {
                    throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query does not support [" + currentFieldName + "]");
                }
            } else if (token.isValue()) {
                if ("query".equals(currentFieldName)) {
                    text = parser.text();
                } else if ("type".equals(currentFieldName)) {
                    String tStr = parser.text();
                    if ("boolean".equals(tStr)) {
                        type = MatchQuery.Type.BOOLEAN;
                    } else if ("phrase".equals(tStr)) {
                        type = MatchQuery.Type.PHRASE;
                    } else if ("phrase_prefix".equals(tStr) || "phrasePrefix".equals(tStr)) {
                        // The camelCase alias has to be compared against the value; comparing it
                        // against the element name (always "type" here) could never match.
                        type = MatchQuery.Type.PHRASE_PREFIX;
                    }
                    // NOTE(review): any other value is silently ignored and the previous type is kept;
                    // consider rejecting unknown types.
                } else if ("analyzer".equals(currentFieldName)) {
                    String analyzer = parser.text();
                    if (parseContext.analysisService().analyzer(analyzer) == null) {
                        throw new QueryParsingException(parseContext.index(), "[" + NAME + "] analyzer [" + analyzer + "] not found");
                    }
                    multiMatchQuery.setAnalyzer(analyzer);
                } else if ("boost".equals(currentFieldName)) {
                    boost = parser.floatValue();
                } else if ("slop".equals(currentFieldName) || "phrase_slop".equals(currentFieldName) || "phraseSlop".equals(currentFieldName)) {
                    multiMatchQuery.setPhraseSlop(parser.intValue());
                } else if ("fuzziness".equals(currentFieldName)) {
                    multiMatchQuery.setFuzziness(parser.textOrNull());
                } else if ("prefix_length".equals(currentFieldName) || "prefixLength".equals(currentFieldName)) {
                    multiMatchQuery.setFuzzyPrefixLength(parser.intValue());
                } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) {
                    multiMatchQuery.setMaxExpansions(parser.intValue());
                } else if ("operator".equals(currentFieldName)) {
                    String op = parser.text();
                    if ("or".equalsIgnoreCase(op)) {
                        multiMatchQuery.setOccur(BooleanClause.Occur.SHOULD);
                    } else if ("and".equalsIgnoreCase(op)) {
                        multiMatchQuery.setOccur(BooleanClause.Occur.MUST);
                    } else {
                        throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query requires operator to be either 'and' or 'or', not [" + op + "]");
                    }
                } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                    minimumShouldMatch = parser.textOrNull();
                } else if ("rewrite".equals(currentFieldName)) {
                    multiMatchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
                } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) {
                    multiMatchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null));
                } else if ("use_dis_max".equals(currentFieldName) || "useDisMax".equals(currentFieldName)) {
                    multiMatchQuery.setUseDisMax(parser.booleanValue());
                } else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
                    multiMatchQuery.setTieBreaker(parser.intValue());
                } else {
                    throw new QueryParsingException(parseContext.index(), "[" + NAME + "] query does not support [" + currentFieldName + "]");
                }
            }
        }

        if (text == null) {
            throw new QueryParsingException(parseContext.index(), "No text specified for " + NAME + " query");
        }
        if (fieldNames.isEmpty()) {
            throw new QueryParsingException(parseContext.index(), "No fields specified for " + NAME + " query");
        }

        Query query = multiMatchQuery.parse(type, fieldNames, text);
        // Minimum should match only makes sense for (and is only applied to) boolean queries.
        if (query instanceof BooleanQuery) {
            Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
        }
        query.setBoost(boost);
        return query;
    }

    /**
     * Consumes the entries of the <tt>fields</tt> array up to and including its end token, adding the
     * resolved field names to <tt>fieldNames</tt> and any <tt>^boost</tt> suffix to <tt>fieldNameToBoost</tt>.
     */
    private static void parseFields(QueryParseContext parseContext, XContentParser parser, List<String> fieldNames,
                                    Map<String, Float> fieldNameToBoost) throws IOException {
        while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
            String fField = null;
            float fBoost = -1; // -1 marks "no boost given"

            // Look for the '^' separator directly in the parser's text buffer.
            char[] fieldText = parser.textCharacters();
            int start = parser.textOffset();
            int end = start + parser.textLength();
            for (int i = start; i < end; i++) {
                if (fieldText[i] == '^') {
                    fField = new String(fieldText, start, i - start);
                    fBoost = Float.parseFloat(new String(fieldText, i + 1, end - i - 1));
                    break;
                }
            }
            if (fField == null) {
                fField = parser.text();
            }

            if (Regex.isSimpleMatchPattern(fField)) {
                // A pattern stands for every index name it matches; each of them gets the same boost.
                for (String field : parseContext.mapperService().simpleMatchToIndexNames(fField)) {
                    fieldNames.add(field);
                    if (fBoost != -1) {
                        fieldNameToBoost.put(field, fBoost);
                    }
                }
            } else {
                fieldNames.add(fField);
                if (fBoost != -1) {
                    fieldNameToBoost.put(fField, fBoost);
                }
            }
        }
    }
}

View File

@ -65,6 +65,16 @@ public abstract class QueryBuilders {
return new MatchQueryBuilder(name, text).type(MatchQueryBuilder.Type.BOOLEAN);
}
/**
 * Creates a multi match query that runs the provided text against all of the provided fields.
 * No type is set on the returned builder, so the query is parsed with the default type "BOOLEAN".
 *
 * @param text       The query text (to be analyzed).
 * @param fieldNames The names of the fields to match against.
 */
public static MultiMatchQueryBuilder multiMatchQuery(Object text, String... fieldNames) {
    // BOOLEAN is the default, hence no explicit type(...) call.
    MultiMatchQueryBuilder multiMatch = new MultiMatchQueryBuilder(text, fieldNames);
    return multiMatch;
}
/**
* Creates a text query with type "PHRASE" for the provided field name and text.
*

View File

@ -51,22 +51,22 @@ public class MatchQuery {
PHRASE_PREFIX
}
private final QueryParseContext parseContext;
protected final QueryParseContext parseContext;
private String analyzer;
protected String analyzer;
private BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
protected BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
private boolean enablePositionIncrements = true;
protected boolean enablePositionIncrements = true;
private int phraseSlop = 0;
protected int phraseSlop = 0;
private String fuzziness = null;
private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
private int maxExpansions = FuzzyQuery.defaultMaxExpansions;
protected String fuzziness = null;
protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
protected int maxExpansions = FuzzyQuery.defaultMaxExpansions;
private MultiTermQuery.RewriteMethod rewriteMethod;
private MultiTermQuery.RewriteMethod fuzzyRewriteMethod;
protected MultiTermQuery.RewriteMethod rewriteMethod;
protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod;
public MatchQuery(QueryParseContext parseContext) {
this.parseContext = parseContext;

View File

@ -0,0 +1,67 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.index.query.QueryParseContext;
import java.util.List;
/**
 * A {@link MatchQuery} that runs the same text against several fields and combines the resulting
 * per-field queries, either with a {@link DisjunctionMaxQuery} (the default) or with a
 * {@link BooleanQuery} of optional clauses.
 */
public class MultiMatchQuery extends MatchQuery {

    private boolean useDisMax = true;

    // Kept as a float because DisjunctionMaxQuery takes a (fractional) float tie breaker multiplier.
    private float tieBreaker;

    public MultiMatchQuery(QueryParseContext parseContext) {
        super(parseContext);
    }

    /**
     * Sets whether the per-field queries are combined with a dis max query (<tt>true</tt>, the default)
     * or with a boolean query (<tt>false</tt>).
     */
    public void setUseDisMax(boolean useDisMax) {
        this.useDisMax = useDisMax;
    }

    /**
     * Sets the tie breaker passed to the dis max query as a whole number.
     */
    public void setTieBreaker(int tieBreaker) {
        this.tieBreaker = tieBreaker;
    }

    /**
     * Sets the tie breaker passed to the dis max query, allowing fractional values.
     */
    public void setTieBreaker(float tieBreaker) {
        this.tieBreaker = tieBreaker;
    }

    /**
     * Builds the query for the given text over all given fields.
     *
     * @param type       The match query type used for every field.
     * @param fieldNames The fields to query. With exactly one field the plain single field query is
     *                   returned without any wrapping.
     * @param text       The query text.
     * @return The single field query, a dis max query or a boolean query whose clauses are all optional.
     */
    public Query parse(Type type, List<String> fieldNames, String text) {
        if (fieldNames.size() == 1) {
            return parse(type, fieldNames.get(0), text);
        }

        if (useDisMax) {
            DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(tieBreaker);
            for (String fieldName : fieldNames) {
                disMaxQuery.add(parse(type, fieldName, text));
            }
            return disMaxQuery;
        } else {
            // Fields are always OR-ed together; the configured operator only applies within a field.
            BooleanQuery booleanQuery = new BooleanQuery();
            for (String fieldName : fieldNames) {
                booleanQuery.add(parse(type, fieldName, text), BooleanClause.Occur.SHOULD);
            }
            return booleanQuery;
        }
    }
}

View File

@ -41,6 +41,7 @@ public class IndicesQueriesRegistry {
public IndicesQueriesRegistry(Settings settings, @Nullable ClusterService clusterService) {
Map<String, QueryParser> queryParsers = Maps.newHashMap();
addQueryParser(queryParsers, new MatchQueryParser());
addQueryParser(queryParsers, new MultiMatchQueryParser());
addQueryParser(queryParsers, new NestedQueryParser());
addQueryParser(queryParsers, new HasChildQueryParser());
addQueryParser(queryParsers, new TopChildrenQueryParser());

View File

@ -22,10 +22,8 @@ package org.elasticsearch.test.integration.search.query;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WrapperFilterBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.facet.FacetBuilders;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
@ -349,4 +347,49 @@ public class SimpleQueryTests extends AbstractNodesTests {
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
}
/**
 * Indexes three documents with two fields each and checks that a multi match query finds the
 * expected documents with dis max enabled (the default), with dis max disabled, and with the AND operator.
 */
@Test
public void testMultiMatchQuery() throws Exception {
    try {
        client.admin().indices().prepareDelete("test").execute().actionGet();
    } catch (Exception e) {
        // ignore, best effort clean up: the delete is allowed to fail, the index is created right below
    }

    client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 1)).execute().actionGet();

    client.prepareIndex("test", "type1", "1").setSource("field1", "value1", "field2", "value4").execute().actionGet();
    client.prepareIndex("test", "type1", "2").setSource("field1", "value2", "field2", "value5").execute().actionGet();
    client.prepareIndex("test", "type1", "3").setSource("field1", "value3", "field2", "value6").execute().actionGet();
    client.admin().indices().prepareRefresh("test").execute().actionGet();

    // Default behaviour: the per-field queries are combined with dis max.
    MultiMatchQueryBuilder builder = QueryBuilders.multiMatchQuery("value1 value2 value4", "field1", "field2");
    SearchResponse searchResponse = client.prepareSearch()
            .setQuery(builder)
            .addFacet(FacetBuilders.termsFacet("field1").field("field1"))
            .execute().actionGet();

    assertThat(searchResponse.hits().totalHits(), equalTo(2l));
    assertThat(searchResponse.hits().getAt(0).id(), equalTo("1"));
    assertThat(searchResponse.hits().getAt(1).id(), equalTo("2"));

    // Same query, but with the per-field queries combined in a boolean query.
    builder.useDisMax(false);
    searchResponse = client.prepareSearch()
            .setQuery(builder)
            .execute().actionGet();

    assertThat(searchResponse.hits().totalHits(), equalTo(2l));
    assertThat(searchResponse.hits().getAt(0).id(), equalTo("1"));
    assertThat(searchResponse.hits().getAt(1).id(), equalTo("2"));

    builder = QueryBuilders.multiMatchQuery("value1", "field1", "field2")
            .operator(MatchQueryBuilder.Operator.AND); // Operator only applies on terms inside a field! Fields are always OR-ed together.
    searchResponse = client.prepareSearch()
            .setQuery(builder)
            .execute().actionGet();
    assertThat(searchResponse.hits().totalHits(), equalTo(1l));
    assertThat(searchResponse.hits().getAt(0).id(), equalTo("1"));
}
}