Merge pull request #11703 from cbuescher/feature/query-refactoring-dismax

Query Refactoring: DisMaxQueryBuilder and Parser
This commit is contained in:
Christoph Büscher 2015-06-18 16:16:12 +02:00
commit 42049276dc
6 changed files with 253 additions and 76 deletions

View File

@ -76,15 +76,6 @@ public class BoolQueryBuilder extends QueryBuilder<BoolQueryBuilder> implements
return this;
}
/**
* Adds a list of queries that <b>must</b> appear in the matching documents and will
* contribute to scoring.
*/
public BoolQueryBuilder must(List<QueryBuilder> queryBuilders) {
mustClauses.addAll(queryBuilders);
return this;
}
/**
* Gets the queries that <b>must</b> appear in the matching documents.
*/
@ -101,15 +92,6 @@ public class BoolQueryBuilder extends QueryBuilder<BoolQueryBuilder> implements
return this;
}
/**
* Adds a list of queries that <b>must</b> appear in the matching documents but will
* not contribute to scoring.
*/
public BoolQueryBuilder filter(List<QueryBuilder> queryBuilders) {
filterClauses.addAll(queryBuilders);
return this;
}
/**
* Gets the queries that <b>must</b> appear in the matching documents but don't conntribute to scoring
*/
@ -125,14 +107,6 @@ public class BoolQueryBuilder extends QueryBuilder<BoolQueryBuilder> implements
return this;
}
/**
* Adds a list of queries that <b>must not</b> appear in the matching documents.
*/
public BoolQueryBuilder mustNot(List<QueryBuilder> queryBuilders) {
mustNotClauses.addAll(queryBuilders);
return this;
}
/**
* Gets the queries that <b>must not</b> appear in the matching documents.
*/
@ -152,18 +126,6 @@ public class BoolQueryBuilder extends QueryBuilder<BoolQueryBuilder> implements
return this;
}
/**
* Adds a list of clauses that <i>should</i> be matched by the returned documents. For a boolean query with no
* <tt>MUST</tt> clauses one or more <code>SHOULD</code> clauses must match a document
* for the BooleanQuery to match.
*
* @see #minimumNumberShouldMatch(int)
*/
public BoolQueryBuilder should(List<QueryBuilder> queryBuilders) {
shouldClauses.addAll(queryBuilders);
return this;
}
/**
* Gets the list of clauses that <b>should</b> be matched by the returned documents.
*
@ -396,13 +358,13 @@ public class BoolQueryBuilder extends QueryBuilder<BoolQueryBuilder> implements
public BoolQueryBuilder readFrom(StreamInput in) throws IOException {
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
List<QueryBuilder> queryBuilders = in.readNamedWritableList();
boolQueryBuilder.must(queryBuilders);
boolQueryBuilder.mustClauses.addAll(queryBuilders);
queryBuilders = in.readNamedWritableList();
boolQueryBuilder.mustNot(queryBuilders);
boolQueryBuilder.mustNotClauses.addAll(queryBuilders);
queryBuilders = in.readNamedWritableList();
boolQueryBuilder.should(queryBuilders);
boolQueryBuilder.shouldClauses.addAll(queryBuilders);
queryBuilders = in.readNamedWritableList();
boolQueryBuilder.filter(queryBuilders);
boolQueryBuilder.filterClauses.addAll(queryBuilders);
boolQueryBuilder.boost = in.readFloat();
boolQueryBuilder.adjustPureNegative = in.readBoolean();
boolQueryBuilder.disableCoord = in.readBoolean();

View File

@ -154,16 +154,23 @@ public class BoolQueryParser extends BaseQueryParser {
}
}
BoolQueryBuilder boolQuery = new BoolQueryBuilder();
boolQuery.must(mustClauses);
boolQuery.mustNot(mustNotClauses);
boolQuery.should(shouldClauses);
boolQuery.filter(filterClauses);
for (QueryBuilder queryBuilder : mustClauses) {
boolQuery.must(queryBuilder);
}
for (QueryBuilder queryBuilder : mustNotClauses) {
boolQuery.mustNot(queryBuilder);
}
for (QueryBuilder queryBuilder : shouldClauses) {
boolQuery.should(queryBuilder);
}
for (QueryBuilder queryBuilder : filterClauses) {
boolQuery.filter(queryBuilder);
}
boolQuery.boost(boost);
boolQuery.disableCoord(disableCoord);
boolQuery.adjustPureNegative(adjustPureNegative);
boolQuery.minimumNumberShouldMatch(minimumShouldMatch);
boolQuery.queryName(queryName);
boolQuery.validate();
return boolQuery;
}

View File

@ -19,27 +19,34 @@
package org.elasticsearch.index.query;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import static com.google.common.collect.Lists.newArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
/**
* A query that generates the union of documents produced by its sub-queries, and that scores each document
* with the maximum score for that document as produced by any sub-query, plus a tie breaking increment for any
* additional matching sub-queries.
*/
public class DisMaxQueryBuilder extends QueryBuilder implements BoostableQueryBuilder<DisMaxQueryBuilder> {
public class DisMaxQueryBuilder extends QueryBuilder<DisMaxQueryBuilder> implements BoostableQueryBuilder<DisMaxQueryBuilder> {
public static final String NAME = "dis_max";
private ArrayList<QueryBuilder> queries = newArrayList();
private final ArrayList<QueryBuilder> queries = new ArrayList<>();
private float boost = -1;
private float boost = 1.0f;
private float tieBreaker = -1;
/** Default multiplication factor for breaking ties in document scores.*/
public static float DEFAULT_TIE_BREAKER = 0.0f;
private float tieBreaker = DEFAULT_TIE_BREAKER;
private String queryName;
@ -53,6 +60,13 @@ public class DisMaxQueryBuilder extends QueryBuilder implements BoostableQueryBu
return this;
}
/**
* @return an immutable list copy of the current sub-queries of this disjunction
*/
public List<QueryBuilder> queries() {
return this.queries;
}
/**
* Sets the boost for this query. Documents matching this query will (in addition to the normal
* weightings) have their score multiplied by the boost provided.
@ -63,6 +77,13 @@ public class DisMaxQueryBuilder extends QueryBuilder implements BoostableQueryBu
return this;
}
/**
* @return the boost for this query
*/
public float boost() {
return this.boost;
}
/**
* The score of each non-maximum disjunct for a document is multiplied by this weight
* and added into the final score. If non-zero, the value should be small, on the order of 0.1, which says that
@ -74,6 +95,14 @@ public class DisMaxQueryBuilder extends QueryBuilder implements BoostableQueryBu
return this;
}
/**
* @return the tie breaker score
* @see DisMaxQueryBuilder#tieBreaker(float)
*/
public float tieBreaker() {
return this.tieBreaker;
}
/**
* Sets the query name for the filter that can be used when searching for matched_filters per hit.
*/
@ -82,15 +111,18 @@ public class DisMaxQueryBuilder extends QueryBuilder implements BoostableQueryBu
return this;
}
/**
* @return the query name for the filter that can be used when searching for matched_filters per hit.
*/
public String queryName() {
return this.queryName;
}
@Override
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
if (tieBreaker != -1) {
builder.field("tie_breaker", tieBreaker);
}
if (boost != -1) {
builder.field("boost", boost);
}
builder.field("tie_breaker", tieBreaker);
builder.field("boost", boost);
if (queryName != null) {
builder.field("_name", queryName);
}
@ -102,6 +134,67 @@ public class DisMaxQueryBuilder extends QueryBuilder implements BoostableQueryBu
builder.endObject();
}
@Override
public Query toQuery(QueryParseContext parseContext) throws QueryParsingException, IOException {
// return null if there are no queries at all
Collection<Query> luceneQueries = toQueries(queries, parseContext);
if (luceneQueries.isEmpty()) {
return null;
}
DisjunctionMaxQuery query = new DisjunctionMaxQuery(luceneQueries, tieBreaker);
query.setBoost(boost);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
return query;
}
@Override
public QueryValidationException validate() {
// nothing to validate, clauses are optional
return null;
}
@Override
public DisMaxQueryBuilder readFrom(StreamInput in) throws IOException {
DisMaxQueryBuilder disMax = new DisMaxQueryBuilder();
List<QueryBuilder> queryBuilders = in.readNamedWritableList();
disMax.queries.addAll(queryBuilders);
disMax.tieBreaker = in.readFloat();
disMax.queryName = in.readOptionalString();
disMax.boost = in.readFloat();
return disMax;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeNamedWritableList(this.queries);
out.writeFloat(tieBreaker);
out.writeOptionalString(queryName);
out.writeFloat(boost);
}
@Override
public int hashCode() {
return Objects.hash(queries, tieBreaker, boost, queryName);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
DisMaxQueryBuilder other = (DisMaxQueryBuilder) obj;
return Objects.equals(queries, other.queries) &&
Objects.equals(tieBreaker, other.tieBreaker) &&
Objects.equals(boost, other.boost) &&
Objects.equals(queryName, other.queryName);
}
@Override
public String queryId() {
return NAME;

View File

@ -19,8 +19,6 @@
package org.elasticsearch.index.query;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
@ -33,7 +31,7 @@ import static com.google.common.collect.Lists.newArrayList;
/**
*
*/
public class DisMaxQueryParser extends BaseQueryParserTemp {
public class DisMaxQueryParser extends BaseQueryParser {
@Inject
public DisMaxQueryParser() {
@ -45,13 +43,13 @@ public class DisMaxQueryParser extends BaseQueryParserTemp {
}
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
public QueryBuilder fromXContent(QueryParseContext parseContext) throws IOException, QueryParsingException {
XContentParser parser = parseContext.parser();
float boost = 1.0f;
float tieBreaker = 0.0f;
float tieBreaker = DisMaxQueryBuilder.DEFAULT_TIE_BREAKER;
List<Query> queries = newArrayList();
List<QueryBuilder> queries = newArrayList();
boolean queriesFound = false;
String queryName = null;
@ -63,7 +61,7 @@ public class DisMaxQueryParser extends BaseQueryParserTemp {
} else if (token == XContentParser.Token.START_OBJECT) {
if ("queries".equals(currentFieldName)) {
queriesFound = true;
Query query = parseContext.parseInnerQuery();
QueryBuilder query = parseContext.parseInnerQueryBuilder();
if (query != null) {
queries.add(query);
}
@ -74,7 +72,7 @@ public class DisMaxQueryParser extends BaseQueryParserTemp {
if ("queries".equals(currentFieldName)) {
queriesFound = true;
while (token != XContentParser.Token.END_ARRAY) {
Query query = parseContext.parseInnerQuery();
QueryBuilder query = parseContext.parseInnerQueryBuilder();
if (query != null) {
queries.add(query);
}
@ -100,16 +98,14 @@ public class DisMaxQueryParser extends BaseQueryParserTemp {
throw new QueryParsingException(parseContext, "[dis_max] requires 'queries' field");
}
if (queries.isEmpty()) {
return null;
DisMaxQueryBuilder disMaxQuery = new DisMaxQueryBuilder();
disMaxQuery.tieBreaker(tieBreaker);
disMaxQuery.queryName(queryName);
disMaxQuery.boost(boost);
for (QueryBuilder query : queries) {
disMaxQuery.add(query);
}
DisjunctionMaxQuery query = new DisjunctionMaxQuery(queries, tieBreaker);
query.setBoost(boost);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
return query;
return disMaxQuery;
}
@Override

View File

@ -30,6 +30,9 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Base class for all classes producing lucene queries.
@ -113,6 +116,27 @@ public abstract class QueryBuilder<QB extends QueryBuilder> extends ToXContentTo
return obj;
}
/**
* Helper method to convert collection of {@link QueryBuilder} instances to lucene
* {@link Query} instances. {@link QueryBuilder} that return <tt>null</tt> calling
* their {@link QueryBuilder#toQuery(QueryParseContext)} method are not added to the
* resulting collection.
*
* @throws IOException
* @throws QueryParsingException
*/
protected static Collection<Query> toQueries(Collection<QueryBuilder> queryBuilders, QueryParseContext parseContext) throws QueryParsingException,
IOException {
List<Query> queries = new ArrayList<>(queryBuilders.size());
for (QueryBuilder queryBuilder : queryBuilders) {
Query query = queryBuilder.toQuery(parseContext);
if (query != null) {
queries.add(query);
}
}
return queries;
}
//norelease remove this once all builders implement readFrom themselves
@Override
public QB readFrom(StreamInput in) throws IOException {

View File

@ -0,0 +1,95 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.junit.Test;
import java.io.IOException;
public class DisMaxQueryBuilderTest extends BaseQueryTestCase<DisMaxQueryBuilder> {
@Override
protected Query createExpectedQuery(DisMaxQueryBuilder testBuilder, QueryParseContext context) throws QueryParsingException, IOException {
Query query = new DisjunctionMaxQuery(QueryBuilder.toQueries(testBuilder.queries(), context), testBuilder.tieBreaker());
query.setBoost(testBuilder.boost());
if (testBuilder.queryName() != null) {
context.addNamedQuery(testBuilder.queryName(), query);
}
return query;
}
/**
* @return a {@link DisMaxQueryBuilder} with random inner queries
*/
@Override
protected DisMaxQueryBuilder createTestQueryBuilder() {
DisMaxQueryBuilder dismax = new DisMaxQueryBuilder();
int clauses = randomIntBetween(1, 5);
for (int i = 0; i < clauses; i++) {
dismax.add(RandomQueryBuilder.create(random()));
}
if (randomBoolean()) {
dismax.boost(2.0f / randomIntBetween(1, 20));
}
if (randomBoolean()) {
dismax.tieBreaker(2.0f / randomIntBetween(1, 20));
}
if (randomBoolean()) {
dismax.queryName(randomUnicodeOfLengthBetween(3, 15));
}
return dismax;
}
/**
* test `null`return value for missing inner queries
* @throws IOException
* @throws QueryParsingException
*/
@Test
public void testNoInnerQueries() throws QueryParsingException, IOException {
DisMaxQueryBuilder disMaxBuilder = new DisMaxQueryBuilder();
assertNull(disMaxBuilder.toQuery(createContext()));
assertNull(disMaxBuilder.validate());
}
/**
* Test inner query parsing to null. Current DSL allows inner filter element to parse to <tt>null</tt>.
* Those should be ignored upstream. To test this, we use inner {@link ConstantScoreQueryBuilder}
* with empty inner filter.
*/
@Test
public void testInnerQueryReturnsNull() throws IOException {
QueryParseContext context = createContext();
String queryId = ConstantScoreQueryBuilder.PROTOTYPE.queryId();
String queryString = "{ \""+queryId+"\" : { \"filter\" : { } }";
XContentParser parser = XContentFactory.xContent(queryString).createParser(queryString);
context.reset(parser);
assertQueryHeader(parser, queryId);
ConstantScoreQueryBuilder innerQueryBuilder = (ConstantScoreQueryBuilder) context.indexQueryParserService()
.queryParser(queryId).fromXContent(context);
DisMaxQueryBuilder disMaxBuilder = new DisMaxQueryBuilder().add(innerQueryBuilder);
assertNull(disMaxBuilder.toQuery(context));
}
}