From 29d16cd1e83c215c61e49f8902ad731fd7ff4dc4 Mon Sep 17 00:00:00 2001 From: Alex Ksikes Date: Tue, 26 May 2015 12:00:06 +0200 Subject: [PATCH] Refactors CommonTermsQuery Refactors CommonTermsQuery analogous to TermQueryBuilder. Still left to do are the tests to compare between builder and actual Lucene query. Relates to #10217 This PR is against the query-refactoring branch. --- .../index/query/CommonTermsQueryBuilder.java | 259 ++++++++++++++---- .../index/query/CommonTermsQueryParser.java | 135 ++------- .../elasticsearch/index/query/Operator.java | 85 ++++++ .../index/query/QueryBuilder.java | 2 +- .../index/query/QueryBuilders.java | 6 +- .../query/CommonTermsQueryBuilderTest.java | 142 ++++++++++ .../search/query/SearchQueryTests.java | 1 - 7 files changed, 470 insertions(+), 160 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/index/query/Operator.java create mode 100644 core/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTest.java diff --git a/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java index 46a84a1f972..183702073b9 100644 --- a/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java @@ -19,18 +19,31 @@ package org.elasticsearch.index.query; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.ExtendedCommonTermsQuery; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.mapper.MappedFieldType; import java.io.IOException; +import java.util.Objects; /** * CommonTermsQuery query is a query that executes high-frequency terms in a * optional sub-query to prevent slow queries due to "common" terms like - * stopwords. This query basically builds 2 queries off the {@link #add(Term) - * added} terms where low-frequency terms are added to a required boolean clause + * stopwords. This query basically builds 2 queries off the + * {@link org.apache.lucene.queries.CommonTermsQuery#add(Term) added} terms + * where low-frequency terms are added to a required boolean clause * and high-frequency terms are added to an optional boolean clause. The * optional clause is only executed if the required "low-frequency' clause * matches. Scores produced by this query will be slightly different to plain @@ -46,54 +59,52 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilderAND. */ public CommonTermsQueryBuilder highFreqOperator(Operator operator) { - this.highFreqOperator = operator; + this.highFreqOperator = (operator == null) ? DEFAULT_HIGH_FREQ_OCCUR : operator; return this; } + public Operator highFreqOperator() { + return highFreqOperator; + } + /** * Sets the operator to use for terms with a low document frequency (less * than {@link #cutoffFrequency(float)}. Defaults to AND. */ public CommonTermsQueryBuilder lowFreqOperator(Operator operator) { - this.lowFreqOperator = operator; + this.lowFreqOperator = (operator == null) ? DEFAULT_LOW_FREQ_OCCUR : operator; return this; } + public Operator lowFreqOperator() { + return lowFreqOperator; + } + /** * Explicitly set the analyzer to use. Defaults to use explicit mapping * config for the field, or, if not set, the default search analyzer. @@ -124,6 +143,10 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder=1) representing the maximum threshold of * a terms document frequency to be considered a low frequency term. * Defaults to - * {@value CommonTermsQueryParser#DEFAULT_MAX_TERM_DOC_FREQ} + * {@value #DEFAULT_CUTOFF_FREQ} */ public CommonTermsQueryBuilder cutoffFrequency(float cutoffFrequency) { this.cutoffFrequency = cutoffFrequency; return this; } + + public float cutoffFrequency() { + return this.cutoffFrequency; + } /** * Sets the minimum number of high frequent query terms that need to match in order to @@ -154,6 +185,10 @@ public class CommonTermsQueryBuilder extends AbstractQueryBuilder extends NamedWriteable, ToXContent { - + /** * Validate the query. * @return a {@link QueryValidationException} containing error messages, {@code null} if query is valid. diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/core/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index 7ae3ad1c6bf..90b2efca348 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -59,11 +59,11 @@ public abstract class QueryBuilders { /** * Creates a common query for the provided field name and text. * - * @param name The field name. + * @param fieldName The field name. * @param text The query text (to be analyzed). */ - public static CommonTermsQueryBuilder commonTermsQuery(String name, Object text) { - return new CommonTermsQueryBuilder(name, text); + public static CommonTermsQueryBuilder commonTermsQuery(String fieldName, Object text) { + return new CommonTermsQueryBuilder(fieldName, text); } /** diff --git a/core/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTest.java b/core/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTest.java new file mode 100644 index 00000000000..57b40151cb7 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/query/CommonTermsQueryBuilderTest.java @@ -0,0 +1,142 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queries.ExtendedCommonTermsQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.Query; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.junit.Test; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class CommonTermsQueryBuilderTest extends BaseQueryTestCase { + + @Override + protected CommonTermsQueryBuilder createTestQueryBuilder() { + CommonTermsQueryBuilder query; + + // mapped or unmapped field + String text = randomAsciiOfLengthBetween(1, 10); + if (randomBoolean()) { + query = new CommonTermsQueryBuilder(STRING_FIELD_NAME, text); + } else { + query = new CommonTermsQueryBuilder(randomAsciiOfLengthBetween(1, 10), text); + } + + if (randomBoolean()) { + query.cutoffFrequency((float) randomIntBetween(1, 10)); + } + + if (randomBoolean()) { + query.lowFreqOperator(randomFrom(Operator.values())); + } + + // number of low frequency terms that must match + if (randomBoolean()) { + query.lowFreqMinimumShouldMatch("" + randomIntBetween(1, 5)); + } + + if (randomBoolean()) { + query.highFreqOperator(randomFrom(Operator.values())); + } + + // number of high frequency terms that must match + if (randomBoolean()) { + query.highFreqMinimumShouldMatch("" + randomIntBetween(1, 5)); + } + + if (randomBoolean()) { + query.analyzer(randomFrom("simple", "keyword", "whitespace")); + } + + if (randomBoolean()) { + query.disableCoord(randomBoolean()); + } + + if (randomBoolean()) { + query.boost(2.0f / randomIntBetween(1, 20)); + } + if (randomBoolean()) { + query.queryName(randomAsciiOfLengthBetween(1, 10)); + } + + return query; + } + + @Override + protected Query createExpectedQuery(CommonTermsQueryBuilder queryBuilder, QueryParseContext context) throws IOException { + String fieldName = queryBuilder.fieldName(); + Analyzer analyzer = context.mapperService().searchAnalyzer(); + + // handle mapped field + MappedFieldType fieldType = context.fieldMapper(fieldName); + if (fieldType != null) { + fieldName = fieldType.names().indexName(); + analyzer = context.getSearchAnalyzer(fieldType); + } + + // handle specified analyzer + if (queryBuilder.analyzer() != null) { + analyzer = context.analysisService().analyzer(queryBuilder.analyzer()); + } + + Occur highFreqOccur = queryBuilder.highFreqOperator().toBooleanClauseOccur(); + Occur lowFreqOccur = queryBuilder.lowFreqOperator().toBooleanClauseOccur(); + + ExtendedCommonTermsQuery expectedQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, queryBuilder.cutoffFrequency(), + queryBuilder.disableCoord(), fieldType); + CommonTermsQueryBuilder.parseQueryString(expectedQuery, queryBuilder.text(), fieldName, analyzer, + queryBuilder.lowFreqMinimumShouldMatch(), queryBuilder.highFreqMinimumShouldMatch()); + + expectedQuery.setBoost(queryBuilder.boost()); + return expectedQuery; + } + + @Override + protected void assertLuceneQuery(CommonTermsQueryBuilder queryBuilder, Query query, QueryParseContext context) { + if (queryBuilder.queryName() != null) { + Query namedQuery = context.copyNamedFilters().get(queryBuilder.queryName()); + assertThat(namedQuery, equalTo(query)); + } + } + + @Test + public void testValidate() { + CommonTermsQueryBuilder commonTermsQueryBuilder = new CommonTermsQueryBuilder("", "text"); + assertThat(commonTermsQueryBuilder.validate().validationErrors().size(), is(1)); + + commonTermsQueryBuilder = new CommonTermsQueryBuilder("field", null); + assertThat(commonTermsQueryBuilder.validate().validationErrors().size(), is(1)); + + commonTermsQueryBuilder = new CommonTermsQueryBuilder("field", "text"); + assertNull(commonTermsQueryBuilder.validate()); + } + + @Test + public void testNoTermsFromQueryString() throws IOException { + CommonTermsQueryBuilder builder = new CommonTermsQueryBuilder(STRING_FIELD_NAME, ""); + assertNull(builder.toQuery(createContext())); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java b/core/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java index d4932e1b8e4..5379a2589c8 100644 --- a/core/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java +++ b/core/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java @@ -34,7 +34,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.query.*; -import org.elasticsearch.index.query.CommonTermsQueryBuilder.Operator; import org.elasticsearch.index.query.MatchQueryBuilder.Type; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.script.Script;