From 93674b8b1b9fed1427e62eb96438938294a6317c Mon Sep 17 00:00:00 2001 From: Alex Ksikes Date: Fri, 26 Jun 2015 13:29:06 -0500 Subject: [PATCH] Refactoring of RegexpQuery Relates to #10217 Closes #11896 This PR is against the query-refactoring branch. --- .../index/query/RegexpQueryBuilder.java | 153 +++++++++++++++--- .../index/query/RegexpQueryParser.java | 51 ++---- .../index/query/support/QueryParsers.java | 12 +- .../index/query/BaseQueryTestCase.java | 32 ++-- .../index/query/RegexpQueryBuilderTest.java | 100 ++++++++++++ 5 files changed, 270 insertions(+), 78 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/index/query/RegexpQueryBuilderTest.java diff --git a/core/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java index 109e0e6d84b..40b368f7055 100644 --- a/core/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java @@ -19,10 +19,21 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.Objects; /** * A Query that does fuzzy matching for a specific value. 
@@ -30,28 +41,51 @@ import java.io.IOException; public class RegexpQueryBuilder extends AbstractQueryBuilder implements MultiTermQueryBuilder { public static final String NAME = "regexp"; - private final String name; - private final String regexp; - private int flags = RegexpQueryParser.DEFAULT_FLAGS_VALUE; + public static final int DEFAULT_FLAGS_VALUE = RegexpFlag.ALL.value(); + + public static final int DEFAULT_MAX_DETERMINIZED_STATES = Operations.DEFAULT_MAX_DETERMINIZED_STATES; + + private final String fieldName; + + private final String value; + + private int flagsValue = DEFAULT_FLAGS_VALUE; + + private int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES; private String rewrite; - private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; - private boolean maxDetermizedStatesSet; + static final RegexpQueryBuilder PROTOTYPE = new RegexpQueryBuilder(null, null); /** - * Constructs a new term query. - * - * @param name The name of the field - * @param regexp The regular expression + * Constructs a new regex query. + * + * @param fieldName The name of the field + * @param value The regular expression */ - public RegexpQueryBuilder(String name, String regexp) { - this.name = name; - this.regexp = regexp; + public RegexpQueryBuilder(String fieldName, String value) { + this.fieldName = fieldName; + this.value = value; + } + + /** Returns the field name used in this query. */ + public String fieldName() { + return this.fieldName; + } + + /** + * Returns the value used in this query. + */ + public String value() { + return this.value; } public RegexpQueryBuilder flags(RegexpFlag... 
flags) { + if (flags == null) { + this.flagsValue = DEFAULT_FLAGS_VALUE; + return this; + } int value = 0; if (flags.length == 0) { value = RegexpFlag.ALL.value; @@ -60,35 +94,47 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder value |= flag.value; } } - this.flags = value; + this.flagsValue = value; return this; } + public RegexpQueryBuilder flags(int flags) { + this.flagsValue = flags; + return this; + } + + public int flags() { + return this.flagsValue; + } + /** * Sets the regexp maxDeterminizedStates. */ public RegexpQueryBuilder maxDeterminizedStates(int value) { this.maxDeterminizedStates = value; - this.maxDetermizedStatesSet = true; return this; } + + public int maxDeterminizedStates() { + return this.maxDeterminizedStates; + } public RegexpQueryBuilder rewrite(String rewrite) { this.rewrite = rewrite; return this; } + + public String rewrite() { + return this.rewrite; + } @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); - builder.startObject(name); - builder.field("value", regexp); - if (flags != -1) { - builder.field("flags_value", flags); - } - if (maxDetermizedStatesSet) { - builder.field("max_determinized_states", maxDeterminizedStates); - } + builder.startObject(fieldName); + builder.field("value", this.value); + builder.field("flags_value", flagsValue); + builder.field("max_determinized_states", maxDeterminizedStates); if (rewrite != null) { builder.field("rewrite", rewrite); } @@ -101,4 +147,67 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder public String getName() { return NAME; } + + @Override + public Query doToQuery(QueryParseContext parseContext) throws QueryParsingException, IOException { + MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewrite, null); + + Query query = null; + MappedFieldType fieldType = parseContext.fieldMapper(fieldName); + if (fieldType != null) { + query = 
fieldType.regexpQuery(value, flagsValue, maxDeterminizedStates, method, parseContext); + } + if (query == null) { + RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue, maxDeterminizedStates); + if (method != null) { + regexpQuery.setRewriteMethod(method); + } + query = regexpQuery; + } + return query; + } + + @Override + public QueryValidationException validate() { + QueryValidationException validationException = null; + if (Strings.isEmpty(this.fieldName)) { + validationException = addValidationError("field name cannot be null or empty.", validationException); + } + if (this.value == null) { + validationException = addValidationError("query text cannot be null", validationException); + } + return validationException; + } + + @Override + public RegexpQueryBuilder doReadFrom(StreamInput in) throws IOException { + RegexpQueryBuilder regexpQueryBuilder = new RegexpQueryBuilder(in.readString(), in.readString()); + regexpQueryBuilder.flagsValue = in.readVInt(); + regexpQueryBuilder.maxDeterminizedStates = in.readVInt(); + regexpQueryBuilder.rewrite = in.readOptionalString(); + return regexpQueryBuilder; + } + + @Override + public void doWriteTo(StreamOutput out) throws IOException { + out.writeString(fieldName); + out.writeString(value); + out.writeVInt(flagsValue); + out.writeVInt(maxDeterminizedStates); + out.writeOptionalString(rewrite); + } + + @Override + public int doHashCode() { + return Objects.hash(fieldName, value, flagsValue, maxDeterminizedStates, rewrite); + } + + @Override + public boolean doEquals(RegexpQueryBuilder other) { + return Objects.equals(fieldName, other.fieldName) && + Objects.equals(value, other.value) && + Objects.equals(flagsValue, other.flagsValue) && + Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) && + Objects.equals(rewrite, other.rewrite); + } } diff --git a/core/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java 
b/core/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java index dcb822e9889..c5b9f910073 100644 --- a/core/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java @@ -19,23 +19,12 @@ package org.elasticsearch.index.query; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; -/** - * - */ -public class RegexpQueryParser extends BaseQueryParserTemp { +public class RegexpQueryParser extends BaseQueryParser { public static final int DEFAULT_FLAGS_VALUE = RegexpFlag.ALL.value(); @@ -49,16 +38,16 @@ public class RegexpQueryParser extends BaseQueryParserTemp { } @Override - public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException { + public QueryBuilder fromXContent(QueryParseContext parseContext) throws IOException, QueryParsingException { XContentParser parser = parseContext.parser(); String fieldName = parser.currentName(); - String rewriteMethod = null; + String rewrite = null; String value = null; float boost = AbstractQueryBuilder.DEFAULT_BOOST; - int flagsValue = DEFAULT_FLAGS_VALUE; - int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; + int flagsValue = RegexpQueryBuilder.DEFAULT_FLAGS_VALUE; + int maxDeterminizedStates = RegexpQueryBuilder.DEFAULT_MAX_DETERMINIZED_STATES; String queryName = null; String currentFieldName = null; XContentParser.Token token; @@ -78,7 +67,7 @@ public class RegexpQueryParser extends BaseQueryParserTemp { } else 
if ("boost".equals(currentFieldName)) { boost = parser.floatValue(); } else if ("rewrite".equals(currentFieldName)) { - rewriteMethod = parser.textOrNull(); + rewrite = parser.textOrNull(); } else if ("flags".equals(currentFieldName)) { String flags = parser.textOrNull(); flagsValue = RegexpFlag.resolveValue(flags); @@ -106,32 +95,16 @@ public class RegexpQueryParser extends BaseQueryParserTemp { if (value == null) { throw new QueryParsingException(parseContext, "No value specified for regexp query"); } - - MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(parseContext.parseFieldMatcher(), rewriteMethod, null); - - Query query = null; - MappedFieldType fieldType = parseContext.fieldMapper(fieldName); - if (fieldType != null) { - query = fieldType.regexpQuery(value, flagsValue, maxDeterminizedStates, method, parseContext); - } - if (query == null) { - RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue, maxDeterminizedStates); - if (method != null) { - regexpQuery.setRewriteMethod(method); - } - query = regexpQuery; - } - query.setBoost(boost); - if (queryName != null) { - parseContext.addNamedQuery(queryName, query); - } - return query; + return new RegexpQueryBuilder(fieldName, value) + .flags(flagsValue) + .maxDeterminizedStates(maxDeterminizedStates) + .rewrite(rewrite) + .boost(boost) + .queryName(queryName); } @Override public RegexpQueryBuilder getBuilderPrototype() { return RegexpQueryBuilder.PROTOTYPE; } - - } diff --git a/core/src/main/java/org/elasticsearch/index/query/support/QueryParsers.java b/core/src/main/java/org/elasticsearch/index/query/support/QueryParsers.java index 1a12c74a318..a500393c160 100644 --- a/core/src/main/java/org/elasticsearch/index/query/support/QueryParsers.java +++ b/core/src/main/java/org/elasticsearch/index/query/support/QueryParsers.java @@ -29,12 +29,12 @@ import org.elasticsearch.common.ParseFieldMatcher; */ public final class QueryParsers { - private 
static final ParseField CONSTANT_SCORE = new ParseField("constant_score", "constant_score_auto", "constant_score_filter"); - private static final ParseField SCORING_BOOLEAN = new ParseField("scoring_boolean"); - private static final ParseField CONSTANT_SCORE_BOOLEAN = new ParseField("constant_score_boolean"); - private static final ParseField TOP_TERMS = new ParseField("top_terms_"); - private static final ParseField TOP_TERMS_BOOST = new ParseField("top_terms_boost_"); - private static final ParseField TOP_TERMS_BLENDED_FREQS = new ParseField("top_terms_blended_freqs_"); + public static final ParseField CONSTANT_SCORE = new ParseField("constant_score", "constant_score_auto", "constant_score_filter"); + public static final ParseField SCORING_BOOLEAN = new ParseField("scoring_boolean"); + public static final ParseField CONSTANT_SCORE_BOOLEAN = new ParseField("constant_score_boolean"); + public static final ParseField TOP_TERMS = new ParseField("top_terms_"); + public static final ParseField TOP_TERMS_BOOST = new ParseField("top_terms_boost_"); + public static final ParseField TOP_TERMS_BLENDED_FREQS = new ParseField("top_terms_blended_freqs_"); private QueryParsers() { diff --git a/core/src/test/java/org/elasticsearch/index/query/BaseQueryTestCase.java b/core/src/test/java/org/elasticsearch/index/query/BaseQueryTestCase.java index ab9327e38a3..af801bea25d 100644 --- a/core/src/test/java/org/elasticsearch/index/query/BaseQueryTestCase.java +++ b/core/src/test/java/org/elasticsearch/index/query/BaseQueryTestCase.java @@ -25,6 +25,7 @@ import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.common.ParseField; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.inject.AbstractModule; import 
org.elasticsearch.common.inject.Injector; @@ -46,6 +47,7 @@ import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.index.cache.IndexCacheModule; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.functionscore.FunctionScoreModule; +import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.index.similarity.SimilarityModule; import org.elasticsearch.indices.breaker.CircuitBreakerService; @@ -58,20 +60,11 @@ import org.elasticsearch.test.TestSearchContext; import org.elasticsearch.test.VersionUtils; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPoolModule; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.*; import java.io.IOException; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.notNullValue; -import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.*; @Ignore public abstract class BaseQueryTestCase<QB extends AbstractQueryBuilder<QB>> extends ElasticsearchTestCase { @@ -338,4 +331,21 @@ public abstract class BaseQueryTestCase<QB extends AbstractQueryBuilder<QB>> ext } return value; } + + /** + * Helper method to return a random rewrite method + */ + protected static String getRandomRewriteMethod() { + String rewrite; + if (randomBoolean()) { + rewrite = randomFrom(new ParseField[]{QueryParsers.CONSTANT_SCORE, + QueryParsers.SCORING_BOOLEAN, + QueryParsers.CONSTANT_SCORE_BOOLEAN}).getPreferredName(); + } else { + rewrite = randomFrom(new ParseField[]{QueryParsers.TOP_TERMS, + QueryParsers.TOP_TERMS_BOOST, + QueryParsers.TOP_TERMS_BLENDED_FREQS}).getPreferredName() + "1"; + } + return rewrite; + } } diff --git 
a/core/src/test/java/org/elasticsearch/index/query/RegexpQueryBuilderTest.java b/core/src/test/java/org/elasticsearch/index/query/RegexpQueryBuilderTest.java new file mode 100644 index 00000000000..30cfaccad79 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/query/RegexpQueryBuilderTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.support.QueryParsers; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.Matchers.is; + +public class RegexpQueryBuilderTest extends BaseQueryTestCase<RegexpQueryBuilder> { + + @Override + protected RegexpQueryBuilder doCreateTestQueryBuilder() { + // mapped or unmapped fields + String fieldName = randomBoolean() ? 
STRING_FIELD_NAME : randomAsciiOfLengthBetween(1, 10); + String value = randomAsciiOfLengthBetween(1, 10); + RegexpQueryBuilder query = new RegexpQueryBuilder(fieldName, value); + + if (randomBoolean()) { + List<RegexpFlag> flags = new ArrayList<>(); + int iter = randomInt(5); + for (int i = 0; i < iter; i++) { + flags.add(randomFrom(RegexpFlag.values())); + } + query.flags(flags.toArray(new RegexpFlag[flags.size()])); + } + if (randomBoolean()) { + query.maxDeterminizedStates(randomInt(50000)); + } + if (randomBoolean()) { + query.rewrite(randomFrom(getRandomRewriteMethod())); + } + return query; + } + + @Override + protected Query doCreateExpectedQuery(RegexpQueryBuilder queryBuilder, QueryParseContext context) throws IOException { + //norelease fix to be removed to avoid NPE on unmapped fields + context.parseFieldMatcher(randomBoolean() ? ParseFieldMatcher.EMPTY : ParseFieldMatcher.STRICT); + MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(context.parseFieldMatcher(), queryBuilder.rewrite(), null); + + Query query = null; + MappedFieldType fieldType = context.fieldMapper(queryBuilder.fieldName()); + if (fieldType != null) { + query = fieldType.regexpQuery(queryBuilder.value(), queryBuilder.flags(), queryBuilder.maxDeterminizedStates(), method, context); + } + if (query == null) { + RegexpQuery regexpQuery = new RegexpQuery(new Term(queryBuilder.fieldName(), BytesRefs.toBytesRef(queryBuilder.value())), + queryBuilder.flags(), queryBuilder.maxDeterminizedStates()); + if (method != null) { + regexpQuery.setRewriteMethod(method); + } + query = regexpQuery; + } + return query; + } + + @Test + public void testValidate() { + RegexpQueryBuilder regexQueryBuilder = new RegexpQueryBuilder("", "regex"); + assertThat(regexQueryBuilder.validate().validationErrors().size(), is(1)); + + regexQueryBuilder = new RegexpQueryBuilder("field", null); + assertThat(regexQueryBuilder.validate().validationErrors().size(), is(1)); + + regexQueryBuilder = new 
RegexpQueryBuilder("field", "regex"); + assertNull(regexQueryBuilder.validate()); + + regexQueryBuilder = new RegexpQueryBuilder(null, null); + assertThat(regexQueryBuilder.validate().validationErrors().size(), is(2)); + } +}