From 8b74c42099d5b7f76b2fa0d3d85d5f03f4e5e234 Mon Sep 17 00:00:00 2001 From: uboness Date: Sat, 15 Dec 2012 23:20:46 +0100 Subject: [PATCH] Support for RegexpQuery & RegexpFilter - Added "regexp" query type (based on Lucene 4 RegexpQuery) - Added "regexp" filter type - Fixed a bug in IdFieldMapper where prefixQuery on a single type would be redundantly wrapped in a boolean query --- .../common/lucene/search/RegexpFilter.java | 110 ++++++++++++++ .../cache/filter/support/CacheKeyFilter.java | 4 + .../index/mapper/FieldMapper.java | 4 + .../mapper/core/AbstractFieldMapper.java | 15 ++ .../index/mapper/internal/IdFieldMapper.java | 41 ++++++ .../index/query/FilterBuilders.java | 10 ++ .../index/query/QueryBuilders.java | 11 ++ .../index/query/RegexpFilterBuilder.java | 114 +++++++++++++++ .../index/query/RegexpFilterParser.java | 138 ++++++++++++++++++ .../elasticsearch/index/query/RegexpFlag.java | 135 +++++++++++++++++ .../index/query/RegexpQueryBuilder.java | 99 +++++++++++++ .../index/query/RegexpQueryParser.java | 130 +++++++++++++++++ .../indices/query/IndicesQueriesModule.java | 2 + .../query/SimpleIndexQueryParserTests.java | 90 ++++++++++++ .../test/unit/index/query/regexp-boost.json | 8 + .../regexp-filter-flags-named-cached.json | 20 +++ .../unit/index/query/regexp-filter-flags.json | 18 +++ .../unit/index/query/regexp-filter-named.json | 15 ++ .../test/unit/index/query/regexp-filter.json | 14 ++ .../test/unit/index/query/regexp.json | 5 + 20 files changed, 983 insertions(+) create mode 100644 src/main/java/org/elasticsearch/common/lucene/search/RegexpFilter.java create mode 100644 src/main/java/org/elasticsearch/index/query/RegexpFilterBuilder.java create mode 100644 src/main/java/org/elasticsearch/index/query/RegexpFilterParser.java create mode 100644 src/main/java/org/elasticsearch/index/query/RegexpFlag.java create mode 100644 src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java create mode 100644 
src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java create mode 100644 src/test/java/org/elasticsearch/test/unit/index/query/regexp-boost.json create mode 100644 src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags-named-cached.json create mode 100644 src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags.json create mode 100644 src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-named.json create mode 100644 src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter.json create mode 100644 src/test/java/org/elasticsearch/test/unit/index/query/regexp.json diff --git a/src/main/java/org/elasticsearch/common/lucene/search/RegexpFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/RegexpFilter.java new file mode 100644 index 00000000000..93a3261431a --- /dev/null +++ b/src/main/java/org/elasticsearch/common/lucene/search/RegexpFilter.java @@ -0,0 +1,110 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.MultiTermQueryWrapperFilter; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.automaton.RegExp; + +import java.io.IOException; + +/** + * A regexp filter that delegates {@link #getDocIdSet(AtomicReaderContext, Bits)} to a {@link MultiTermQueryWrapperFilter} around a {@link RegexpQuery}; the wrapped query is created eagerly in the constructor. + * NOTE(review): thread safety is not established by this class itself - confirm before applying one instance to multiple segments concurrently. + */ +public class RegexpFilter extends Filter { + + private final Term term; + private final int flags; + + // use delegation here to support efficient implementation of equals & hashcode for this + // filter (as it will be used as the filter cache key) + private final InternalFilter filter; + + public RegexpFilter(Term term) { + this(term, RegExp.ALL); + } + + public RegexpFilter(Term term, int flags) { + filter = new InternalFilter(term, flags); + this.term = term; + this.flags = flags; + } + + public String field() { + return term.field(); + } + + public String regexp() { + return term.text(); + } + + public int flags() { + return flags; + } + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return filter.getDocIdSet(context, acceptDocs); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + org.elasticsearch.common.lucene.search.RegexpFilter that = (org.elasticsearch.common.lucene.search.RegexpFilter) o; + + if (flags != that.flags) return false; + if (term != null ? !term.equals(that.term) : that.term != null) return false; + + return true; + } + + @Override + public int hashCode() { + int result = term != null ? 
term.hashCode() : 0; + result = 31 * result + flags; + return result; + } + + @Override + public String toString() { + // todo should we also show the flags? + return term.field() + ":" + term.text(); + } + + static class InternalFilter extends MultiTermQueryWrapperFilter { + + public InternalFilter(Term term) { + super(new RegexpQuery(term)); + } + + public InternalFilter(Term term, int flags) { + super(new RegexpQuery(term, flags)); + } + } + +} diff --git a/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java b/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java index 7ac5069d5ec..646348ff7f8 100644 --- a/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java +++ b/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java @@ -86,6 +86,10 @@ public interface CacheKeyFilter { return key; } + public Filter wrappedFilter() { + return filter; + } + @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return filter.getDocIdSet(context, acceptDocs); diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index c20da27c757..67a48861e96 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -203,6 +203,10 @@ public interface FieldMapper { Filter prefixFilter(String value, @Nullable QueryParseContext context); + Query regexpQuery(String value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context); + + Filter regexpFilter(String value, int flags, @Nullable QueryParseContext parseContext); + /** * A term query to use when parsing a query string. Can return null. 
*/ diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index daf1378440c..f691d27ab84 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -29,6 +29,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.RegexpFilter; import org.elasticsearch.common.lucene.search.TermFilter; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -475,6 +476,20 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { return new PrefixFilter(names().createIndexNameTerm(indexedValue(value))); } + @Override + public Query regexpQuery(String value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) { + RegexpQuery query = new RegexpQuery(names().createIndexNameTerm(indexedValue(value)), flags); + if (method != null) { + query.setRewriteMethod(method); + } + return query; + } + + @Override + public Filter regexpFilter(String value, int flags, @Nullable QueryParseContext parseContext) { + return new RegexpFilter(names().createIndexNameTerm(indexedValue(value)), flags); + } + @Override public Query rangeQuery(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) { // LUCENE 4 UPGRADE: Perhaps indexedValue() should return a BytesRef? 
diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java index 424590de745..a747826ad40 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.*; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.search.RegexpFilter; import org.elasticsearch.common.lucene.search.XBooleanFilter; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -187,6 +188,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern if (method != null) { prefixQuery.setRewriteMethod(method); } + return prefixQuery; } BooleanQuery query = new BooleanQuery(); for (String queryType : queryTypes) { @@ -215,6 +217,45 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern return filter; } + @Override + public Query regexpQuery(String value, int flags, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context) { + if (indexed() || context == null) { + return super.regexpQuery(value, flags, method, context); + } + Collection queryTypes = context.queryTypes(); + if (queryTypes.size() == 1) { + RegexpQuery regexpQuery = new RegexpQuery(new Term(UidFieldMapper.NAME, Uid.createUid(Iterables.getFirst(queryTypes, null), value)), flags); + if (method != null) { + regexpQuery.setRewriteMethod(method); + } + return regexpQuery; + } + BooleanQuery query = new BooleanQuery(); + for (String queryType : queryTypes) { + RegexpQuery regexpQuery = new RegexpQuery(new Term(UidFieldMapper.NAME, Uid.createUid(queryType, value)), flags); + if (method != null) { + regexpQuery.setRewriteMethod(method); + } + 
query.add(regexpQuery, BooleanClause.Occur.SHOULD); + } + return query; + } + + public Filter regexpFilter(String value, int flags, @Nullable QueryParseContext context) { + if (indexed() || context == null) { + return super.regexpFilter(value, flags, context); + } + Collection queryTypes = context.queryTypes(); + if (queryTypes.size() == 1) { + return new RegexpFilter(new Term(UidFieldMapper.NAME, Uid.createUid(Iterables.getFirst(queryTypes, null), value)), flags); + } + XBooleanFilter filter = new XBooleanFilter(); + for (String queryType : queryTypes) { + filter.add(new RegexpFilter(new Term(UidFieldMapper.NAME, Uid.createUid(queryType, value)), flags), BooleanClause.Occur.SHOULD); + } + return filter; + } + @Override public void preParse(ParseContext context) throws IOException { if (context.sourceToParse().id() != null) { diff --git a/src/main/java/org/elasticsearch/index/query/FilterBuilders.java b/src/main/java/org/elasticsearch/index/query/FilterBuilders.java index ac4099d80a3..154bf200d20 100644 --- a/src/main/java/org/elasticsearch/index/query/FilterBuilders.java +++ b/src/main/java/org/elasticsearch/index/query/FilterBuilders.java @@ -266,6 +266,16 @@ public abstract class FilterBuilders { return new PrefixFilterBuilder(name, prefix); } + /** + * A filter that restricts search results to field values that match a given regular expression. + * + * @param name The field name + * @param regexp The regular expression + */ + public static RegexpFilterBuilder regexpFilter(String name, String regexp) { + return new RegexpFilterBuilder(name, regexp); + } + /** * A filter that restricts search results to values that are within the given range. 
* diff --git a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index 9fbdd92eb83..a2397dbb219 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -355,6 +355,17 @@ public abstract class QueryBuilders { return new WildcardQueryBuilder(name, query); } + + /** + * A Query that matches documents containing terms with a specified regular expression. + * + * @param name The name of the field + * @param regexp The regular expression + */ + public static RegexpQueryBuilder regexpQuery(String name, String regexp) { + return new RegexpQueryBuilder(name, regexp); + } + /** * A query that parses a query string and runs it. There are two modes that this operates. The first, * when no field is added (using {@link QueryStringQueryBuilder#field(String)}, will run the query once and non prefixed fields diff --git a/src/main/java/org/elasticsearch/index/query/RegexpFilterBuilder.java b/src/main/java/org/elasticsearch/index/query/RegexpFilterBuilder.java new file mode 100644 index 00000000000..539e2641479 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/RegexpFilterBuilder.java @@ -0,0 +1,114 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * A filter that restricts search results to values that have a matching regular expression in a given + * field. + * + * + */ +public class RegexpFilterBuilder extends BaseFilterBuilder { + + private final String name; + private final String regexp; + private int flags = -1; + + private Boolean cache; + private String cacheKey; + private String filterName; + + /** + * A filter that restricts search results to values that have a matching regular expression in a given + * field. + * + * @param name The field name + * @param regexp The regular expression + */ + public RegexpFilterBuilder(String name, String regexp) { + this.name = name; + this.regexp = regexp; + } + + /** + * Sets the filter name for the filter that can be used when searching for matched_filters per hit. + */ + public RegexpFilterBuilder filterName(String filterName) { + this.filterName = filterName; + return this; + } + + /** + * Sets the regexp flags (see {@link RegexpFlag}). + */ + public RegexpFilterBuilder flags(RegexpFlag... flags) { + int value = 0; + if (flags.length == 0) { + value = RegexpFlag.ALL.value; + } else { + for (RegexpFlag flag : flags) { + value |= flag.value; + } + } + this.flags = value; + return this; + } + + /** + * Should the filter be cached or not. Defaults to true (the regexp filter parser caches unless "_cache" is set to false). 
+ */ + public RegexpFilterBuilder cache(boolean cache) { + this.cache = cache; + return this; + } + + public RegexpFilterBuilder cacheKey(String cacheKey) { + this.cacheKey = cacheKey; + return this; + } + + @Override + public void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(RegexpFilterParser.NAME); + if (flags < 0) { + builder.field(name, regexp); + } else { + builder.startObject(name) + .field("value", regexp) + .field("flags_value", flags) + .endObject(); + } + + if (filterName != null) { + builder.field("_name", filterName); + } + if (cache != null) { + builder.field("_cache", cache); + } + if (cacheKey != null) { + builder.field("_cache_key", cacheKey); + } + builder.endObject(); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/query/RegexpFilterParser.java b/src/main/java/org/elasticsearch/index/query/RegexpFilterParser.java new file mode 100644 index 00000000000..5ea203c844e --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/RegexpFilterParser.java @@ -0,0 +1,138 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Filter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.lucene.search.RegexpFilter; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.cache.filter.support.CacheKeyFilter; +import org.elasticsearch.index.mapper.MapperService; + +import java.io.IOException; + +import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameFilter; + +/** + * + */ +public class RegexpFilterParser implements FilterParser { + + public static final String NAME = "regexp"; + + @Inject + public RegexpFilterParser() { + } + + @Override + public String[] names() { + return new String[]{ NAME }; + } + + @Override + public Filter parse(QueryParseContext parseContext) throws IOException, QueryParsingException { + XContentParser parser = parseContext.parser(); + + boolean cache = true; + CacheKeyFilter.Key cacheKey = null; + String fieldName = null; + String secondaryFieldName = null; + String value = null; + String secondaryValue = null; + int flagsValue = -1; + + String filterName = null; + String currentFieldName = null; + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + fieldName = currentFieldName; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else { + if ("value".equals(currentFieldName)) { + value = parser.text(); + } else if ("flags".equals(currentFieldName)) { + String flags = parser.textOrNull(); + flagsValue = RegexpFlag.resolveValue(flags); + } else if ("flags_value".equals(currentFieldName)) { + flagsValue = parser.intValue(); + } else { + throw new 
QueryParsingException(parseContext.index(), "[regexp] filter does not support [" + currentFieldName + "]"); + } + } + } + } else { + if ("_name".equals(currentFieldName)) { + filterName = parser.text(); + } else if ("_cache".equals(currentFieldName)) { + cache = parser.booleanValue(); + } else if ("_cache_key".equals(currentFieldName) || "_cacheKey".equals(currentFieldName)) { + cacheKey = new CacheKeyFilter.Key(parser.text()); + } else { + secondaryFieldName = currentFieldName; + secondaryValue = parser.text(); + } + } + } + + if (fieldName == null) { + fieldName = secondaryFieldName; + value = secondaryValue; + } + + if (value == null) { + throw new QueryParsingException(parseContext.index(), "No value specified for regexp filter"); + } + + Filter filter = null; + + MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); + if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) { + if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) { + String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{smartNameFieldMappers.docMapper().type()}); + try { + filter = smartNameFieldMappers.mapper().regexpFilter(value, flagsValue, parseContext); + } finally { + QueryParseContext.setTypes(previousTypes); + } + } else { + filter = smartNameFieldMappers.mapper().regexpFilter(value, flagsValue, parseContext); + } + } + if (filter == null) { + filter = new RegexpFilter(new Term(fieldName, value), flagsValue); + } + + if (cache) { + filter = parseContext.cacheFilter(filter, cacheKey); + } + + filter = wrapSmartNameFilter(filter, smartNameFieldMappers, parseContext); + if (filterName != null) { + parseContext.addNamedFilter(filterName, filter); + } + return filter; + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/query/RegexpFlag.java b/src/main/java/org/elasticsearch/index/query/RegexpFlag.java new file mode 100644 index 00000000000..53961dbf63b --- 
/dev/null +++ b/src/main/java/org/elasticsearch/index/query/RegexpFlag.java @@ -0,0 +1,135 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.Strings; + +/** + * Regular expression syntax flags. Each flag represents optional syntax support in the regular expression: + *
    + *
  • INTERSECTION - Support for intersection notation: <expression> & <expression>
  • + *
  • COMPLEMENT - Support for complement notation: ~<expression>
  • + *
  • EMPTY - Support for the empty language symbol: #
  • + *
  • ANYSTRING - Support for the any string symbol: @
  • + *
  • INTERVAL - Support for numerical interval notation: <n-m>
  • + *
  • NONE - Disable support for all syntax options
  • + *
  • ALL - Enables support for all syntax options
  • + *
+ * + * @see RegexpQueryBuilder#flags(RegexpFlag...) + * @see RegexpFilterBuilder#flags(RegexpFlag...) + */ +public enum RegexpFlag { + + /** + * Enables intersection of the form: <expression> & <expression> + */ + INTERSECTION(RegExp.INTERSECTION), + + /** + * Enables complement expression of the form: ~<expression> + */ + COMPLEMENT(RegExp.COMPLEMENT), + + /** + * Enables empty language expression: # + */ + EMPTY(RegExp.EMPTY), + + /** + * Enables any string expression: @ + */ + ANYSTRING(RegExp.ANYSTRING), + + /** + * Enables numerical interval expression: <n-m> + */ + INTERVAL(RegExp.INTERVAL), + + /** + * Disables all available option flags + */ + NONE(RegExp.NONE), + + /** + * Enables all available option flags + */ + ALL(RegExp.ALL); + + + final int value; + + private RegexpFlag(int value) { + this.value = value; + } + + public int value() { + return value; + } + + /** + * Resolves the combined OR'ed value for the given list of regular expression flags. The given flags must follow the + * following syntax: + *

+ * flag_name(|flag_name)* + *

+ * Where flag_name is one of the following: + *

    + *
  • INTERSECTION
  • + *
  • COMPLEMENT
  • + *
  • EMPTY
  • + *
  • ANYSTRING
  • + *
  • INTERVAL
  • + *
  • NONE
  • + *
  • ALL
  • + *
+ *

+ * Example: INTERSECTION|COMPLEMENT|EMPTY + * + * @param flags A '|'-delimited, case-insensitive list of regular expression flag names; null or empty resolves to ALL + * @return The combined OR'ed value for all the flags (ALL short-circuits, NONE contributes nothing; an unknown name raises ElasticSearchIllegalArgumentException) + */ + static int resolveValue(String flags) { + if (flags == null || flags.isEmpty()) { + return RegExp.ALL; + } + int magic = RegExp.NONE; + for (String s : Strings.delimitedListToStringArray(flags, "|")) { + if (s.isEmpty()) { + continue; + } + try { + RegexpFlag flag = RegexpFlag.valueOf(s.toUpperCase(java.util.Locale.ROOT)); // locale-independent upper-casing so flag names resolve under any default locale + if (flag == RegexpFlag.NONE) { + continue; + } + if (flag == RegexpFlag.ALL) { + return flag.value(); + } + magic |= flag.value(); + } catch (IllegalArgumentException iae) { + throw new ElasticSearchIllegalArgumentException("Unknown regexp flag [" + s + "]"); + } + } + return magic; + } +} diff --git a/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java new file mode 100644 index 00000000000..2a978cfeb43 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/RegexpQueryBuilder.java @@ -0,0 +1,99 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * A Query that matches documents containing terms that match a given regular expression. + * + * + */ +public class RegexpQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder { + + private final String name; + private final String regexp; + + private int flags = -1; + private float boost = -1; + private String rewrite; + + /** + * Constructs a new regexp query. + * + * @param name The name of the field + * @param regexp The regular expression + */ + public RegexpQueryBuilder(String name, String regexp) { + this.name = name; + this.regexp = regexp; + } + + /** + * Sets the boost for this query. Documents matching this query will (in addition to the normal + * weightings) have their score multiplied by the boost provided. + */ + public RegexpQueryBuilder boost(float boost) { + this.boost = boost; + return this; + } + + public RegexpQueryBuilder flags(RegexpFlag... 
flags) { + int value = 0; + if (flags.length == 0) { + value = RegexpFlag.ALL.value; + } else { + for (RegexpFlag flag : flags) { + value |= flag.value; + } + } + this.flags = value; + return this; + } + + public RegexpQueryBuilder rewrite(String rewrite) { + this.rewrite = rewrite; + return this; + } + + @Override + public void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(RegexpQueryParser.NAME); + if (boost == -1 && rewrite == null && flags == -1) { // short form only when no option was set; otherwise explicitly set flags would be dropped + builder.field(name, regexp); + } else { + builder.startObject(name); + builder.field("value", regexp); + if (flags != -1) { + builder.field("flags_value", flags); + } + if (boost != -1) { + builder.field("boost", boost); + } + if (rewrite != null) { + builder.field("rewrite", rewrite); + } + builder.endObject(); + } + builder.endObject(); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java b/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java new file mode 100644 index 00000000000..f2c51d4cf32 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/query/RegexpQueryParser.java @@ -0,0 +1,130 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.query.support.QueryParsers; + +import java.io.IOException; + +import static org.elasticsearch.index.query.support.QueryParsers.wrapSmartNameQuery; + +/** + * + */ +public class RegexpQueryParser implements QueryParser { + + public static final String NAME = "regexp"; + + @Inject + public RegexpQueryParser() { + } + + @Override + public String[] names() { + return new String[]{ NAME }; + } + + @Override + public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException { + XContentParser parser = parseContext.parser(); + + XContentParser.Token token = parser.nextToken(); + if (token != XContentParser.Token.FIELD_NAME) { + throw new QueryParsingException(parseContext.index(), "[regexp] query malformed, no field"); + } + String fieldName = parser.currentName(); + String rewriteMethod = null; + + String value = null; + float boost = 1.0f; + int flagsValue = -1; + token = parser.nextToken(); + if (token == XContentParser.Token.START_OBJECT) { + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if ("value".equals(currentFieldName)) { + value = parser.text(); + } else if ("boost".equals(currentFieldName)) { + boost = parser.floatValue(); + } else if ("rewrite".equals(currentFieldName)) { + rewriteMethod = parser.textOrNull(); + } else if ("flags".equals(currentFieldName)) { + String flags = 
parser.textOrNull(); + flagsValue = RegexpFlag.resolveValue(flags); + } else if ("flags_value".equals(currentFieldName)) { + flagsValue = parser.intValue(); + if (flagsValue < 0) { + flagsValue = RegExp.ALL; + } + } + } else { + throw new QueryParsingException(parseContext.index(), "[regexp] query does not support [" + currentFieldName + "]"); + } + } + parser.nextToken(); + } else { + value = parser.text(); + parser.nextToken(); + } + + if (value == null) { + throw new QueryParsingException(parseContext.index(), "No value specified for regexp query"); + } + + MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewriteMethod, null); + + Query query = null; + MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); + if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) { + if (smartNameFieldMappers.explicitTypeInNameWithDocMapper()) { + String[] previousTypes = QueryParseContext.setTypesWithPrevious(new String[]{smartNameFieldMappers.docMapper().type()}); + try { + query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext); + } finally { + QueryParseContext.setTypes(previousTypes); + } + } else { + query = smartNameFieldMappers.mapper().regexpQuery(value, flagsValue, method, parseContext); + } + } + if (query == null) { + RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, value), flagsValue); + if (method != null) { + regexpQuery.setRewriteMethod(method); + } + query = regexpQuery; + } + query.setBoost(boost); + return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext); + } + + +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java b/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java index cd2818fa527..23d9dc09818 100644 --- a/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java +++ 
b/src/main/java/org/elasticsearch/indices/query/IndicesQueriesModule.java @@ -80,6 +80,7 @@ public class IndicesQueriesModule extends AbstractModule { qpBinders.addBinding().to(TermQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(TermsQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(FuzzyQueryParser.class).asEagerSingleton(); + qpBinders.addBinding().to(RegexpQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(FieldQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(RangeQueryParser.class).asEagerSingleton(); qpBinders.addBinding().to(PrefixQueryParser.class).asEagerSingleton(); @@ -122,6 +123,7 @@ public class IndicesQueriesModule extends AbstractModule { fpBinders.addBinding().to(RangeFilterParser.class).asEagerSingleton(); fpBinders.addBinding().to(NumericRangeFilterParser.class).asEagerSingleton(); fpBinders.addBinding().to(PrefixFilterParser.class).asEagerSingleton(); + fpBinders.addBinding().to(RegexpFilterParser.class).asEagerSingleton(); fpBinders.addBinding().to(ScriptFilterParser.class).asEagerSingleton(); fpBinders.addBinding().to(GeoDistanceFilterParser.class).asEagerSingleton(); fpBinders.addBinding().to(GeoDistanceRangeFilterParser.class).asEagerSingleton(); diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index 4eb413e8c30..c22370aad90 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -42,6 +42,7 @@ import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexNameModule; import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.index.cache.IndexCacheModule; +import org.elasticsearch.index.cache.filter.support.CacheKeyFilter; import org.elasticsearch.index.codec.CodecModule; 
import org.elasticsearch.index.engine.IndexEngineModule; import org.elasticsearch.index.mapper.MapperService; @@ -71,6 +72,7 @@ import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath; import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; import static org.elasticsearch.index.query.FilterBuilders.*; import static org.elasticsearch.index.query.QueryBuilders.*; +import static org.elasticsearch.index.query.RegexpFlag.*; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -640,6 +642,94 @@ public class SimpleIndexQueryParserTests { assertThat(prefixQuery.getRewriteMethod(), notNullValue()); } + @Test + public void testRegexpQueryBuilder() throws IOException { + IndexQueryParserService queryParser = queryParser(); + Query parsedQuery = queryParser.parse(regexpQuery("name.first", "s.*y")).query(); + assertThat(parsedQuery, instanceOf(RegexpQuery.class)); + RegexpQuery regexpQuery = (RegexpQuery) parsedQuery; + assertThat(regexpQuery.getField(), equalTo("name.first")); + } + + @Test + public void testRegexpQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/regexp.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(RegexpQuery.class)); + RegexpQuery regexpQuery = (RegexpQuery) parsedQuery; + assertThat(regexpQuery.getField(), equalTo("name.first")); + } + + @Test + public void testRegexpFilteredQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/regexp-filter.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(XFilteredQuery.class)); + Filter filter = ((XFilteredQuery) parsedQuery).getFilter(); + assertThat(filter, instanceOf(RegexpFilter.class)); + RegexpFilter 
regexpFilter = (RegexpFilter) filter; + assertThat(regexpFilter.field(), equalTo("name.first")); + assertThat(regexpFilter.regexp(), equalTo("s.*y")); + } + + @Test + public void testNamedRegexpFilteredQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/regexp-filter-named.json"); + ParsedQuery parsedQuery = queryParser.parse(query); + assertThat(parsedQuery.namedFilters().containsKey("test"), equalTo(true)); + assertThat(parsedQuery.query(), instanceOf(XFilteredQuery.class)); + Filter filter = ((XFilteredQuery) parsedQuery.query()).getFilter(); + assertThat(filter, instanceOf(RegexpFilter.class)); + RegexpFilter regexpFilter = (RegexpFilter) filter; + assertThat(regexpFilter.field(), equalTo("name.first")); + assertThat(regexpFilter.regexp(), equalTo("s.*y")); + } + + @Test + public void testRegexpWithFlagsFilteredQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/regexp-filter-flags.json"); + ParsedQuery parsedQuery = queryParser.parse(query); + assertThat(parsedQuery.query(), instanceOf(XFilteredQuery.class)); + Filter filter = ((XFilteredQuery) parsedQuery.query()).getFilter(); + assertThat(filter, instanceOf(RegexpFilter.class)); + RegexpFilter regexpFilter = (RegexpFilter) filter; + assertThat(regexpFilter.field(), equalTo("name.first")); + assertThat(regexpFilter.regexp(), equalTo("s.*y")); + assertThat(regexpFilter.flags(), equalTo(INTERSECTION.value() | COMPLEMENT.value() | EMPTY.value())); + } + + @Test + public void testNamedAndCachedRegexpWithFlagsFilteredQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/regexp-filter-flags-named-cached.json"); + ParsedQuery parsedQuery = queryParser.parse(query); + 
assertThat(parsedQuery.query(), instanceOf(XFilteredQuery.class)); + Filter filter = ((XFilteredQuery) parsedQuery.query()).getFilter(); + assertThat(filter, instanceOf(CacheKeyFilter.Wrapper.class)); + CacheKeyFilter.Wrapper wrapper = (CacheKeyFilter.Wrapper) filter; + assertThat(wrapper.cacheKey().utf8ToString(), equalTo("key")); + assertThat(wrapper.wrappedFilter(), instanceOf(RegexpFilter.class)); + RegexpFilter regexpFilter = (RegexpFilter) wrapper.wrappedFilter(); + assertThat(regexpFilter.field(), equalTo("name.first")); + assertThat(regexpFilter.regexp(), equalTo("s.*y")); + assertThat(regexpFilter.flags(), equalTo(INTERSECTION.value() | COMPLEMENT.value() | EMPTY.value())); + } + + @Test + public void testRegexpBoostQuery() throws IOException { + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/query/regexp-boost.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(RegexpQuery.class)); + RegexpQuery regexpQuery = (RegexpQuery) parsedQuery; + assertThat(regexpQuery.getField(), equalTo("name.first")); + assertThat(regexpQuery.getBoost(), equalTo(1.2f)); + } + @Test public void testWildcardQueryBuilder() throws IOException { IndexQueryParserService queryParser = queryParser(); diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/regexp-boost.json b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-boost.json new file mode 100644 index 00000000000..ed8699b39c5 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-boost.json @@ -0,0 +1,8 @@ +{ + "regexp":{ + "name.first":{ + "value":"sh", + "boost":1.2 + } + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags-named-cached.json b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags-named-cached.json new file mode 100644 index 
00000000000..112f8fb3ab0 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags-named-cached.json @@ -0,0 +1,20 @@ +{ + "filtered": { + "query": { + "term": { + "name.first": "shay" + } + }, + "filter": { + "regexp":{ + "name.first" : { + "value" : "s.*y", + "flags" : "INTERSECTION|COMPLEMENT|EMPTY" + }, + "_name":"test", + "_cache" : true, + "_cache_key" : "key" + } + } + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags.json b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags.json new file mode 100644 index 00000000000..a5d7307e563 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-flags.json @@ -0,0 +1,18 @@ +{ + "filtered": { + "query": { + "term": { + "name.first": "shay" + } + }, + "filter": { + "regexp":{ + "name.first" : { + "value" : "s.*y", + "flags" : "INTERSECTION|COMPLEMENT|EMPTY" + }, + "_name":"test" + } + } + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-named.json b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-named.json new file mode 100644 index 00000000000..ac96b3ee01c --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter-named.json @@ -0,0 +1,15 @@ +{ + "filtered": { + "query": { + "term": { + "name.first": "shay" + } + }, + "filter": { + "regexp":{ + "name.first" : "s.*y", + "_name" : "test" + } + } + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter.json b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter.json new file mode 100644 index 00000000000..d7c7bfdb397 --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/regexp-filter.json @@ -0,0 +1,14 @@ +{ + "filtered": { + "query": { + "term": { + "name.first": "shay" + } + }, + "filter": { + "regexp":{ + 
"name.first" : "s.*y" + } + } + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/regexp.json b/src/test/java/org/elasticsearch/test/unit/index/query/regexp.json new file mode 100644 index 00000000000..6c3d69469cb --- /dev/null +++ b/src/test/java/org/elasticsearch/test/unit/index/query/regexp.json @@ -0,0 +1,5 @@ +{ + "regexp":{ + "name.first": "s.*y" + } +} \ No newline at end of file