From 6aec68cd291eef8b8d9ed3d6ba829dc4b144078d Mon Sep 17 00:00:00 2001 From: Lee Hinman Date: Fri, 13 Mar 2015 13:51:41 -0600 Subject: [PATCH] Revert "[QUERY] Remove lowercase_expanded_terms and locale options" This reverts commit d1f7bd97cb989d8d98e009ef71a72c7cac5077dd. Ryan pointed out that this needs to work with the multi term query, so additional analysis and tests should be added. --- .../queries/query-string-query.asciidoc | 7 +++ .../queries/query-string-syntax.asciidoc | 3 +- .../simple-query-string-query.asciidoc | 7 +++ docs/reference/search/explain.asciidoc | 4 ++ docs/reference/search/uri-request.asciidoc | 3 ++ rest-api-spec/api/explain.json | 4 ++ rest-api-spec/api/search.json | 4 ++ .../classic/MapperQueryParser.java | 19 +++++++ .../classic/QueryParserSettings.java | 26 ++++++++++ .../index/query/QueryStringQueryBuilder.java | 25 +++++++++ .../index/query/QueryStringQueryParser.java | 9 ++++ .../index/query/SimpleQueryParser.java | 25 +++++++++ .../index/query/SimpleQueryStringBuilder.java | 20 ++++++++ .../index/query/SimpleQueryStringParser.java | 10 ++++ .../action/explain/RestExplainAction.java | 1 + .../rest/action/search/RestSearchAction.java | 1 + .../count/query/CountQueryTests.java | 25 ++++++++- .../count/simple/SimpleCountTests.java | 2 +- .../exists/SimpleExistsTests.java | 4 +- .../search/query/SearchQueryTests.java | 26 +++++++++- .../search/query/SimpleQueryStringTests.java | 51 ++++++++++++++++++- .../search/simple/SimpleSearchTests.java | 4 +- 22 files changed, 270 insertions(+), 10 deletions(-) diff --git a/docs/reference/query-dsl/queries/query-string-query.asciidoc b/docs/reference/query-dsl/queries/query-string-query.asciidoc index 0ea4852065b..0f0763f65c5 100644 --- a/docs/reference/query-dsl/queries/query-string-query.asciidoc +++ b/docs/reference/query-dsl/queries/query-string-query.asciidoc @@ -36,6 +36,10 @@ with default operator of `AND`, the same query is translated to |`allow_leading_wildcard` |When set, `*` or `?` are 
allowed as the first character. Defaults to `true`. +|`lowercase_expanded_terms` |Whether terms of wildcard, prefix, fuzzy, +and range queries are to be automatically lower-cased or not (since they +are not analyzed). Defaults to `true`. + |`enable_position_increments` |Set to `true` to enable position increments in result queries. Defaults to `true`. @@ -72,6 +76,9 @@ both>>. |`lenient` |If set to `true` will cause format based failures (like providing text to a numeric field) to be ignored. +|`locale` | Locale that should be used for string conversions. +Defaults to `ROOT`. + |`time_zone` | Time Zone to be applied to any range query related to dates. See also http://joda-time.sourceforge.net/api-release/org/joda/time/DateTimeZone.html[JODA timezone]. |======================================================================= diff --git a/docs/reference/query-dsl/queries/query-string-syntax.asciidoc b/docs/reference/query-dsl/queries/query-string-syntax.asciidoc index ca9d44532ae..2ad638e7d5e 100644 --- a/docs/reference/query-dsl/queries/query-string-syntax.asciidoc +++ b/docs/reference/query-dsl/queries/query-string-syntax.asciidoc @@ -65,7 +65,8 @@ they match. Leading wildcards can be disabled by setting `allow_leading_wildcard` to `false`. ====== -Wildcarded terms are not analyzed by default -- but no further analysis +Wildcarded terms are not analyzed by default -- they are lowercased +(`lowercase_expanded_terms` defaults to `true`) but no further analysis is done, mainly because it is impossible to accurately analyze a word that is missing some of its letters. 
However, by setting `analyze_wildcard` to `true`, an attempt will be made to analyze wildcarded words before searching diff --git a/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc b/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc index 3c2a446a159..3d6572fd8fd 100644 --- a/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc +++ b/docs/reference/query-dsl/queries/simple-query-string-query.asciidoc @@ -40,6 +40,13 @@ creating composite queries. |`flags` |Flags specifying which features of the `simple_query_string` to enable. Defaults to `ALL`. +|`lowercase_expanded_terms` | Whether terms of prefix and fuzzy queries are to +be automatically lower-cased or not (since they are not analyzed). Defaults to +true. + +|`locale` | Locale that should be used for string conversions. +Defaults to `ROOT`. + |`lenient` | If set to `true` will cause format based failures (like providing text to a numeric field) to be ignored. diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index 89106e4cc01..125f3124bff 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -99,6 +99,10 @@ This will yield the same result as the previous request. Should wildcard and prefix queries be analyzed or not. Defaults to false. +`lowercase_expanded_terms`:: + Should terms be automatically lowercased + or not. Defaults to true. + `lenient`:: If set to true will cause format based failures (like providing text to a numeric field) to be ignored. Defaults to false. diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index 651ff9596ef..103b6614cb1 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -98,6 +98,9 @@ Defaults to no terminate_after. <> for more details on the different types of search that can be performed. 
+|`lowercase_expanded_terms` |Should terms be automatically lowercased or +not. Defaults to `true`. + |`analyze_wildcard` |Should wildcard and prefix queries be analyzed or not. Defaults to `false`. |======================================================================= diff --git a/rest-api-spec/api/explain.json b/rest-api-spec/api/explain.json index e7898869b9c..2703e16c94f 100644 --- a/rest-api-spec/api/explain.json +++ b/rest-api-spec/api/explain.json @@ -49,6 +49,10 @@ "type" : "boolean", "description" : "Specify whether format-based query failures (such as providing text to a numeric field) should be ignored" }, + "lowercase_expanded_terms": { + "type" : "boolean", + "description" : "Specify whether query terms should be lowercased" + }, "parent": { "type" : "string", "description" : "The ID of the parent document" diff --git a/rest-api-spec/api/search.json b/rest-api-spec/api/search.json index 9c0981dd338..141c41bea8a 100644 --- a/rest-api-spec/api/search.json +++ b/rest-api-spec/api/search.json @@ -68,6 +68,10 @@ "type" : "boolean", "description" : "Specify whether format-based query failures (such as providing text to a numeric field) should be ignored" }, + "lowercase_expanded_terms": { + "type" : "boolean", + "description" : "Specify whether query terms should be lowercased" + }, "preference": { "type" : "string", "description" : "Specify the node or shard the operation should be performed on (default: random)" diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index 8e04c3ea216..6ba2e7a5089 100644 --- a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -125,10 +125,12 @@ public class MapperQueryParser extends QueryParser { setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries()); 
setMaxDeterminizedStates(settings.maxDeterminizedStates()); setAllowLeadingWildcard(settings.allowLeadingWildcard()); + setLowercaseExpandedTerms(settings.lowercaseExpandedTerms()); setPhraseSlop(settings.phraseSlop()); setDefaultOperator(settings.defaultOperator()); setFuzzyMinSim(settings.fuzzyMinSim()); setFuzzyPrefixLength(settings.fuzzyPrefixLength()); + setLocale(settings.locale()); if (settings.timeZone() != null) { setTimeZone(settings.timeZone().toTimeZone()); } @@ -369,6 +371,11 @@ public class MapperQueryParser extends QueryParser { currentMapper = fieldMappers.fieldMappers().mapper(); if (currentMapper != null) { + if (lowercaseExpandedTerms && !currentMapper.isNumeric()) { + part1 = part1 == null ? null : part1.toLowerCase(locale); + part2 = part2 == null ? null : part2.toLowerCase(locale); + } + try { return currentMapper.rangeQuery(part1, part2, startInclusive, endInclusive, parseContext); } catch (RuntimeException e) { @@ -383,6 +390,9 @@ public class MapperQueryParser extends QueryParser { } protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -449,6 +459,9 @@ public class MapperQueryParser extends QueryParser { @Override protected Query getPrefixQuery(String field, String termStr) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -589,6 +602,9 @@ public class MapperQueryParser extends QueryParser { return fieldQueryExtensions.get(ExistsFieldQueryExtension.NAME).query(parseContext, actualField); } } + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { @@ -722,6 
+738,9 @@ public class MapperQueryParser extends QueryParser { @Override protected Query getRegexpQuery(String field, String termStr) throws ParseException { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(locale); + } Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { diff --git a/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java b/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java index eae582af30b..4c9225d1fa7 100644 --- a/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java @@ -20,6 +20,7 @@ package org.apache.lucene.queryparser.classic; import com.carrotsearch.hppc.ObjectFloatOpenHashMap; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; @@ -28,6 +29,7 @@ import org.joda.time.DateTimeZone; import java.util.Collection; import java.util.List; +import java.util.Locale; /** * @@ -44,6 +46,7 @@ public class QueryParserSettings { private MapperQueryParser.Operator defaultOperator = QueryParser.Operator.OR; private boolean autoGeneratePhraseQueries = false; private boolean allowLeadingWildcard = DEFAULT_ALLOW_LEADING_WILDCARD; + private boolean lowercaseExpandedTerms = true; private boolean enablePositionIncrements = true; private int phraseSlop = 0; private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; @@ -61,6 +64,7 @@ public class QueryParserSettings { private MultiTermQuery.RewriteMethod rewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE; private String minimumShouldMatch; private boolean lenient; + private Locale locale; private DateTimeZone timeZone; List fields = null; @@ -130,6 +134,14 @@ public class QueryParserSettings { this.allowLeadingWildcard = allowLeadingWildcard; } + public boolean lowercaseExpandedTerms() { + return 
lowercaseExpandedTerms; + } + + public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + public boolean enablePositionIncrements() { return enablePositionIncrements; } @@ -298,6 +310,14 @@ public class QueryParserSettings { this.useDisMax = useDisMax; } + public void locale(Locale locale) { + this.locale = locale; + } + + public Locale locale() { + return this.locale; + } + public void timeZone(DateTimeZone timeZone) { this.timeZone = timeZone; } @@ -325,6 +345,7 @@ public class QueryParserSettings { if (fuzzyMaxExpansions != that.fuzzyMaxExpansions) return false; if (fuzzyRewriteMethod != null ? !fuzzyRewriteMethod.equals(that.fuzzyRewriteMethod) : that.fuzzyRewriteMethod != null) return false; + if (lowercaseExpandedTerms != that.lowercaseExpandedTerms) return false; if (phraseSlop != that.phraseSlop) return false; if (defaultAnalyzer != null ? !defaultAnalyzer.equals(that.defaultAnalyzer) : that.defaultAnalyzer != null) return false; @@ -346,6 +367,9 @@ public class QueryParserSettings { if (lenient != that.lenient) { return false; } + if (locale != null ? !locale.equals(that.locale) : that.locale != null) { + return false; + } if (timeZone != null ? !timeZone.equals(that.timeZone) : that.timeZone != null) { return false; } @@ -368,6 +392,7 @@ public class QueryParserSettings { result = 31 * result + (autoGeneratePhraseQueries ? 1 : 0); result = 31 * result + maxDeterminizedStates; result = 31 * result + (allowLeadingWildcard ? 1 : 0); + result = 31 * result + (lowercaseExpandedTerms ? 1 : 0); result = 31 * result + (enablePositionIncrements ? 1 : 0); result = 31 * result + phraseSlop; result = 31 * result + (fuzzyMinSim != +0.0f ? Float.floatToIntBits(fuzzyMinSim) : 0); @@ -384,6 +409,7 @@ public class QueryParserSettings { result = 31 * result + (boosts != null ? boosts.hashCode() : 0); result = 31 * result + (tieBreaker != +0.0f ? 
Float.floatToIntBits(tieBreaker) : 0); result = 31 * result + (useDisMax ? 1 : 0); + result = 31 * result + (locale != null ? locale.hashCode() : 0); result = 31 * result + (timeZone != null ? timeZone.hashCode() : 0); return result; } diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index 79683f03bf5..d939a4654a2 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -60,10 +60,15 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab private Boolean allowLeadingWildcard; + private Boolean lowercaseExpandedTerms; + private Boolean enablePositionIncrements; private Boolean analyzeWildcard; + private Locale locale; + + private float boost = -1; private Fuzziness fuzziness; @@ -215,6 +220,15 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab return this; } + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is true. + */ + public QueryStringQueryBuilder lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + return this; + } + /** * Set to true to enable position increments in result query. Defaults to * true. 
@@ -315,6 +329,11 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab return this; } + public QueryStringQueryBuilder locale(Locale locale) { + this.locale = locale; + return this; + } + /** * In case of date field, we can adjust the from/to fields using a timezone */ @@ -364,6 +383,9 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab if (allowLeadingWildcard != null) { builder.field("allow_leading_wildcard", allowLeadingWildcard); } + if (lowercaseExpandedTerms != null) { + builder.field("lowercase_expanded_terms", lowercaseExpandedTerms); + } if (enablePositionIncrements != null) { builder.field("enable_position_increments", enablePositionIncrements); } @@ -403,6 +425,9 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder implements Boostab if (queryName != null) { builder.field("_name", queryName); } + if (locale != null) { + builder.field("locale", locale.toString()); + } if (timeZone != null) { builder.field("time_zone", timeZone); } diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java index 5105b7a5fea..ded43bc50e8 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import com.carrotsearch.hppc.ObjectFloatOpenHashMap; import com.google.common.collect.Lists; + import org.apache.lucene.queryparser.classic.MapperQueryParser; import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.BooleanQuery; @@ -32,12 +33,14 @@ import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.util.LocaleUtils; import 
org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.query.support.QueryParsers; import org.joda.time.DateTimeZone; import java.io.IOException; +import java.util.Locale; import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded; @@ -73,6 +76,7 @@ public class QueryStringQueryParser implements QueryParser { qpSettings.lenient(parseContext.queryStringLenient()); qpSettings.analyzeWildcard(defaultAnalyzeWildcard); qpSettings.allowLeadingWildcard(defaultAllowLeadingWildcard); + qpSettings.locale(Locale.ROOT); String currentFieldName = null; XContentParser.Token token; @@ -156,6 +160,8 @@ public class QueryStringQueryParser implements QueryParser { qpSettings.autoGeneratePhraseQueries(parser.booleanValue()); } else if ("max_determinized_states".equals(currentFieldName) || "maxDeterminizedStates".equals(currentFieldName)) { qpSettings.maxDeterminizedStates(parser.intValue()); + } else if ("lowercase_expanded_terms".equals(currentFieldName) || "lowercaseExpandedTerms".equals(currentFieldName)) { + qpSettings.lowercaseExpandedTerms(parser.booleanValue()); } else if ("enable_position_increments".equals(currentFieldName) || "enablePositionIncrements".equals(currentFieldName)) { qpSettings.enablePositionIncrements(parser.booleanValue()); } else if ("escape".equals(currentFieldName)) { @@ -186,6 +192,9 @@ public class QueryStringQueryParser implements QueryParser { qpSettings.quoteFieldSuffix(parser.textOrNull()); } else if ("lenient".equalsIgnoreCase(currentFieldName)) { qpSettings.lenient(parser.booleanValue()); + } else if ("locale".equals(currentFieldName)) { + String localeStr = parser.text(); + qpSettings.locale(LocaleUtils.parse(localeStr)); } else if ("time_zone".equals(currentFieldName)) { try { qpSettings.timeZone(DateTimeZone.forID(parser.text())); diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java 
b/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java index a3ff21615fe..fc916f55611 100644 --- a/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java @@ -27,6 +27,7 @@ import org.apache.lucene.search.*; import org.apache.lucene.util.BytesRef; import java.io.IOException; +import java.util.Locale; import java.util.Map; /** @@ -76,6 +77,9 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp */ @Override public Query newFuzzyQuery(String text, int fuzziness) { + if (settings.lowercaseExpandedTerms()) { + text = text.toLowerCase(settings.locale()); + } BooleanQuery bq = new BooleanQuery(true); for (Map.Entry entry : weights.entrySet()) { try { @@ -114,6 +118,9 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp */ @Override public Query newPrefixQuery(String text) { + if (settings.lowercaseExpandedTerms()) { + text = text.toLowerCase(settings.locale()); + } BooleanQuery bq = new BooleanQuery(true); for (Map.Entry entry : weights.entrySet()) { try { @@ -201,6 +208,8 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp * their default values */ public static class Settings { + private Locale locale = Locale.ROOT; + private boolean lowercaseExpandedTerms = true; private boolean lenient = false; private boolean analyzeWildcard = false; @@ -208,6 +217,22 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp } + public void locale(Locale locale) { + this.locale = locale; + } + + public Locale locale() { + return this.locale; + } + + public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + public boolean lowercaseExpandedTerms() { + return this.lowercaseExpandedTerms; + } + public void lenient(boolean lenient) { this.lenient = lenient; } diff --git 
a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java index 218aa67012d..f0cc2bbeb6d 100644 --- a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java @@ -38,8 +38,10 @@ public class SimpleQueryStringBuilder extends BaseQueryBuilder { private String queryName; private String minimumShouldMatch; private int flags = -1; + private Boolean lowercaseExpandedTerms; private Boolean lenient; private Boolean analyzeWildcard; + private Locale locale; /** * Operators for the default_operator @@ -113,6 +115,16 @@ public class SimpleQueryStringBuilder extends BaseQueryBuilder { return this; } + public SimpleQueryStringBuilder lowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + return this; + } + + public SimpleQueryStringBuilder locale(Locale locale) { + this.locale = locale; + return this; + } + public SimpleQueryStringBuilder lenient(boolean lenient) { this.lenient = lenient; return this; @@ -160,6 +172,10 @@ public class SimpleQueryStringBuilder extends BaseQueryBuilder { builder.field("default_operator", operator.name().toLowerCase(Locale.ROOT)); } + if (lowercaseExpandedTerms != null) { + builder.field("lowercase_expanded_terms", lowercaseExpandedTerms); + } + if (lenient != null) { builder.field("lenient", lenient); } @@ -168,6 +184,10 @@ public class SimpleQueryStringBuilder extends BaseQueryBuilder { builder.field("analyze_wildcard", analyzeWildcard); } + if (locale != null) { + builder.field("locale", locale.toString()); + } + if (queryName != null) { builder.field("_name", queryName); } diff --git a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java index c8e545c5929..43e64ce0280 100644 --- 
a/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java +++ b/src/main/java/org/elasticsearch/index/query/SimpleQueryStringParser.java @@ -28,12 +28,14 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.LocaleUtils; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; import java.io.IOException; import java.util.Collections; import java.util.HashMap; +import java.util.Locale; import java.util.Map; /** @@ -52,6 +54,8 @@ import java.util.Map; *
  • '{@code ~}N' at the end of phrases specifies near/slop query: "term1 term2"~5 * *

    + * See: {@link XSimpleQueryParser} for more information. + *

    * This query supports these options: *

    * Required: @@ -169,6 +173,12 @@ public class SimpleQueryStringParser implements QueryParser { flags = SimpleQueryStringFlag.ALL.value(); } } + } else if ("locale".equals(currentFieldName)) { + String localeStr = parser.text(); + Locale locale = LocaleUtils.parse(localeStr); + sqsSettings.locale(locale); + } else if ("lowercase_expanded_terms".equals(currentFieldName)) { + sqsSettings.lowercaseExpandedTerms(parser.booleanValue()); } else if ("lenient".equals(currentFieldName)) { sqsSettings.lenient(parser.booleanValue()); } else if ("analyze_wildcard".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/rest/action/explain/RestExplainAction.java b/src/main/java/org/elasticsearch/rest/action/explain/RestExplainAction.java index c5b4aecc82e..0ce02a36f41 100644 --- a/src/main/java/org/elasticsearch/rest/action/explain/RestExplainAction.java +++ b/src/main/java/org/elasticsearch/rest/action/explain/RestExplainAction.java @@ -74,6 +74,7 @@ public class RestExplainAction extends BaseRestHandler { queryStringBuilder.defaultField(request.param("df")); queryStringBuilder.analyzer(request.param("analyzer")); queryStringBuilder.analyzeWildcard(request.paramAsBoolean("analyze_wildcard", false)); + queryStringBuilder.lowercaseExpandedTerms(request.paramAsBoolean("lowercase_expanded_terms", true)); queryStringBuilder.lenient(request.paramAsBoolean("lenient", null)); String defaultOperator = request.param("default_operator"); if (defaultOperator != null) { diff --git a/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java b/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java index f9188e42c79..1f262a4da19 100644 --- a/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java +++ b/src/main/java/org/elasticsearch/rest/action/search/RestSearchAction.java @@ -129,6 +129,7 @@ public class RestSearchAction extends BaseRestHandler { queryBuilder.defaultField(request.param("df")); 
queryBuilder.analyzer(request.param("analyzer")); queryBuilder.analyzeWildcard(request.paramAsBoolean("analyze_wildcard", false)); + queryBuilder.lowercaseExpandedTerms(request.paramAsBoolean("lowercase_expanded_terms", true)); queryBuilder.lenient(request.paramAsBoolean("lenient", null)); String defaultOperator = request.param("default_operator"); if (defaultOperator != null) { diff --git a/src/test/java/org/elasticsearch/count/query/CountQueryTests.java b/src/test/java/org/elasticsearch/count/query/CountQueryTests.java index 5fc5b67f32c..ae1a600a31a 100644 --- a/src/test/java/org/elasticsearch/count/query/CountQueryTests.java +++ b/src/test/java/org/elasticsearch/count/query/CountQueryTests.java @@ -161,6 +161,27 @@ public class CountQueryTests extends ElasticsearchIntegrationTest { assertHitCount(countResponse, 1l); } + @Test + public void testLowercaseExpandedTerms() { + createIndex("test"); + + client().prepareIndex("test", "type1", "1").setSource("field1", "value_1", "field2", "value_2").get(); + refresh(); + + CountResponse countResponse = client().prepareCount().setQuery(queryStringQuery("VALUE_3~1").lowercaseExpandedTerms(true)).get(); + assertHitCount(countResponse, 1l); + countResponse = client().prepareCount().setQuery(queryStringQuery("VALUE_3~1").lowercaseExpandedTerms(false)).get(); + assertHitCount(countResponse, 0l); + countResponse = client().prepareCount().setQuery(queryStringQuery("ValUE_*").lowercaseExpandedTerms(true)).get(); + assertHitCount(countResponse, 1l); + countResponse = client().prepareCount().setQuery(queryStringQuery("vAl*E_1")).get(); + assertHitCount(countResponse, 1l); + countResponse = client().prepareCount().setQuery(queryStringQuery("[VALUE_1 TO VALUE_3]")).get(); + assertHitCount(countResponse, 1l); + countResponse = client().prepareCount().setQuery(queryStringQuery("[VALUE_1 TO VALUE_3]").lowercaseExpandedTerms(false)).get(); + assertHitCount(countResponse, 0l); + } + @Test 
//https://github.com/elasticsearch/elasticsearch/issues/3540 public void testDateRangeInQueryString() { //the mapping needs to be provided upfront otherwise we are not sure how many failures we get back @@ -181,10 +202,10 @@ public class CountQueryTests extends ElasticsearchIntegrationTest { CountResponse countResponse = client().prepareCount().setQuery(queryStringQuery("past:[now-2M/d TO now/d]")).get(); assertHitCount(countResponse, 1l); - countResponse = client().prepareCount().setQuery(queryStringQuery("future:[now/d TO now+2M/d]")).get(); + countResponse = client().prepareCount().setQuery(queryStringQuery("future:[now/d TO now+2M/d]").lowercaseExpandedTerms(false)).get(); assertHitCount(countResponse, 1l); - countResponse = client().prepareCount("test").setQuery(queryStringQuery("future:[now/D TO now+2M/d]")).get(); + countResponse = client().prepareCount("test").setQuery(queryStringQuery("future:[now/D TO now+2M/d]").lowercaseExpandedTerms(false)).get(); //D is an unsupported unit in date math assertThat(countResponse.getSuccessfulShards(), equalTo(0)); assertThat(countResponse.getFailedShards(), equalTo(test.numPrimaries)); diff --git a/src/test/java/org/elasticsearch/count/simple/SimpleCountTests.java b/src/test/java/org/elasticsearch/count/simple/SimpleCountTests.java index 946c1a3137d..dd4ed24af5a 100644 --- a/src/test/java/org/elasticsearch/count/simple/SimpleCountTests.java +++ b/src/test/java/org/elasticsearch/count/simple/SimpleCountTests.java @@ -100,7 +100,7 @@ public class SimpleCountTests extends ElasticsearchIntegrationTest { countResponse = client().prepareCount().setQuery(QueryBuilders.prefixQuery("_id", "XXX")).execute().actionGet(); assertHitCount(countResponse, 1l); - countResponse = client().prepareCount().setQuery(QueryBuilders.queryStringQuery("_id:XXX*")).execute().actionGet(); + countResponse = client().prepareCount().setQuery(QueryBuilders.queryStringQuery("_id:XXX*").lowercaseExpandedTerms(false)).execute().actionGet(); 
assertHitCount(countResponse, 1l); } diff --git a/src/test/java/org/elasticsearch/exists/SimpleExistsTests.java b/src/test/java/org/elasticsearch/exists/SimpleExistsTests.java index e960262c3e9..78e50de0f50 100644 --- a/src/test/java/org/elasticsearch/exists/SimpleExistsTests.java +++ b/src/test/java/org/elasticsearch/exists/SimpleExistsTests.java @@ -95,7 +95,7 @@ public class SimpleExistsTests extends ElasticsearchIntegrationTest { existsResponse = client().prepareExists().setQuery(QueryBuilders.prefixQuery("_id", "XXX")).execute().actionGet(); assertExists(existsResponse, true); - existsResponse = client().prepareExists().setQuery(QueryBuilders.queryStringQuery("_id:XXX*")).execute().actionGet(); + existsResponse = client().prepareExists().setQuery(QueryBuilders.queryStringQuery("_id:XXX*").lowercaseExpandedTerms(false)).execute().actionGet(); assertExists(existsResponse, true); } @@ -110,7 +110,7 @@ public class SimpleExistsTests extends ElasticsearchIntegrationTest { ExistsResponse existsResponse = client().prepareExists("test").setQuery(QueryBuilders.rangeQuery("field").gte(6).lte(8)).execute().actionGet(); assertExists(existsResponse, false); - existsResponse = client().prepareExists("test").setQuery(QueryBuilders.queryStringQuery("_id:XXY*")).execute().actionGet(); + existsResponse = client().prepareExists("test").setQuery(QueryBuilders.queryStringQuery("_id:XXY*").lowercaseExpandedTerms(false)).execute().actionGet(); assertExists(existsResponse, false); } diff --git a/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java b/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java index 1a88da9ecdb..e0a17c02419 100644 --- a/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java +++ b/src/test/java/org/elasticsearch/search/query/SearchQueryTests.java @@ -52,6 +52,7 @@ import java.util.Random; import java.util.Set; import java.util.concurrent.ExecutionException; +import static 
org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; @@ -515,6 +516,27 @@ public class SearchQueryTests extends ElasticsearchIntegrationTest { assertHitCount(searchResponse, 1l); } + @Test + public void testLowercaseExpandedTerms() { + createIndex("test"); + + client().prepareIndex("test", "type1", "1").setSource("field1", "value_1", "field2", "value_2").get(); + refresh(); + + SearchResponse searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").lowercaseExpandedTerms(true)).get(); + assertHitCount(searchResponse, 1l); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("VALUE_3~1").lowercaseExpandedTerms(false)).get(); + assertHitCount(searchResponse, 0l); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("ValUE_*").lowercaseExpandedTerms(true)).get(); + assertHitCount(searchResponse, 1l); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("vAl*E_1")).get(); + assertHitCount(searchResponse, 1l); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[VALUE_1 TO VALUE_3]")).get(); + assertHitCount(searchResponse, 1l); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("[VALUE_1 TO VALUE_3]").lowercaseExpandedTerms(false)).get(); + assertHitCount(searchResponse, 0l); + } + @Test //https://github.com/elasticsearch/elasticsearch/issues/3540 public void testDateRangeInQueryString() { //the mapping needs to be provided upfront otherwise we are not sure how many failures we get back @@ -532,11 +554,11 @@ public class SearchQueryTests extends ElasticsearchIntegrationTest { SearchResponse searchResponse = client().prepareSearch().setQuery(queryStringQuery("past:[now-2M/d TO 
now/d]")).get(); assertHitCount(searchResponse, 1l); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("future:[now/d TO now+2M/d]")).get(); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("future:[now/d TO now+2M/d]").lowercaseExpandedTerms(false)).get(); assertHitCount(searchResponse, 1l); try { - client().prepareSearch().setQuery(queryStringQuery("future:[now/D TO now+2M/d]")).get(); + client().prepareSearch().setQuery(queryStringQuery("future:[now/D TO now+2M/d]").lowercaseExpandedTerms(false)).get(); fail("expected SearchPhaseExecutionException (total failure)"); } catch (SearchPhaseExecutionException e) { assertThat(e.status(), equalTo(RestStatus.BAD_REQUEST)); diff --git a/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java b/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java index accf68cf440..34813a2dc40 100644 --- a/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java +++ b/src/test/java/org/elasticsearch/search/query/SimpleQueryStringTests.java @@ -29,11 +29,15 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.junit.Test; import java.io.IOException; +import java.util.Locale; import java.util.concurrent.ExecutionException; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.elasticsearch.index.query.QueryBuilders.*; +import static org.elasticsearch.index.query.QueryBuilders.boolQuery; +import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery; +import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; import static org.hamcrest.Matchers.equalTo; /** @@ -135,6 +139,51 @@ public class SimpleQueryStringTests extends ElasticsearchIntegrationTest { assertSearchHits(searchResponse, "6", "7", 
"8"); } + @Test + public void testSimpleQueryStringLowercasing() { + createIndex("test"); + client().prepareIndex("test", "type1", "1").setSource("body", "Professional").get(); + refresh(); + + SearchResponse searchResponse = client().prepareSearch().setQuery(simpleQueryStringQuery("Professio*")).get(); + assertHitCount(searchResponse, 1l); + assertSearchHits(searchResponse, "1"); + + searchResponse = client().prepareSearch().setQuery( + simpleQueryStringQuery("Professio*").lowercaseExpandedTerms(false)).get(); + assertHitCount(searchResponse, 0l); + + searchResponse = client().prepareSearch().setQuery( + simpleQueryStringQuery("Professionan~1")).get(); + assertHitCount(searchResponse, 1l); + assertSearchHits(searchResponse, "1"); + + searchResponse = client().prepareSearch().setQuery( + simpleQueryStringQuery("Professionan~1").lowercaseExpandedTerms(false)).get(); + assertHitCount(searchResponse, 0l); + } + + @Test + public void testQueryStringLocale() { + createIndex("test"); + client().prepareIndex("test", "type1", "1").setSource("body", "bılly").get(); + refresh(); + + SearchResponse searchResponse = client().prepareSearch().setQuery(simpleQueryStringQuery("BILL*")).get(); + assertHitCount(searchResponse, 0l); + searchResponse = client().prepareSearch().setQuery(queryStringQuery("body:BILL*")).get(); + assertHitCount(searchResponse, 0l); + + searchResponse = client().prepareSearch().setQuery( + simpleQueryStringQuery("BILL*").locale(new Locale("tr", "TR"))).get(); + assertHitCount(searchResponse, 1l); + assertSearchHits(searchResponse, "1"); + searchResponse = client().prepareSearch().setQuery( + queryStringQuery("body:BILL*").locale(new Locale("tr", "TR"))).get(); + assertHitCount(searchResponse, 1l); + assertSearchHits(searchResponse, "1"); + } + @Test public void testNestedFieldSimpleQueryString() throws IOException { assertAcked(prepareCreate("test") diff --git a/src/test/java/org/elasticsearch/search/simple/SimpleSearchTests.java 
b/src/test/java/org/elasticsearch/search/simple/SimpleSearchTests.java index 078dee48297..2e3276107fa 100644 --- a/src/test/java/org/elasticsearch/search/simple/SimpleSearchTests.java +++ b/src/test/java/org/elasticsearch/search/simple/SimpleSearchTests.java @@ -23,9 +23,11 @@ import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.elasticsearch.test.junit.annotations.TestLogging; import org.junit.Test; import java.util.ArrayList; @@ -119,7 +121,7 @@ public class SimpleSearchTests extends ElasticsearchIntegrationTest { searchResponse = client().prepareSearch().setQuery(QueryBuilders.prefixQuery("_id", "XXX")).execute().actionGet(); assertHitCount(searchResponse, 1l); - searchResponse = client().prepareSearch().setQuery(QueryBuilders.queryStringQuery("_id:XXX*")).execute().actionGet(); + searchResponse = client().prepareSearch().setQuery(QueryBuilders.queryStringQuery("_id:XXX*").lowercaseExpandedTerms(false)).execute().actionGet(); assertHitCount(searchResponse, 1l); }