From 4bce7271659889d839388d7df5c61a6d2a5c3c7a Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 21 Aug 2017 13:12:27 +0200 Subject: [PATCH] Refactor simple_query_string to handle text part like multi_match and query_string (#26145) This change is a continuation of #25726 that aligns field expansions for the simple_query_string with those of the query_string and multi_match queries. The main changes are: * For an exact field name, the new behavior is to rewrite to a MatchNoDocsQuery when the field name is not found in the mapping. * For partial field names (with * suffix), the expansion is done only on keyword, text, date, ip and number field types. Other field types are simply ignored. * For all fields (*), the expansion is done on accepted field types only (see above) and metadata fields are also filtered. The use_all_fields option is deprecated in this change and can be replaced by setting `*` in the fields parameter. This commit also changes how text fields are analyzed. Previously the default search analyzer (or the provided analyzer) was used to analyze every text part, ignoring the analyzer set on the field in the mapping. With this change, the field analyzer is used instead unless an analyzer has been forced in the parameter of the query. Finally, now that all full text queries can handle the special "*" expansion (`all_fields` mode), the `index.query.default_field` is now set to `*` for indices created in 6.0 and later. 
--- .../common/lucene/search/Queries.java | 9 - .../elasticsearch/index/IndexSettings.java | 19 +- .../index/query/MultiMatchQueryBuilder.java | 4 +- .../index/query/QueryStringQueryBuilder.java | 23 +- .../index/query/SimpleQueryStringBuilder.java | 130 +++++------ .../index/query/SimpleQueryStringFlag.java | 23 +- .../index/search/MatchQuery.java | 1 - .../index/search/MultiMatchQuery.java | 5 +- .../index/search/QueryParserHelper.java | 165 +++++++++++++ .../index/search/QueryStringQueryParser.java | 156 ++----------- .../SimpleQueryStringQueryParser.java} | 179 +++++++-------- .../index/query/SimpleQueryParserTests.java | 208 ----------------- .../query/SimpleQueryStringBuilderTests.java | 217 +++++++++++++----- .../search/query/SimpleQueryStringIT.java | 6 - docs/reference/query-dsl/match-query.asciidoc | 2 +- .../simple-query-string-query.asciidoc | 21 +- 16 files changed, 532 insertions(+), 636 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/index/search/QueryParserHelper.java rename core/src/main/java/org/elasticsearch/index/{query/SimpleQueryParser.java => search/SimpleQueryStringQueryParser.java} (71%) delete mode 100644 core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java diff --git a/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java b/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java index 36b94718776..5129cd5485e 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java @@ -116,15 +116,6 @@ public class Queries { return q; } - public static boolean isConstantMatchAllQuery(Query query) { - if (query instanceof ConstantScoreQuery) { - return isConstantMatchAllQuery(((ConstantScoreQuery) query).getQuery()); - } else if (query instanceof MatchAllDocsQuery) { - return true; - } - return false; - } - public static Query applyMinimumShouldMatch(BooleanQuery query, 
@Nullable String minimumShouldMatch) { if (minimumShouldMatch == null) { return query; diff --git a/core/src/main/java/org/elasticsearch/index/IndexSettings.java b/core/src/main/java/org/elasticsearch/index/IndexSettings.java index fc2e476afc3..4e3574c70a8 100644 --- a/core/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/core/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.MergePolicy; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.lucene.all.AllField; import org.elasticsearch.common.settings.IndexScopedSettings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; @@ -48,9 +49,21 @@ import java.util.function.Function; * be called for each settings update. */ public final class IndexSettings { - - public static final Setting DEFAULT_FIELD_SETTING = - new Setting<>("index.query.default_field", AllFieldMapper.NAME, Function.identity(), Property.IndexScope); + public static final String DEFAULT_FIELD_SETTING_KEY = "index.query.default_field"; + public static final Setting DEFAULT_FIELD_SETTING; + static { + Function defValue = settings -> { + final String defaultField; + if (settings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null) != null && + Version.indexCreated(settings).before(Version.V_6_0_0_alpha1)) { + defaultField = AllFieldMapper.NAME; + } else { + defaultField = "*"; + } + return defaultField; + }; + DEFAULT_FIELD_SETTING = new Setting<>(DEFAULT_FIELD_SETTING_KEY, defValue, Function.identity(), Property.IndexScope, Property.Dynamic); + } public static final Setting QUERY_STRING_LENIENT_SETTING = Setting.boolSetting("index.query_string.lenient", false, Property.IndexScope); public static final Setting QUERY_STRING_ANALYZE_WILDCARD = diff --git 
a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java index f81474f3416..ac2c9b559e8 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MultiMatchQueryBuilder.java @@ -35,7 +35,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.search.MultiMatchQuery; -import org.elasticsearch.index.search.QueryStringQueryParser; +import org.elasticsearch.index.search.QueryParserHelper; import java.io.IOException; import java.util.HashMap; @@ -767,7 +767,7 @@ public class MultiMatchQueryBuilder extends AbstractQueryBuilder newFieldsBoosts = QueryStringQueryParser.resolveMappingFields(context, fieldsBoosts); + Map newFieldsBoosts = QueryParserHelper.resolveMappingFields(context, fieldsBoosts); return multiMatchQuery.parse(type, newFieldsBoosts, value, minimumShouldMatch); } diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index 9cb635d6d48..17c4b25fe95 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -34,7 +34,9 @@ import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.AllFieldMapper; import org.elasticsearch.index.query.support.QueryParsers; +import org.elasticsearch.index.search.QueryParserHelper; import org.elasticsearch.index.search.QueryStringQueryParser; import org.joda.time.DateTimeZone; 
@@ -304,7 +306,7 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder 0) { - final Map resolvedFields = QueryStringQueryParser.resolveMappingFields(context, fieldsAndWeights); + final Map resolvedFields = QueryParserHelper.resolveMappingFields(context, fieldsAndWeights); queryParser = new QueryStringQueryParser(context, resolvedFields, isLenient); } else { - // Expand to all fields if: - // - The index default search field is "*" - // - The index default search field is "_all" and _all is disabled - // TODO the index default search field should be "*" for new indices. - if (Regex.isMatchAllPattern(context.defaultField()) || - (context.getMapperService().allEnabled() == false && "_all".equals(context.defaultField()))) { - // Automatically determine the fields from the index mapping. - // Automatically set leniency to "true" if unset so mismatched fields don't cause exceptions; + String defaultField = context.defaultField(); + if (context.getMapperService().allEnabled() == false && + AllFieldMapper.NAME.equals(defaultField)) { + // For indices created before 6.0 with _all disabled + defaultField = "*"; + } + if (Regex.isMatchAllPattern(defaultField)) { queryParser = new QueryStringQueryParser(context, lenient == null ? 
true : lenient); } else { - queryParser = new QueryStringQueryParser(context, context.defaultField(), isLenient); + queryParser = new QueryStringQueryParser(context, defaultField, isLenient); } } diff --git a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java index 17e7418fbac..c4c1431570c 100644 --- a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/SimpleQueryStringBuilder.java @@ -31,16 +31,17 @@ import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.query.SimpleQueryParser.Settings; -import org.elasticsearch.index.search.QueryStringQueryParser; +import org.elasticsearch.index.mapper.AllFieldMapper; +import org.elasticsearch.index.search.QueryParserHelper; +import org.elasticsearch.index.search.SimpleQueryStringQueryParser; +import org.elasticsearch.index.search.SimpleQueryStringQueryParser.Settings; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.Objects; -import java.util.TreeMap; /** * SimpleQuery is a query parser that acts similar to a query_string query, but @@ -57,7 +58,7 @@ import java.util.TreeMap; *
  • '{@code ~}N' at the end of phrases specifies near/slop query: "term1 term2"~5 * *

    - * See: {@link SimpleQueryParser} for more information. + * See: {@link SimpleQueryStringQueryParser} for more information. *

    * This query supports these options: *

    @@ -104,7 +105,8 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder fieldsAndWeights = new TreeMap<>(); + private Map fieldsAndWeights = new HashMap<>(); /** If specified, analyzer to use to parse the query text, defaults to registered default in toQuery. */ private String analyzer; /** Default operator to use for linking boolean clauses. Defaults to OR according to docs. */ @@ -126,8 +126,6 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder resolvedFieldsAndWeights = new TreeMap<>(); - - if ((useAllFields != null && useAllFields) && (fieldsAndWeights.size() != 0)) { - throw addValidationError("cannot use [all_fields] parameter in conjunction with [fields]", null); - } - - // If explicitly required to use all fields, use all fields, OR: - // Automatically determine the fields (to replace the _all field) if all of the following are true: - // - The _all field is disabled, - // - and the default_field has not been changed in the settings - // - and no fields are specified in the request Settings newSettings = new Settings(settings); - if ((this.useAllFields != null && this.useAllFields) || - (context.getMapperService().allEnabled() == false && - "_all".equals(context.defaultField()) && - this.fieldsAndWeights.isEmpty())) { - resolvedFieldsAndWeights = QueryStringQueryParser.resolveMappingField(context, "*", 1.0f, - false, false); - // Need to use lenient mode when using "all-mode" so exceptions aren't thrown due to mismatched types - newSettings.lenient(lenientSet ? 
settings.lenient() : true); + final Map resolvedFieldsAndWeights; + if (fieldsAndWeights.isEmpty() == false) { + resolvedFieldsAndWeights = QueryParserHelper.resolveMappingFields(context, fieldsAndWeights); } else { - // Use the default field if no fields specified - if (fieldsAndWeights.isEmpty()) { - resolvedFieldsAndWeights.put(resolveIndexName(context.defaultField(), context), AbstractQueryBuilder.DEFAULT_BOOST); - } else { - for (Map.Entry fieldEntry : fieldsAndWeights.entrySet()) { - if (Regex.isSimpleMatchPattern(fieldEntry.getKey())) { - for (String fieldName : context.getMapperService().simpleMatchToIndexNames(fieldEntry.getKey())) { - resolvedFieldsAndWeights.put(fieldName, fieldEntry.getValue()); - } - } else { - resolvedFieldsAndWeights.put(resolveIndexName(fieldEntry.getKey(), context), fieldEntry.getValue()); - } - } + String defaultField = context.defaultField(); + if (context.getMapperService().allEnabled() == false && + AllFieldMapper.NAME.equals(defaultField)) { + // For indices created before 6.0 with _all disabled + defaultField = "*"; } + boolean isAllField = Regex.isMatchAllPattern(defaultField); + if (isAllField) { + newSettings.lenient(lenientSet ? 
settings.lenient() : true); + } + resolvedFieldsAndWeights = QueryParserHelper.resolveMappingField(context, defaultField, 1.0f, + false, !isAllField); } - // Use standard analyzer by default if none specified - Analyzer luceneAnalyzer; + final SimpleQueryStringQueryParser sqp; if (analyzer == null) { - luceneAnalyzer = context.getMapperService().searchAnalyzer(); + sqp = new SimpleQueryStringQueryParser(resolvedFieldsAndWeights, flags, newSettings, context); } else { - luceneAnalyzer = context.getIndexAnalyzers().get(analyzer); + Analyzer luceneAnalyzer = context.getIndexAnalyzers().get(analyzer); if (luceneAnalyzer == null) { throw new QueryShardException(context, "[" + SimpleQueryStringBuilder.NAME + "] analyzer [" + analyzer + "] not found"); } - + sqp = new SimpleQueryStringQueryParser(luceneAnalyzer, resolvedFieldsAndWeights, flags, newSettings, context); } - - SimpleQueryParser sqp = new SimpleQueryParser(luceneAnalyzer, resolvedFieldsAndWeights, flags, newSettings, context); sqp.setDefaultOperator(defaultOperator.toBooleanClauseOccur()); Query query = sqp.parse(queryText); return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); } - private static String resolveIndexName(String fieldName, QueryShardContext context) { - MappedFieldType fieldType = context.fieldMapper(fieldName); - if (fieldType != null) { - return fieldType.name(); - } - return fieldName; - } - @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); @@ -477,9 +463,6 @@ public class SimpleQueryStringBuilder extends AbstractQueryBuilder ALLOWED_QUERY_MAPPER_TYPES; + + static { + ALLOWED_QUERY_MAPPER_TYPES = new HashSet<>(); + ALLOWED_QUERY_MAPPER_TYPES.add(DateFieldMapper.CONTENT_TYPE); + ALLOWED_QUERY_MAPPER_TYPES.add(IpFieldMapper.CONTENT_TYPE); + ALLOWED_QUERY_MAPPER_TYPES.add(KeywordFieldMapper.CONTENT_TYPE); + for (NumberFieldMapper.NumberType nt : NumberFieldMapper.NumberType.values()) { + 
ALLOWED_QUERY_MAPPER_TYPES.add(nt.typeName()); + } + ALLOWED_QUERY_MAPPER_TYPES.add(ScaledFloatFieldMapper.CONTENT_TYPE); + ALLOWED_QUERY_MAPPER_TYPES.add(TextFieldMapper.CONTENT_TYPE); + } + + private QueryParserHelper() {} + + /** + * Get a {@link FieldMapper} associated with a field name or null. + * @param mapperService The mapper service where to find the mapping. + * @param field The field name to search. + */ + public static FieldMapper getFieldMapper(MapperService mapperService, String field) { + for (DocumentMapper mapper : mapperService.docMappers(true)) { + FieldMapper fieldMapper = mapper.mappers().smartNameFieldMapper(field); + if (fieldMapper != null) { + return fieldMapper; + } + } + return null; + } + + public static Map resolveMappingFields(QueryShardContext context, + Map fieldsAndWeights) { + return resolveMappingFields(context, fieldsAndWeights, null); + } + + /** + * Resolve all the field names and patterns present in the provided map with the + * {@link QueryShardContext} and returns a new map containing all the expanded fields with their original boost. + * @param context The context of the query. + * @param fieldsAndWeights The map of fields and weights to expand. + * @param fieldSuffix The suffix name to add to the expanded field names if a mapping exists for that name. + * The original name of the field is kept if adding the suffix to the field name does not point to a valid field + * in the mapping. + */ + public static Map resolveMappingFields(QueryShardContext context, + Map fieldsAndWeights, + String fieldSuffix) { + Map resolvedFields = new HashMap<>(); + for (Map.Entry fieldEntry : fieldsAndWeights.entrySet()) { + boolean allField = Regex.isMatchAllPattern(fieldEntry.getKey()); + boolean multiField = Regex.isSimpleMatchPattern(fieldEntry.getKey()); + float weight = fieldEntry.getValue() == null ? 
1.0f : fieldEntry.getValue(); + Map fieldMap = resolveMappingField(context, fieldEntry.getKey(), weight, + !multiField, !allField, fieldSuffix); + resolvedFields.putAll(fieldMap); + } + return resolvedFields; + } + + /** + * Resolves the provided pattern or field name from the {@link QueryShardContext} and return a map of + * the expanded fields with their original boost. + * @param context The context of the query + * @param fieldOrPattern The field name or the pattern to resolve + * @param weight The weight for the field + * @param acceptAllTypes Whether all field type should be added when a pattern is expanded. + * If false, only {@link #ALLOWED_QUERY_MAPPER_TYPES} are accepted and other field types + * are discarded from the query. + * @param acceptMetadataField Whether metadata fields should be added when a pattern is expanded. + */ + public static Map resolveMappingField(QueryShardContext context, String fieldOrPattern, float weight, + boolean acceptAllTypes, boolean acceptMetadataField) { + return resolveMappingField(context, fieldOrPattern, weight, acceptAllTypes, acceptMetadataField, null); + } + + /** + * Resolves the provided pattern or field name from the {@link QueryShardContext} and return a map of + * the expanded fields with their original boost. + * @param context The context of the query + * @param fieldOrPattern The field name or the pattern to resolve + * @param weight The weight for the field + * @param acceptAllTypes Whether all field type should be added when a pattern is expanded. + * If false, only {@link #ALLOWED_QUERY_MAPPER_TYPES} are accepted and other field types + * are discarded from the query. + * @param acceptMetadataField Whether metadata fields should be added when a pattern is expanded. + * @param fieldSuffix The suffix name to add to the expanded field names if a mapping exists for that name. + * The original name of the field is kept if adding the suffix to the field name does not point to a valid field + * in the mapping. 
+ */ + public static Map resolveMappingField(QueryShardContext context, String fieldOrPattern, float weight, + boolean acceptAllTypes, boolean acceptMetadataField, String fieldSuffix) { + Collection allFields = context.simpleMatchToIndexNames(fieldOrPattern); + Map fields = new HashMap<>(); + for (String fieldName : allFields) { + if (fieldSuffix != null && context.fieldMapper(fieldName + fieldSuffix) != null) { + fieldName = fieldName + fieldSuffix; + } + FieldMapper mapper = getFieldMapper(context.getMapperService(), fieldName); + if (mapper == null) { + // Unmapped fields are not ignored + fields.put(fieldOrPattern, weight); + continue; + } + if (acceptMetadataField == false && mapper instanceof MetadataFieldMapper) { + // Ignore metadata fields + continue; + } + // Ignore fields that are not in the allowed mapper types. Some + // types do not support term queries, and thus we cannot generate + // a special query for them. + String mappingType = mapper.fieldType().typeName(); + if (acceptAllTypes == false && ALLOWED_QUERY_MAPPER_TYPES.contains(mappingType) == false) { + continue; + } + fields.put(fieldName, weight); + } + return fields; + } +} diff --git a/core/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java b/core/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java index b6537f6deb5..60817a38af0 100644 --- a/core/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/search/QueryStringQueryParser.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.Term; @@ -49,18 +48,10 @@ import 
org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.AllFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper; -import org.elasticsearch.index.mapper.DocumentMapper; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; -import org.elasticsearch.index.mapper.IpFieldMapper; -import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MetadataFieldMapper; -import org.elasticsearch.index.mapper.NumberFieldMapper; -import org.elasticsearch.index.mapper.ScaledFloatFieldMapper; import org.elasticsearch.index.mapper.StringFieldType; import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.query.ExistsQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; @@ -69,17 +60,14 @@ import org.joda.time.DateTimeZone; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded; import static org.elasticsearch.common.lucene.search.Queries.newLenientFieldQuery; import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQuery; +import static org.elasticsearch.index.search.QueryParserHelper.resolveMappingField; /** * A {@link XQueryParser} that uses the {@link MapperService} in order to build smarter @@ -88,22 +76,8 @@ import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQue * to assemble the result logically. 
*/ public class QueryStringQueryParser extends XQueryParser { - // Mapping types the "all-ish" query can be executed against - private static final Set ALLOWED_QUERY_MAPPER_TYPES; private static final String EXISTS_FIELD = "_exists_"; - static { - ALLOWED_QUERY_MAPPER_TYPES = new HashSet<>(); - ALLOWED_QUERY_MAPPER_TYPES.add(DateFieldMapper.CONTENT_TYPE); - ALLOWED_QUERY_MAPPER_TYPES.add(IpFieldMapper.CONTENT_TYPE); - ALLOWED_QUERY_MAPPER_TYPES.add(KeywordFieldMapper.CONTENT_TYPE); - for (NumberFieldMapper.NumberType nt : NumberFieldMapper.NumberType.values()) { - ALLOWED_QUERY_MAPPER_TYPES.add(nt.typeName()); - } - ALLOWED_QUERY_MAPPER_TYPES.add(ScaledFloatFieldMapper.CONTENT_TYPE); - ALLOWED_QUERY_MAPPER_TYPES.add(TextFieldMapper.CONTENT_TYPE); - } - private final QueryShardContext context; private final Map fieldsAndWeights; private final boolean lenient; @@ -162,8 +136,9 @@ public class QueryStringQueryParser extends XQueryParser { * @param lenient If set to `true` will cause format based failures (like providing text to a numeric field) to be ignored. 
*/ public QueryStringQueryParser(QueryShardContext context, boolean lenient) { - this(context, "*", resolveMappingField(context, "*", 1.0f, false, false), - lenient, context.getMapperService().searchAnalyzer()); + this(context, "*", + resolveMappingField(context, "*", 1.0f, false, false), + lenient, context.getMapperService().searchAnalyzer()); } private QueryStringQueryParser(QueryShardContext context, String defaultField, @@ -177,69 +152,6 @@ public class QueryStringQueryParser extends XQueryParser { this.lenient = lenient; } - - private static FieldMapper getFieldMapper(MapperService mapperService, String field) { - for (DocumentMapper mapper : mapperService.docMappers(true)) { - FieldMapper fieldMapper = mapper.mappers().smartNameFieldMapper(field); - if (fieldMapper != null) { - return fieldMapper; - } - } - return null; - } - - public static Map resolveMappingFields(QueryShardContext context, Map fieldsAndWeights) { - Map resolvedFields = new HashMap<>(); - for (Map.Entry fieldEntry : fieldsAndWeights.entrySet()) { - boolean allField = Regex.isMatchAllPattern(fieldEntry.getKey()); - boolean multiField = Regex.isSimpleMatchPattern(fieldEntry.getKey()); - float weight = fieldEntry.getValue() == null ? 1.0f : fieldEntry.getValue(); - Map fieldMap = resolveMappingField(context, fieldEntry.getKey(), weight, !multiField, !allField); - resolvedFields.putAll(fieldMap); - } - return resolvedFields; - } - - public static Map resolveMappingField(QueryShardContext context, String field, float weight, - boolean acceptMetadataField, boolean acceptAllTypes) { - return resolveMappingField(context, field, weight, acceptMetadataField, acceptAllTypes, false, null); - } - - /** - * Given a shard context, return a map of all fields in the mappings that - * can be queried. The map will be field name to a float of 1.0f. 
- */ - private static Map resolveMappingField(QueryShardContext context, String field, float weight, - boolean acceptAllTypes, boolean acceptMetadataField, - boolean quoted, String quoteFieldSuffix) { - Collection allFields = context.simpleMatchToIndexNames(field); - Map fields = new HashMap<>(); - for (String fieldName : allFields) { - if (quoted && quoteFieldSuffix != null && context.fieldMapper(fieldName + quoteFieldSuffix) != null) { - fieldName = fieldName + quoteFieldSuffix; - } - FieldMapper mapper = getFieldMapper(context.getMapperService(), fieldName); - if (mapper == null) { - // Unmapped fields are not ignored - fields.put(field, weight); - continue; - } - if (acceptMetadataField == false && mapper instanceof MetadataFieldMapper) { - // Ignore metadata fields - continue; - } - // Ignore fields that are not in the allowed mapper types. Some - // types do not support term queries, and thus we cannot generate - // a special query for them. - String mappingType = mapper.fieldType().typeName(); - if (acceptAllTypes == false && ALLOWED_QUERY_MAPPER_TYPES.contains(mappingType) == false) { - continue; - } - fields.put(fieldName, weight); - } - return fields; - } - @Override public void setDefaultOperator(Operator op) { super.setDefaultOperator(op); @@ -343,7 +255,7 @@ public class QueryStringQueryParser extends XQueryParser { boolean multiFields = Regex.isSimpleMatchPattern(field); // Filters unsupported fields if a pattern is requested // Filters metadata fields if all fields are requested - return resolveMappingField(context, field, 1.0f, !allFields, !multiFields, quoted, quoteFieldSuffix); + return resolveMappingField(context, field, 1.0f, !allFields, !multiFields, quoted ? 
quoteFieldSuffix : null); } else { return fieldsAndWeights; } @@ -577,22 +489,20 @@ public class QueryStringQueryParser extends XQueryParser { } private Query getPrefixQuerySingle(String field, String termStr) throws ParseException { - currentFieldType = null; Analyzer oldAnalyzer = getAnalyzer(); try { currentFieldType = context.fieldMapper(field); - if (currentFieldType != null) { - setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer); - Query query = null; - if (currentFieldType instanceof StringFieldType == false) { - query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context); - } - if (query == null) { - query = getPossiblyAnalyzedPrefixQuery(currentFieldType.name(), termStr); - } - return query; + if (currentFieldType == null) { + return newUnmappedFieldQuery(field); } - return getPossiblyAnalyzedPrefixQuery(field, termStr); + setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer); + Query query = null; + if (currentFieldType instanceof StringFieldType == false) { + query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context); + } else { + query = getPossiblyAnalyzedPrefixQuery(currentFieldType.name(), termStr); + } + return query; } catch (RuntimeException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -784,12 +694,12 @@ public class QueryStringQueryParser extends XQueryParser { Analyzer oldAnalyzer = getAnalyzer(); try { currentFieldType = queryBuilder.context.fieldMapper(field); - if (currentFieldType != null) { - setAnalyzer(forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer); - Query query = super.getRegexpQuery(field, termStr); - return query; + if (currentFieldType == null) { + return newUnmappedFieldQuery(field); } - return super.getRegexpQuery(field, termStr); + setAnalyzer(forceAnalyzer == null ? 
queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer); + Query query = super.getRegexpQuery(field, termStr); + return query; } catch (RuntimeException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -863,30 +773,4 @@ public class QueryStringQueryParser extends XQueryParser { } return super.parse(query); } - - /** - * Checks if graph analysis should be enabled for the field depending - * on the provided {@link Analyzer} - */ - protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, - String queryText, boolean quoted, int phraseSlop) { - assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; - - // Use the analyzer to get all the tokens, and then build an appropriate - // query based on the analysis chain. - try (TokenStream source = analyzer.tokenStream(field, queryText)) { - if (source.hasAttribute(DisableGraphAttribute.class)) { - /** - * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid - * paths explosion. See {@link ShingleTokenFilterFactory} for details. 
- */ - setEnableGraphQueries(false); - } - Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop); - setEnableGraphQueries(true); - return query; - } catch (IOException e) { - throw new RuntimeException("Error analyzing query text", e); - } - } } diff --git a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java b/core/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java similarity index 71% rename from core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java rename to core/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java index 141b935eb13..fc7f1349e5c 100644 --- a/core/src/main/java/org/elasticsearch/index/query/SimpleQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/search/SimpleQueryStringQueryParser.java @@ -16,48 +16,74 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.index.query; +package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.simple.SimpleQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; +import 
org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.AbstractQueryBuilder; +import org.elasticsearch.index.query.MultiMatchQueryBuilder; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.query.SimpleQueryStringBuilder; import java.io.IOException; -import java.util.Iterator; import java.util.Map; import java.util.Objects; import java.util.List; import java.util.ArrayList; +import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQuery; + /** - * Wrapper class for Lucene's SimpleQueryParser that allows us to redefine + * Wrapper class for Lucene's {@link SimpleQueryParser} that allows us to redefine * different types of queries. */ -public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.SimpleQueryParser { +public class SimpleQueryStringQueryParser extends SimpleQueryParser { private final Settings settings; private QueryShardContext context; + private final MultiMatchQuery queryBuilder; /** Creates a new parser with custom flags used to enable/disable certain features. */ - public SimpleQueryParser(Analyzer analyzer, Map weights, int flags, - Settings settings, QueryShardContext context) { + public SimpleQueryStringQueryParser(Map weights, int flags, + Settings settings, QueryShardContext context) { + this(null, weights, flags, settings, context); + } + + /** Creates a new parser with custom flags used to enable/disable certain features.
*/ + public SimpleQueryStringQueryParser(Analyzer analyzer, Map weights, int flags, + Settings settings, QueryShardContext context) { super(analyzer, weights, flags); this.settings = settings; this.context = context; + this.queryBuilder = new MultiMatchQuery(context); + this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery()); + this.queryBuilder.setLenient(settings.lenient()); + if (analyzer != null) { + this.queryBuilder.setAnalyzer(analyzer); + } + } + + private Analyzer getAnalyzer(MappedFieldType ft) { + if (getAnalyzer() != null) { + return analyzer; + } + return ft.searchAnalyzer(); } /** @@ -70,46 +96,44 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp throw e; } + @Override + public void setDefaultOperator(BooleanClause.Occur operator) { + super.setDefaultOperator(operator); + queryBuilder.setOccur(operator); + } + @Override protected Query newTermQuery(Term term) { - MappedFieldType currentFieldType = context.fieldMapper(term.field()); - if (currentFieldType == null || currentFieldType.tokenized()) { - return super.newTermQuery(term); + MappedFieldType ft = context.fieldMapper(term.field()); + if (ft == null) { + return newUnmappedFieldQuery(term.field()); } - return currentFieldType.termQuery(term.bytes(), context); + return ft.termQuery(term.bytes(), context); } @Override public Query newDefaultQuery(String text) { - List disjuncts = new ArrayList<>(); - for (Map.Entry entry : weights.entrySet()) { - try { - Query q = createBooleanQuery(entry.getKey(), text, super.getDefaultOperator()); - if (q != null) { - disjuncts.add(wrapWithBoost(q, entry.getValue())); - } - } catch (RuntimeException e) { - rethrowUnlessLenient(e); - } + try { + return queryBuilder.parse(MultiMatchQueryBuilder.Type.MOST_FIELDS, weights, text, null); + } catch (IOException e) { + return rethrowUnlessLenient(new IllegalArgumentException(e.getMessage())); } - if (disjuncts.size() == 1) { - return 
disjuncts.get(0); - } - return new DisjunctionMaxQuery(disjuncts, 1.0f); } - /** - * Dispatches to Lucene's SimpleQueryParser's newFuzzyQuery, optionally - * lowercasing the term first - */ @Override public Query newFuzzyQuery(String text, int fuzziness) { List disjuncts = new ArrayList<>(); for (Map.Entry entry : weights.entrySet()) { final String fieldName = entry.getKey(); + final MappedFieldType ft = context.fieldMapper(fieldName); + if (ft == null) { + disjuncts.add(newUnmappedFieldQuery(fieldName)); + continue; + } try { - final BytesRef term = getAnalyzer().normalize(fieldName, text); - Query query = new FuzzyQuery(new Term(fieldName, term), fuzziness); + final BytesRef term = getAnalyzer(ft).normalize(fieldName, text); + Query query = ft.fuzzyQuery(term, Fuzziness.fromEdits(fuzziness), FuzzyQuery.defaultPrefixLength, + FuzzyQuery.defaultMaxExpansions, FuzzyQuery.defaultTranspositions); disjuncts.add(wrapWithBoost(query, entry.getValue())); } catch (RuntimeException e) { rethrowUnlessLenient(e); @@ -123,50 +147,41 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp @Override public Query newPhraseQuery(String text, int slop) { - List disjuncts = new ArrayList<>(); - for (Map.Entry entry : weights.entrySet()) { - try { - String field = entry.getKey(); - if (settings.quoteFieldSuffix() != null) { - String quoteField = field + settings.quoteFieldSuffix(); - MappedFieldType quotedFieldType = context.fieldMapper(quoteField); - if (quotedFieldType != null) { - field = quoteField; - } - } - Float boost = entry.getValue(); - Query q = createPhraseQuery(field, text, slop); - if (q != null) { - disjuncts.add(wrapWithBoost(q, boost)); - } - } catch (RuntimeException e) { - rethrowUnlessLenient(e); + try { + queryBuilder.setPhraseSlop(slop); + Map phraseWeights; + if (settings.quoteFieldSuffix() != null) { + phraseWeights = QueryParserHelper.resolveMappingFields(context, weights, settings.quoteFieldSuffix()); + } else { + phraseWeights = 
weights; } + return queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, phraseWeights, text, null); + } catch (IOException e) { + return rethrowUnlessLenient(new IllegalArgumentException(e.getMessage())); + } finally { + queryBuilder.setPhraseSlop(0); } - if (disjuncts.size() == 1) { - return disjuncts.get(0); - } - return new DisjunctionMaxQuery(disjuncts, 1.0f); } - /** - * Dispatches to Lucene's SimpleQueryParser's newPrefixQuery, optionally - * lowercasing the term first or trying to analyze terms - */ @Override public Query newPrefixQuery(String text) { List disjuncts = new ArrayList<>(); for (Map.Entry entry : weights.entrySet()) { final String fieldName = entry.getKey(); + final MappedFieldType ft = context.fieldMapper(fieldName); + if (ft == null) { + disjuncts.add(newUnmappedFieldQuery(fieldName)); + continue; + } try { if (settings.analyzeWildcard()) { - Query analyzedQuery = newPossiblyAnalyzedQuery(fieldName, text); + Query analyzedQuery = newPossiblyAnalyzedQuery(fieldName, text, getAnalyzer(ft)); if (analyzedQuery != null) { disjuncts.add(wrapWithBoost(analyzedQuery, entry.getValue())); } } else { - Term term = new Term(fieldName, getAnalyzer().normalize(fieldName, text)); - Query query = new PrefixQuery(term); + BytesRef term = getAnalyzer(ft).normalize(fieldName, text); + Query query = ft.prefixQuery(term.utf8ToString(), null, context); disjuncts.add(wrapWithBoost(query, entry.getValue())); } } catch (RuntimeException e) { @@ -179,33 +194,10 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp return new DisjunctionMaxQuery(disjuncts, 1.0f); } - /** - * Checks if graph analysis should be enabled for the field depending - * on the provided {@link Analyzer} - */ - protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, - String queryText, boolean quoted, int phraseSlop) { - assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; - - // Use the 
analyzer to get all the tokens, and then build an appropriate - // query based on the analysis chain. - try (TokenStream source = analyzer.tokenStream(field, queryText)) { - if (source.hasAttribute(DisableGraphAttribute.class)) { - /** - * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid - * paths explosion. See {@link ShingleTokenFilterFactory} for details. - */ - setEnableGraphQueries(false); - } - Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop); - setEnableGraphQueries(true); - return query; - } catch (IOException e) { - throw new RuntimeException("Error analyzing query text", e); - } - } - private static Query wrapWithBoost(Query query, float boost) { + if (query instanceof MatchNoDocsQuery) { + return query; + } if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { return new BoostQuery(query, boost); } @@ -217,10 +209,9 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp * {@code PrefixQuery} or a {@code BooleanQuery} made up * of {@code TermQuery}s and {@code PrefixQuery}s */ - private Query newPossiblyAnalyzedQuery(String field, String termStr) { + private Query newPossiblyAnalyzedQuery(String field, String termStr, Analyzer analyzer) { List> tlist = new ArrayList<> (); - // get Analyzer from superclass and tokenize the term - try (TokenStream source = getAnalyzer().tokenStream(field, termStr)) { + try (TokenStream source = analyzer.tokenStream(field, termStr)) { source.reset(); List currentPos = new ArrayList<>(); CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); @@ -233,7 +224,7 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp tlist.add(currentPos); currentPos = new ArrayList<>(); } - final BytesRef term = getAnalyzer().normalize(field, termAtt.toString()); + final BytesRef term = analyzer.normalize(field, termAtt.toString()); currentPos.add(term); hasMoreTokens = source.incrementToken(); } @@ 
-293,7 +284,7 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp * Class encapsulating the settings for the SimpleQueryString query, with * their default values */ - static class Settings { + public static class Settings { /** Specifies whether lenient query parsing should be used. */ private boolean lenient = SimpleQueryStringBuilder.DEFAULT_LENIENT; /** Specifies whether wildcards should be analyzed. */ @@ -307,10 +298,10 @@ public class SimpleQueryParser extends org.apache.lucene.queryparser.simple.Simp * Generates default {@link Settings} object (uses ROOT locale, does * lowercase terms, no lenient parsing, no wildcard analysis). * */ - Settings() { + public Settings() { } - Settings(Settings other) { + public Settings(Settings other) { this.lenient = other.lenient; this.analyzeWildcard = other.analyzeWildcard; this.quoteFieldSuffix = other.quoteFieldSuffix; diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java deleted file mode 100644 index 2516a3abc09..00000000000 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryParserTests.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.query; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockSynonymAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.SynonymQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.MockFieldMapper; -import org.elasticsearch.test.ESTestCase; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import static org.hamcrest.Matchers.equalTo; - -public class SimpleQueryParserTests extends ESTestCase { - private static class MockSimpleQueryParser extends SimpleQueryParser { - MockSimpleQueryParser(Analyzer analyzer, Map weights, int flags, Settings settings) { - super(analyzer, weights, flags, settings, null); - } - - @Override - protected Query newTermQuery(Term term) { - return new TermQuery(term); - } - } - - public void testAnalyzeWildcard() { - SimpleQueryParser.Settings settings = new SimpleQueryParser.Settings(); - settings.analyzeWildcard(true); - Map weights = new HashMap<>(); - weights.put("field1", 1.0f); - SimpleQueryParser parser = new MockSimpleQueryParser(new StandardAnalyzer(), weights, -1, 
settings); - for (Operator op : Operator.values()) { - BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); - parser.setDefaultOperator(defaultOp); - Query query = parser.parse("first foo-bar-foobar* last"); - Query expectedQuery = - new BooleanQuery.Builder() - .add(new BooleanClause(new TermQuery(new Term("field1", "first")), defaultOp)) - .add(new BooleanQuery.Builder() - .add(new BooleanClause(new TermQuery(new Term("field1", "foo")), defaultOp)) - .add(new BooleanClause(new TermQuery(new Term("field1", "bar")), defaultOp)) - .add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")), defaultOp)) - .build(), defaultOp) - .add(new BooleanClause(new TermQuery(new Term("field1", "last")), defaultOp)) - .build(); - assertThat(query, equalTo(expectedQuery)); - } - } - - public void testAnalyzerWildcardWithSynonyms() { - SimpleQueryParser.Settings settings = new SimpleQueryParser.Settings(); - settings.analyzeWildcard(true); - Map weights = new HashMap<>(); - weights.put("field1", 1.0f); - SimpleQueryParser parser = new MockSimpleQueryParser(new MockRepeatAnalyzer(), weights, -1, settings); - - for (Operator op : Operator.values()) { - BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); - parser.setDefaultOperator(defaultOp); - Query query = parser.parse("first foo-bar-foobar* last"); - - Query expectedQuery = new BooleanQuery.Builder() - .add(new BooleanClause(new SynonymQuery(new Term("field1", "first"), - new Term("field1", "first")), defaultOp)) - .add(new BooleanQuery.Builder() - .add(new BooleanClause(new SynonymQuery(new Term("field1", "foo"), - new Term("field1", "foo")), defaultOp)) - .add(new BooleanClause(new SynonymQuery(new Term("field1", "bar"), - new Term("field1", "bar")), defaultOp)) - .add(new BooleanQuery.Builder() - .add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")), - BooleanClause.Occur.SHOULD)) - .add(new BooleanClause(new PrefixQuery(new Term("field1", "foobar")), - BooleanClause.Occur.SHOULD)) - 
.build(), defaultOp) - .build(), defaultOp) - .add(new BooleanClause(new SynonymQuery(new Term("field1", "last"), - new Term("field1", "last")), defaultOp)) - .build(); - assertThat(query, equalTo(expectedQuery)); - } - } - - public void testAnalyzerWithGraph() { - SimpleQueryParser.Settings settings = new SimpleQueryParser.Settings(); - settings.analyzeWildcard(true); - Map weights = new HashMap<>(); - weights.put("field1", 1.0f); - SimpleQueryParser parser = new MockSimpleQueryParser(new MockSynonymAnalyzer(), weights, -1, settings); - - for (Operator op : Operator.values()) { - BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); - parser.setDefaultOperator(defaultOp); - - // non-phrase won't detect multi-word synonym because of whitespace splitting - Query query = parser.parse("guinea pig"); - - Query expectedQuery = new BooleanQuery.Builder() - .add(new BooleanClause(new TermQuery(new Term("field1", "guinea")), defaultOp)) - .add(new BooleanClause(new TermQuery(new Term("field1", "pig")), defaultOp)) - .build(); - assertThat(query, equalTo(expectedQuery)); - - // phrase will pick it up - query = parser.parse("\"guinea pig\""); - SpanTermQuery span1 = new SpanTermQuery(new Term("field1", "guinea")); - SpanTermQuery span2 = new SpanTermQuery(new Term("field1", "pig")); - expectedQuery = new SpanOrQuery( - new SpanNearQuery(new SpanQuery[] { span1, span2 }, 0, true), - new SpanTermQuery(new Term("field1", "cavy"))); - - assertThat(query, equalTo(expectedQuery)); - - // phrase with slop - query = parser.parse("big \"tiny guinea pig\"~2"); - - expectedQuery = new BooleanQuery.Builder() - .add(new TermQuery(new Term("field1", "big")), defaultOp) - .add(new SpanNearQuery(new SpanQuery[] { - new SpanTermQuery(new Term("field1", "tiny")), - new SpanOrQuery( - new SpanNearQuery(new SpanQuery[] { span1, span2 }, 0, true), - new SpanTermQuery(new Term("field1", "cavy")) - ) - }, 2, true), defaultOp) - .build(); - assertThat(query, equalTo(expectedQuery)); - } - } - 
- public void testQuoteFieldSuffix() { - SimpleQueryParser.Settings sqpSettings = new SimpleQueryParser.Settings(); - sqpSettings.quoteFieldSuffix(".quote"); - - Settings indexSettings = Settings.builder() - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) - .put(IndexMetaData.SETTING_INDEX_UUID, "some_uuid") - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .build(); - IndexMetaData indexState = IndexMetaData.builder("index").settings(indexSettings).build(); - IndexSettings settings = new IndexSettings(indexState, Settings.EMPTY); - QueryShardContext mockShardContext = new QueryShardContext(0, settings, null, null, null, null, null, xContentRegistry(), - writableRegistry(), null, null, System::currentTimeMillis, null) { - @Override - public MappedFieldType fieldMapper(String name) { - return new MockFieldMapper.FakeFieldType(); - } - }; - - SimpleQueryParser parser = new SimpleQueryParser(new StandardAnalyzer(), - Collections.singletonMap("foo", 1f), -1, sqpSettings, mockShardContext); - assertEquals(new TermQuery(new Term("foo", "bar")), parser.parse("bar")); - assertEquals(new TermQuery(new Term("foo.quote", "bar")), parser.parse("\"bar\"")); - - // Now check what happens if foo.quote does not exist - mockShardContext = new QueryShardContext(0, settings, null, null, null, null, null, xContentRegistry(), - writableRegistry(), null, null, System::currentTimeMillis, null) { - @Override - public MappedFieldType fieldMapper(String name) { - if (name.equals("foo.quote")) { - return null; - } - return new MockFieldMapper.FakeFieldType(); - } - }; - parser = new SimpleQueryParser(new StandardAnalyzer(), - Collections.singletonMap("foo", 1f), -1, sqpSettings, mockShardContext); - assertEquals(new TermQuery(new Term("foo", "bar")), parser.parse("bar")); - assertEquals(new TermQuery(new Term("foo", "bar")), parser.parse("\"bar\"")); - } -} diff --git 
a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java index c0c70559110..1832def54e7 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleQueryStringBuilderTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.query; +import org.apache.lucene.analysis.MockSynonymAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -29,27 +31,29 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.TestUtil; import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.MetaData; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.search.SimpleQueryStringQueryParser; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.Locale; import java.util.Map; import java.util.Set; import static org.hamcrest.Matchers.anyOf; -import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.either; import static 
org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; @@ -85,13 +89,13 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase fields = new HashMap<>(); for (int i = 0; i < fieldCount; i++) { if (randomBoolean()) { - fields.put(randomAlphaOfLengthBetween(1, 10), AbstractQueryBuilder.DEFAULT_BOOST); + fields.put(STRING_FIELD_NAME, AbstractQueryBuilder.DEFAULT_BOOST); } else { - fields.put(randomBoolean() ? STRING_FIELD_NAME : randomAlphaOfLengthBetween(1, 10), 2.0f / randomIntBetween(1, 20)); + fields.put(STRING_FIELD_NAME_2, 2.0f / randomIntBetween(1, 20)); } } result.fields(fields); @@ -234,52 +238,35 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 1) { - assertThat(query, anyOf(instanceOf(BooleanQuery.class), instanceOf(DisjunctionMaxQuery.class))); - if (query instanceof BooleanQuery) { - BooleanQuery boolQuery = (BooleanQuery) query; - for (BooleanClause clause : boolQuery.clauses()) { - if (clause.getQuery() instanceof TermQuery) { - TermQuery inner = (TermQuery) clause.getQuery(); - assertThat(inner.getTerm().bytes().toString(), is(inner.getTerm().bytes().toString().toLowerCase(Locale.ROOT))); - } + assertThat(query, instanceOf(DisjunctionMaxQuery.class)); + DisjunctionMaxQuery maxQuery = (DisjunctionMaxQuery) query; + for (Query disjunct : maxQuery.getDisjuncts()) { + assertThat(disjunct, either(instanceOf(TermQuery.class)) + .or(instanceOf(BoostQuery.class)) + .or(instanceOf(MatchNoDocsQuery.class))); + Query termQuery = disjunct; + if (disjunct instanceof BoostQuery) { + termQuery = ((BoostQuery) disjunct).getQuery(); } - assertThat(boolQuery.clauses().size(), equalTo(queryBuilder.fields().size())); - Iterator> fieldsIterator = queryBuilder.fields().entrySet().iterator(); - for (BooleanClause booleanClause : boolQuery) { - Map.Entry field = 
fieldsIterator.next(); - assertTermOrBoostQuery(booleanClause.getQuery(), field.getKey(), queryBuilder.value(), field.getValue()); - } - if (queryBuilder.minimumShouldMatch() != null) { - assertThat(boolQuery.getMinimumNumberShouldMatch(), greaterThan(0)); - } - } else if (query instanceof DisjunctionMaxQuery) { - DisjunctionMaxQuery maxQuery = (DisjunctionMaxQuery) query; - for (Query disjunct : maxQuery.getDisjuncts()) { - if (disjunct instanceof TermQuery) { - TermQuery inner = (TermQuery) disjunct; - assertThat(inner.getTerm().bytes().toString(), is(inner.getTerm().bytes().toString().toLowerCase(Locale.ROOT))); - } - } - assertThat(maxQuery.getDisjuncts().size(), equalTo(queryBuilder.fields().size())); - Iterator> fieldsIterator = queryBuilder.fields().entrySet().iterator(); - for (Query disjunct : maxQuery) { - Map.Entry field = fieldsIterator.next(); - assertTermOrBoostQuery(disjunct, field.getKey(), queryBuilder.value(), field.getValue()); + if (termQuery instanceof TermQuery) { + TermQuery inner = (TermQuery) termQuery; + assertThat(inner.getTerm().bytes().toString(), is(inner.getTerm().bytes().toString().toLowerCase(Locale.ROOT))); + } else { + assertThat(termQuery, instanceOf(MatchNoDocsQuery.class)); } } } else if (queryBuilder.fields().size() == 1) { Map.Entry field = queryBuilder.fields().entrySet().iterator().next(); assertTermOrBoostQuery(query, field.getKey(), queryBuilder.value(), field.getValue()); } else if (queryBuilder.fields().size() == 0) { - MapperService ms = context.mapperService(); - if (ms.allEnabled()) { - assertTermQuery(query, MetaData.ALL, queryBuilder.value()); - } else { - assertThat(query.getClass(), - anyOf(equalTo(BooleanQuery.class), equalTo(DisjunctionMaxQuery.class), equalTo(MatchNoDocsQuery.class))); + assertThat(query, either(instanceOf(DisjunctionMaxQuery.class)).or(instanceOf(MatchNoDocsQuery.class))); + if (query instanceof DisjunctionMaxQuery) { + for (Query disjunct : (DisjunctionMaxQuery) query) { + 
assertThat(disjunct, either(instanceOf(TermQuery.class)).or(instanceOf(MatchNoDocsQuery.class))); + } } } else { fail("Encountered lucene query type we do not have a validation implementation for in our " @@ -335,7 +322,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 0); QueryShardContext shardContext = createShardContext(); int numberOfTerms = randomIntBetween(1, 4); StringBuilder queryString = new StringBuilder(); @@ -369,7 +357,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 0); // Prefix Query query = new SimpleQueryStringBuilder("aBc*") .field(STRING_FIELD_NAME) @@ -430,18 +419,122 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase 0); + SimpleQueryStringQueryParser.Settings settings = new SimpleQueryStringQueryParser.Settings(); + settings.analyzeWildcard(true); + SimpleQueryStringQueryParser parser = new SimpleQueryStringQueryParser(new StandardAnalyzer(), + Collections.singletonMap(STRING_FIELD_NAME, 1.0f), -1, settings, createShardContext()); + for (Operator op : Operator.values()) { + BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); + parser.setDefaultOperator(defaultOp); + Query query = parser.parse("first foo-bar-foobar* last"); + Query expectedQuery = + new BooleanQuery.Builder() + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "first")), defaultOp)) + .add(new BooleanQuery.Builder() + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "foo")), defaultOp)) + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), defaultOp)) + .add(new BooleanClause(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), defaultOp)) + .build(), defaultOp) + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "last")), defaultOp)) + .build(); + assertThat(query, equalTo(expectedQuery)); + } + } - ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(json)); - assertThat(e.getMessage(), - 
containsString("cannot use [all_fields] parameter in conjunction with [fields]")); + public void testAnalyzerWildcardWithSynonyms() throws IOException { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + SimpleQueryStringQueryParser.Settings settings = new SimpleQueryStringQueryParser.Settings(); + settings.analyzeWildcard(true); + SimpleQueryStringQueryParser parser = new SimpleQueryStringQueryParser(new MockRepeatAnalyzer(), + Collections.singletonMap(STRING_FIELD_NAME, 1.0f), -1, settings, createShardContext()); + for (Operator op : Operator.values()) { + BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); + parser.setDefaultOperator(defaultOp); + Query query = parser.parse("first foo-bar-foobar* last"); + Query expectedQuery = new BooleanQuery.Builder() + .add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "first"), + new Term(STRING_FIELD_NAME, "first")), defaultOp)) + .add(new BooleanQuery.Builder() + .add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "foo"), + new Term(STRING_FIELD_NAME, "foo")), defaultOp)) + .add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "bar"), + new Term(STRING_FIELD_NAME, "bar")), defaultOp)) + .add(new BooleanQuery.Builder() + .add(new BooleanClause(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), + BooleanClause.Occur.SHOULD)) + .add(new BooleanClause(new PrefixQuery(new Term(STRING_FIELD_NAME, "foobar")), + BooleanClause.Occur.SHOULD)) + .build(), defaultOp) + .build(), defaultOp) + .add(new BooleanClause(new SynonymQuery(new Term(STRING_FIELD_NAME, "last"), + new Term(STRING_FIELD_NAME, "last")), defaultOp)) + .build(); + assertThat(query, equalTo(expectedQuery)); + } + } + + public void testAnalyzerWithGraph() { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + SimpleQueryStringQueryParser.Settings settings = new SimpleQueryStringQueryParser.Settings(); + 
settings.analyzeWildcard(true); + SimpleQueryStringQueryParser parser = new SimpleQueryStringQueryParser(new MockSynonymAnalyzer(), + Collections.singletonMap(STRING_FIELD_NAME, 1.0f), -1, settings, createShardContext()); + for (Operator op : Operator.values()) { + BooleanClause.Occur defaultOp = op.toBooleanClauseOccur(); + parser.setDefaultOperator(defaultOp); + // non-phrase won't detect multi-word synonym because of whitespace splitting + Query query = parser.parse("guinea pig"); + + Query expectedQuery = new BooleanQuery.Builder() + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "guinea")), defaultOp)) + .add(new BooleanClause(new TermQuery(new Term(STRING_FIELD_NAME, "pig")), defaultOp)) + .build(); + assertThat(query, equalTo(expectedQuery)); + + // phrase will pick it up + query = parser.parse("\"guinea pig\""); + SpanTermQuery span1 = new SpanTermQuery(new Term(STRING_FIELD_NAME, "guinea")); + SpanTermQuery span2 = new SpanTermQuery(new Term(STRING_FIELD_NAME, "pig")); + expectedQuery = new SpanOrQuery( + new SpanNearQuery(new SpanQuery[] { span1, span2 }, 0, true), + new SpanTermQuery(new Term(STRING_FIELD_NAME, "cavy"))); + + assertThat(query, equalTo(expectedQuery)); + + // phrase with slop + query = parser.parse("big \"tiny guinea pig\"~2"); + + expectedQuery = new BooleanQuery.Builder() + .add(new TermQuery(new Term(STRING_FIELD_NAME, "big")), defaultOp) + .add(new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term(STRING_FIELD_NAME, "tiny")), + new SpanOrQuery( + new SpanNearQuery(new SpanQuery[] { span1, span2 }, 0, true), + new SpanTermQuery(new Term(STRING_FIELD_NAME, "cavy")) + ) + }, 2, true), defaultOp) + .build(); + assertThat(query, equalTo(expectedQuery)); + } + } + + public void testQuoteFieldSuffix() { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + SimpleQueryStringQueryParser.Settings settings = new SimpleQueryStringQueryParser.Settings(); + 
settings.analyzeWildcard(true); + settings.quoteFieldSuffix("_2"); + SimpleQueryStringQueryParser parser = new SimpleQueryStringQueryParser(new MockSynonymAnalyzer(), + Collections.singletonMap(STRING_FIELD_NAME, 1.0f), -1, settings, createShardContext()); + assertEquals(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), parser.parse("bar")); + assertEquals(new TermQuery(new Term(STRING_FIELD_NAME_2, "bar")), parser.parse("\"bar\"")); + + // Now check what happens if the quote field does not exist + settings.quoteFieldSuffix(".quote"); + parser = new SimpleQueryStringQueryParser(new MockSynonymAnalyzer(), + Collections.singletonMap(STRING_FIELD_NAME, 1.0f), -1, settings, createShardContext()); + assertEquals(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), parser.parse("bar")); + assertEquals(new TermQuery(new Term(STRING_FIELD_NAME, "bar")), parser.parse("\"bar\"")); } } diff --git a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 0b8c2ecf129..8aa50b41228 100644 --- a/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/core/src/test/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -548,12 +548,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase { simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get(); assertHits(resp.getHits(), "1"); assertHitCount(resp, 1L); - - Exception e = expectThrows(Exception.class, () -> - client().prepareSearch("test").setQuery( - simpleQueryStringQuery("blah").field("f1").useAllFields(true)).get()); - assertThat(ExceptionsHelper.detailedMessage(e), - containsString("cannot use [all_fields] parameter in conjunction with [fields]")); } public void testAllFieldsWithSpecifiedLeniency() throws IOException { diff --git a/docs/reference/query-dsl/match-query.asciidoc b/docs/reference/query-dsl/match-query.asciidoc index 
ed47a1c8f1a..56874f25a85 100644 --- a/docs/reference/query-dsl/match-query.asciidoc +++ b/docs/reference/query-dsl/match-query.asciidoc @@ -19,7 +19,7 @@ GET /_search // CONSOLE Note, `message` is the name of a field, you can substitute the name of -any field (including `_all`) instead. +any field instead. [[query-dsl-match-query-boolean]] ==== match diff --git a/docs/reference/query-dsl/simple-query-string-query.asciidoc b/docs/reference/query-dsl/simple-query-string-query.asciidoc index 1251803fca9..c43dde982f1 100644 --- a/docs/reference/query-dsl/simple-query-string-query.asciidoc +++ b/docs/reference/query-dsl/simple-query-string-query.asciidoc @@ -13,8 +13,7 @@ GET /_search "query": { "simple_query_string" : { "query": "\"fried eggs\" +(eggplant | potato) -frittata", - "analyzer": "snowball", - "fields": ["body^5","_all"], + "fields": ["title^5", "body"], "default_operator": "and" } } @@ -30,7 +29,9 @@ The `simple_query_string` top level parameters include: |`query` |The actual query to be parsed. See below for syntax. |`fields` |The fields to perform the parsed query against. Defaults to the -`index.query.default_field` index settings, which in turn defaults to `_all`. +`index.query.default_field` index setting, which in turn defaults to `*`. +`*` extracts all fields in the mapping that are eligible for term queries +and filters out the metadata fields. |`default_operator` |The default operator used if no explicit operator is specified. For example, with a default operator of `OR`, the query @@ -38,7 +39,7 @@ is specified. For example, with a default operator of `OR`, the query with default operator of `AND`, the same query is translated to `capital AND of AND Hungary`. The default value is `OR`. -|`analyzer` |The analyzer used to analyze each term of the query when +|`analyzer` |Forces the use of this analyzer to analyze each term of the query when creating composite queries. 
|`flags` |Flags specifying which features of the `simple_query_string` to @@ -65,7 +66,8 @@ comprehensive example. |`auto_generate_synonyms_phrase_query` |Whether phrase queries should be automatically generated for multi terms synonyms. Defaults to `true`. -|`all_fields` | Perform the query on all fields detected in the mapping that can +|`all_fields` | deprecated[6.0.0, set `fields` to `*` instead] +Perform the query on all fields detected in the mapping that can be queried. Will be used by default when the `_all` field is disabled and no `default_field` is specified index settings, and no `fields` are specified. |======================================================================= @@ -114,12 +116,9 @@ documents that contain "baz". ==== Default Field When not explicitly specifying the field to search on in the query string syntax, the `index.query.default_field` will be used to derive -which field to search on. It defaults to `_all` field. - -If the `_all` field is disabled and no `fields` are specified in the request`, -the `simple_query_string` query will automatically attempt to determine the -existing fields in the index's mapping that are queryable, and perform the -search on those fields. +which field to search on. It defaults to `*`, in which case the query automatically +attempts to determine the existing fields in the index's mapping that are queryable +and performs the search on those fields. [float] ==== Multi Field