From 8d6a41f67145b3bbc369eb5eec7caceaa39f5345 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 14 Feb 2017 16:05:19 +0100 Subject: [PATCH] Nested queries should avoid adding unnecessary filters when possible. (#23079) When nested objects are present in the mappings, many queries get deoptimized due to the need to exclude documents that are not in the right space. For instance, a filter is applied to all queries that prevents them from matching non-root documents (`+*:* -_type:__*`). Moreover, a filter is applied to all child queries of `nested` queries in order to make sure that the child query only matches child documents (`_type:__nested_path`), which is required by `ToParentBlockJoinQuery` (the Lucene query behing Elasticsearch's `nested` queries). These additional filters slow down `nested` queries. In 1.7-, the cost was somehow amortized by the fact that we cached filters very aggressively. However, this has proven to be a significant source of slow downs since 2.0 for users of `nested` mappings and queries, see #20797. This change makes the filtering a bit smarter. For instance if the query is a `match_all` query, then we need to exclude nested docs. However, if the query is `foo: bar` then it may only match root documents since `foo` is a top-level field, so no additional filtering is required. Another improvement is to use a `FILTER` clause on all types rather than a `MUST_NOT` clause on all nested paths when possible since `FILTER` clauses are more efficient. Here are some examples of queries and how they get rewritten: ``` "match_all": {} ``` This query gets rewritten to `ConstantScore(+*:* -_type:__*)` on master and `ConstantScore(_type:AutomatonQuery {\norg.apache.lucene.util.automaton.Automaton@4371da44})` with this change. The automaton is the complement of `_type:__*` so it matches the same documents, but is faster since it is now a positive clause. Simplistic performance testing on a 10M index where each root document has 5 nested documents on average gave a latency of 420ms on master and 90ms with this change applied. ``` "term": { "foo": { "value": "0" } } ``` This query is rewritten to `+foo:0 #(ConstantScore(+*:* -_type:__*))^0.0` on master and `foo:0` with this change: we do not need to filter nested docs out since the query cannot match nested docs. While doing performance testing in the same conditions as above, response times went from 250ms to 50ms. ``` "nested": { "path": "nested", "query": { "term": { "nested.foo": { "value": "0" } } } } ``` This query is rewritten to `+ToParentBlockJoinQuery (+nested.foo:0 #_type:__nested) #(ConstantScore(+*:* -_type:__*))^0.0` on master and `ToParentBlockJoinQuery (nested.foo:0)` with this change. The top-level filter (`-_type:__*`) could be removed since `nested` queries only match documents of the parent space, as well as the child filter (`#_type:__nested`) since the child query may only match nested docs since the `nested` object has both `include_in_parent` and `include_in_root` set to `false`. While doing performance testing in the same conditions as above, response times went from 850ms to 270ms. --- .../vectorhighlight/CustomFieldQuery.java | 6 +- .../common/lucene/search/Queries.java | 31 +- .../index/query/NestedQueryBuilder.java | 17 +- .../search/ESToParentBlockJoinQuery.java | 101 ++++++ .../index/search/NestedHelper.java | 178 ++++++++++ .../search/DefaultSearchContext.java | 88 ++--- .../search/aggregations/AggregationPhase.java | 12 +- .../internal/FilteredSearchContext.java | 4 +- .../search/internal/SearchContext.java | 4 +- .../search/internal/SubSearchContext.java | 2 +- .../apache/lucene/search/QueriesTests.java | 33 ++ .../index/query/NestedQueryBuilderTests.java | 5 +- .../search/ESToParentBlockJoinQueryTests.java | 91 +++++ .../index/search/NestedHelperTests.java | 319 ++++++++++++++++++ .../search/DefaultSearchContextTests.java | 71 ---- .../search/nested/SimpleNestedIT.java | 2 +- docs/reference/search/explain.asciidoc | 107 ++---- .../elasticsearch/test/TestSearchContext.java | 2 +- 18 files changed, 853 insertions(+), 220 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/index/search/ESToParentBlockJoinQuery.java create mode 100644 core/src/main/java/org/elasticsearch/index/search/NestedHelper.java create mode 100644 core/src/test/java/org/apache/lucene/search/QueriesTests.java create mode 100644 core/src/test/java/org/elasticsearch/index/search/ESToParentBlockJoinQueryTests.java create mode 100644 core/src/test/java/org/elasticsearch/index/search/NestedHelperTests.java delete mode 100644 core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java diff --git a/core/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/core/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java index f58d4b47424..d6fb9b808eb 100644 --- a/core/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java +++ b/core/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java @@ -30,11 +30,11 @@ import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.join.ToParentBlockJoinQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import org.elasticsearch.index.search.ESToParentBlockJoinQuery; import java.io.IOException; import java.util.Collection; @@ -77,8 +77,8 @@ public class CustomFieldQuery extends FieldQuery { } else if (sourceQuery instanceof BlendedTermQuery) { final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery; flatten(blendedTermQuery.rewrite(reader), reader, flatQueries, boost); - } else if (sourceQuery instanceof ToParentBlockJoinQuery) { - ToParentBlockJoinQuery blockJoinQuery = (ToParentBlockJoinQuery) sourceQuery; + } else if (sourceQuery instanceof ESToParentBlockJoinQuery) { + ESToParentBlockJoinQuery blockJoinQuery = (ESToParentBlockJoinQuery) sourceQuery; flatten(blockJoinQuery.getChildQuery(), reader, flatQueries, boost); } else if (sourceQuery instanceof BoostingQuery) { BoostingQuery boostingQuery = (BoostingQuery) sourceQuery; diff --git a/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java b/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java index dab11444255..68a02ed256d 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/search/Queries.java @@ -21,6 +21,7 @@ package org.elasticsearch.common.lucene.search; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; +import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; @@ -30,6 +31,9 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.mapper.TypeFieldMapper; @@ -38,6 +42,29 @@ import java.util.regex.Pattern; public class Queries { + private static final Automaton NON_NESTED_TYPE_AUTOMATON; + static { + Automaton nestedTypeAutomaton = Operations.concatenate( + Automata.makeString("__"), + Automata.makeAnyString()); + NON_NESTED_TYPE_AUTOMATON = Operations.complement(nestedTypeAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES); + } + + // We use a custom class rather than AutomatonQuery directly in order to + // have a better toString + private static class NonNestedQuery extends AutomatonQuery { + + NonNestedQuery() { + super(new Term(TypeFieldMapper.NAME), NON_NESTED_TYPE_AUTOMATON); + } + + @Override + public String toString(String field) { + return "_type:[^_].*"; + } + + } + public static Query newMatchAllQuery() { return new MatchAllDocsQuery(); } @@ -52,7 +79,9 @@ public class Queries { } public static Query newNonNestedFilter() { - return not(newNestedFilter()); + // we use this automaton query rather than a negation of newNestedFilter + // since purely negative queries against high-cardinality clauses are costly + return new NonNestedQuery(); } public static BooleanQuery filtered(@Nullable Query query, @Nullable Query filter) { diff --git a/core/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java index 322db3815f9..b0c49a3f4a8 100644 --- a/core/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java @@ -32,6 +32,8 @@ import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.ObjectMapper; +import org.elasticsearch.index.search.ESToParentBlockJoinQuery; +import org.elasticsearch.index.search.NestedHelper; import java.io.IOException; import java.util.Map; @@ -232,22 +234,29 @@ public class NestedQueryBuilder extends AbstractQueryBuilder throw new IllegalStateException("[" + NAME + "] nested object under path [" + path + "] is not of nested type"); } final BitSetProducer parentFilter; - final Query childFilter; - final Query innerQuery; + Query innerQuery; ObjectMapper objectMapper = context.nestedScope().getObjectMapper(); if (objectMapper == null) { parentFilter = context.bitsetFilter(Queries.newNonNestedFilter()); } else { parentFilter = context.bitsetFilter(objectMapper.nestedTypeFilter()); } - childFilter = nestedObjectMapper.nestedTypeFilter(); + try { context.nestedScope().nextLevel(nestedObjectMapper); innerQuery = this.query.toQuery(context); } finally { context.nestedScope().previousLevel(); } - return new ToParentBlockJoinQuery(Queries.filtered(innerQuery, childFilter), parentFilter, scoreMode); + + // ToParentBlockJoinQuery requires that the inner query only matches documents + // in its child space + if (new NestedHelper(context.getMapperService()).mightMatchNonNestedDocs(innerQuery, path)) { + innerQuery = Queries.filtered(innerQuery, nestedObjectMapper.nestedTypeFilter()); + } + + return new ESToParentBlockJoinQuery(innerQuery, parentFilter, scoreMode, + objectMapper == null ? null : objectMapper.fullPath()); } @Override diff --git a/core/src/main/java/org/elasticsearch/index/search/ESToParentBlockJoinQuery.java b/core/src/main/java/org/elasticsearch/index/search/ESToParentBlockJoinQuery.java new file mode 100644 index 00000000000..1ee427599ca --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/search/ESToParentBlockJoinQuery.java @@ -0,0 +1,101 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.ScoreMode; +import org.apache.lucene.search.join.ToParentBlockJoinQuery; + +import java.io.IOException; +import java.util.Objects; + +/** A {@link ToParentBlockJoinQuery} that allows to retrieve its nested path. */ +public final class ESToParentBlockJoinQuery extends Query { + + private final ToParentBlockJoinQuery query; + private final String path; + + public ESToParentBlockJoinQuery(Query childQuery, BitSetProducer parentsFilter, ScoreMode scoreMode, String path) { + this(new ToParentBlockJoinQuery(childQuery, parentsFilter, scoreMode), path); + } + + private ESToParentBlockJoinQuery(ToParentBlockJoinQuery query, String path) { + this.query = query; + this.path = path; + } + + /** Return the child query. */ + public Query getChildQuery() { + return query.getChildQuery(); + } + + /** Return the path of results of this query, or {@code null} if matches are at the root level. */ + public String getPath() { + return path; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query innerRewrite = query.rewrite(reader); + if (innerRewrite != query) { + // Right now ToParentBlockJoinQuery always rewrites to a ToParentBlockJoinQuery + // so the else block will never be used. It is useful in the case that + // ToParentBlockJoinQuery one day starts to rewrite to a different query, eg. + // a MatchNoDocsQuery if it realizes that it cannot match any docs and rewrites + // to a MatchNoDocsQuery. In that case it would be fine to lose information + // about the nested path. + if (innerRewrite instanceof ToParentBlockJoinQuery) { + return new ESToParentBlockJoinQuery((ToParentBlockJoinQuery) innerRewrite, path); + } else { + return innerRewrite; + } + } + return super.rewrite(reader); + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + return query.createWeight(searcher, needsScores); + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + ESToParentBlockJoinQuery that = (ESToParentBlockJoinQuery) obj; + return query.equals(that.query) && Objects.equals(path, that.path); + } + + @Override + public int hashCode() { + return Objects.hash(getClass(), query, path); + } + + @Override + public String toString(String field) { + return query.toString(field); + } + +} diff --git a/core/src/main/java/org/elasticsearch/index/search/NestedHelper.java b/core/src/main/java/org/elasticsearch/index/search/NestedHelper.java new file mode 100644 index 00000000000..cffb42bf809 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/search/NestedHelper.java @@ -0,0 +1,178 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.search; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.BooleanClause.Occur; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ObjectMapper; + +/** Utility class to filter parent and children clauses when building nested + * queries. */ +public final class NestedHelper { + + private final MapperService mapperService; + + public NestedHelper(MapperService mapperService) { + this.mapperService = mapperService; + } + + /** Returns true if the given query might match nested documents. */ + public boolean mightMatchNestedDocs(Query query) { + if (query instanceof ConstantScoreQuery) { + return mightMatchNestedDocs(((ConstantScoreQuery) query).getQuery()); + } else if (query instanceof BoostQuery) { + return mightMatchNestedDocs(((BoostQuery) query).getQuery()); + } else if (query instanceof MatchAllDocsQuery) { + return true; + } else if (query instanceof MatchNoDocsQuery) { + return false; + } else if (query instanceof TermQuery) { + // We only handle term queries and range queries, which should already + // cover a high majority of use-cases + return mightMatchNestedDocs(((TermQuery) query).getTerm().field()); + } else if (query instanceof PointRangeQuery) { + return mightMatchNestedDocs(((PointRangeQuery) query).getField()); + } else if (query instanceof BooleanQuery) { + final BooleanQuery bq = (BooleanQuery) query; + final boolean hasRequiredClauses = bq.clauses().stream().anyMatch(BooleanClause::isRequired); + if (hasRequiredClauses) { + return bq.clauses().stream() + .filter(BooleanClause::isRequired) + .map(BooleanClause::getQuery) + .allMatch(this::mightMatchNestedDocs); + } else { + return bq.clauses().stream() + .filter(c -> c.getOccur() == Occur.SHOULD) + .map(BooleanClause::getQuery) + .anyMatch(this::mightMatchNestedDocs); + } + } else if (query instanceof ESToParentBlockJoinQuery) { + return ((ESToParentBlockJoinQuery) query).getPath() != null; + } else { + return true; + } + } + + /** Returns true if a query on the given field might match nested documents. */ + boolean mightMatchNestedDocs(String field) { + if (field.startsWith("_")) { + // meta field. Every meta field behaves differently, eg. nested + // documents have the same _uid as their parent, put their path in + // the _type field but do not have _field_names. So we just ignore + // meta fields and return true, which is always safe, it just means + // we might add a nested filter when it is nor required. + return true; + } + if (mapperService.fullName(field) == null) { + // field does not exist + return false; + } + for (String parent = parentObject(field); parent != null; parent = parentObject(parent)) { + ObjectMapper mapper = mapperService.getObjectMapper(parent); + if (mapper != null && mapper.nested().isNested()) { + return true; + } + } + return false; + } + + /** Returns true if the given query might match parent documents or documents + * that are nested under a different path. */ + public boolean mightMatchNonNestedDocs(Query query, String nestedPath) { + if (query instanceof ConstantScoreQuery) { + return mightMatchNonNestedDocs(((ConstantScoreQuery) query).getQuery(), nestedPath); + } else if (query instanceof BoostQuery) { + return mightMatchNonNestedDocs(((BoostQuery) query).getQuery(), nestedPath); + } else if (query instanceof MatchAllDocsQuery) { + return true; + } else if (query instanceof MatchNoDocsQuery) { + return false; + } else if (query instanceof TermQuery) { + return mightMatchNonNestedDocs(((TermQuery) query).getTerm().field(), nestedPath); + } else if (query instanceof PointRangeQuery) { + return mightMatchNonNestedDocs(((PointRangeQuery) query).getField(), nestedPath); + } else if (query instanceof BooleanQuery) { + final BooleanQuery bq = (BooleanQuery) query; + final boolean hasRequiredClauses = bq.clauses().stream().anyMatch(BooleanClause::isRequired); + if (hasRequiredClauses) { + return bq.clauses().stream() + .filter(BooleanClause::isRequired) + .map(BooleanClause::getQuery) + .allMatch(q -> mightMatchNonNestedDocs(q, nestedPath)); + } else { + return bq.clauses().stream() + .filter(c -> c.getOccur() == Occur.SHOULD) + .map(BooleanClause::getQuery) + .anyMatch(q -> mightMatchNonNestedDocs(q, nestedPath)); + } + } else { + return true; + } + } + + /** Returns true if a query on the given field might match parent documents + * or documents that are nested under a different path. */ + boolean mightMatchNonNestedDocs(String field, String nestedPath) { + if (field.startsWith("_")) { + // meta field. Every meta field behaves differently, eg. nested + // documents have the same _uid as their parent, put their path in + // the _type field but do not have _field_names. So we just ignore + // meta fields and return true, which is always safe, it just means + // we might add a nested filter when it is nor required. + return true; + } + if (mapperService.fullName(field) == null) { + return false; + } + for (String parent = parentObject(field); parent != null; parent = parentObject(parent)) { + ObjectMapper mapper = mapperService.getObjectMapper(parent); + if (mapper!= null && mapper.nested().isNested()) { + if (mapper.fullPath().equals(nestedPath)) { + // If the mapper does not include in its parent or in the root object then + // the query might only match nested documents with the given path + return mapper.nested().isIncludeInParent() || mapper.nested().isIncludeInRoot(); + } else { + // the first parent nested mapper does not have the expected path + // It might be misconfiguration or a sub nested mapper + return true; + } + } + } + return true; // the field is not a sub field of the nested path + } + + private static String parentObject(String field) { + int lastDot = field.lastIndexOf('.'); + if (lastDot == -1) { + return null; + } + return field.substring(0, lastDot); + } + +} diff --git a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java index d981f4e68df..6619c1ab9e5 100644 --- a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java @@ -22,7 +22,6 @@ package org.elasticsearch.search; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -49,6 +48,7 @@ import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.ParsedQuery; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.search.NestedHelper; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.search.aggregations.SearchContextAggregations; @@ -244,7 +244,7 @@ final class DefaultSearchContext extends SearchContext { if (queryBoost() != AbstractQueryBuilder.DEFAULT_BOOST) { parsedQuery(new ParsedQuery(new FunctionScoreQuery(query(), new WeightFactorFunction(queryBoost)), parsedQuery())); } - this.query = buildFilteredQuery(); + this.query = buildFilteredQuery(query); if (rewrite) { try { this.query = searcher.rewrite(query); @@ -254,67 +254,51 @@ final class DefaultSearchContext extends SearchContext { } } - private Query buildFilteredQuery() { - final Query searchFilter = searchFilter(queryShardContext.getTypes()); - if (searchFilter == null) { - return originalQuery.query(); - } - final Query result; - if (Queries.isConstantMatchAllQuery(query())) { - result = new ConstantScoreQuery(searchFilter); - } else { - result = new BooleanQuery.Builder() - .add(query, Occur.MUST) - .add(searchFilter, Occur.FILTER) - .build(); - } - return result; - } - @Override - @Nullable - public Query searchFilter(String[] types) { - Query typesFilter = createSearchFilter(types, aliasFilter, mapperService().hasNested()); - if (sliceBuilder == null) { - return typesFilter; + public Query buildFilteredQuery(Query query) { + List filters = new ArrayList<>(); + Query typeFilter = createTypeFilter(queryShardContext.getTypes()); + if (typeFilter != null) { + filters.add(typeFilter); } - Query sliceFilter = sliceBuilder.toFilter(queryShardContext, shardTarget().getShardId().getId(), - queryShardContext.getIndexSettings().getNumberOfShards()); - if (typesFilter == null) { - return sliceFilter; + + if (mapperService().hasNested() + && typeFilter == null // when a _type filter is set, it will automatically exclude nested docs + && new NestedHelper(mapperService()).mightMatchNestedDocs(query) + && (aliasFilter == null || new NestedHelper(mapperService()).mightMatchNestedDocs(aliasFilter))) { + filters.add(Queries.newNonNestedFilter()); + } + + if (aliasFilter != null) { + filters.add(aliasFilter); + } + + if (sliceBuilder != null) { + filters.add(sliceBuilder.toFilter(queryShardContext, shardTarget().getShardId().getId(), + queryShardContext.getIndexSettings().getNumberOfShards())); + } + + if (filters.isEmpty()) { + return query; + } else { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(query, Occur.MUST); + for (Query filter : filters) { + builder.add(filter, Occur.FILTER); + } + return builder.build(); } - return new BooleanQuery.Builder() - .add(typesFilter, Occur.FILTER) - .add(sliceFilter, Occur.FILTER) - .build(); } - // extracted to static helper method to make writing unit tests easier: - static Query createSearchFilter(String[] types, Query aliasFilter, boolean hasNestedFields) { - Query typesFilter = null; + private static Query createTypeFilter(String[] types) { if (types != null && types.length >= 1) { BytesRef[] typesBytes = new BytesRef[types.length]; for (int i = 0; i < typesBytes.length; i++) { typesBytes[i] = new BytesRef(types[i]); } - typesFilter = new TypeFieldMapper.TypesQuery(typesBytes); + return new TypeFieldMapper.TypesQuery(typesBytes); } - - if (typesFilter == null && aliasFilter == null && hasNestedFields == false) { - return null; - } - - BooleanQuery.Builder bq = new BooleanQuery.Builder(); - if (typesFilter != null) { - bq.add(typesFilter, Occur.FILTER); - } else if (hasNestedFields) { - bq.add(Queries.newNonNestedFilter(), Occur.FILTER); - } - if (aliasFilter != null) { - bq.add(aliasFilter, Occur.FILTER); - } - - return bq.build(); + return null; } @Override diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java b/core/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java index cb7c716198e..9b4a02cd816 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/AggregationPhase.java @@ -18,8 +18,6 @@ */ package org.elasticsearch.search.aggregations; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; @@ -100,16 +98,8 @@ public class AggregationPhase implements SearchPhase { // optimize the global collector based execution if (!globals.isEmpty()) { BucketCollector globalsCollector = BucketCollector.wrap(globals); - Query query = Queries.newMatchAllQuery(); - Query searchFilter = context.searchFilter(context.getQueryShardContext().getTypes()); + Query query = context.buildFilteredQuery(Queries.newMatchAllQuery()); - if (searchFilter != null) { - BooleanQuery filtered = new BooleanQuery.Builder() - .add(query, Occur.MUST) - .add(searchFilter, Occur.FILTER) - .build(); - query = filtered; - } try { final Collector collector; if (context.getProfilers() == null) { diff --git a/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java index adc528728e0..fadf979d911 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/FilteredSearchContext.java @@ -102,8 +102,8 @@ public abstract class FilteredSearchContext extends SearchContext { } @Override - public Query searchFilter(String[] types) { - return in.searchFilter(types); + public Query buildFilteredQuery(Query query) { + return in.buildFilteredQuery(query); } @Override diff --git a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java index 4201dc0180b..59d3646f18f 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -125,7 +125,9 @@ public abstract class SearchContext extends AbstractRefCounted implements Releas */ public abstract void preProcess(boolean rewrite); - public abstract Query searchFilter(String[] types); + /** Automatically apply all required filters to the given query such as + * alias filters, types filters, etc. */ + public abstract Query buildFilteredQuery(Query query); public abstract long id(); diff --git a/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java b/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java index ebdcd2d72b7..0a6d684ccf5 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/internal/SubSearchContext.java @@ -81,7 +81,7 @@ public class SubSearchContext extends FilteredSearchContext { } @Override - public Query searchFilter(String[] types) { + public Query buildFilteredQuery(Query query) { throw new UnsupportedOperationException("this context should be read only"); } diff --git a/core/src/test/java/org/apache/lucene/search/QueriesTests.java b/core/src/test/java/org/apache/lucene/search/QueriesTests.java new file mode 100644 index 00000000000..43723ddcdbb --- /dev/null +++ b/core/src/test/java/org/apache/lucene/search/QueriesTests.java @@ -0,0 +1,33 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.search; + +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.test.ESTestCase; + +public class QueriesTests extends ESTestCase { + + public void testNonNestedQuery() { + // This is a custom query that extends AutomatonQuery and want to make sure the equals method works + assertEquals(Queries.newNonNestedFilter(), Queries.newNonNestedFilter()); + assertEquals(Queries.newNonNestedFilter().hashCode(), Queries.newNonNestedFilter().hashCode()); + } + +} diff --git a/core/src/test/java/org/elasticsearch/index/query/NestedQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/NestedQueryBuilderTests.java index 6fdac686287..f6f8fde7a49 100644 --- a/core/src/test/java/org/elasticsearch/index/query/NestedQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/NestedQueryBuilderTests.java @@ -27,6 +27,7 @@ import org.apache.lucene.search.join.ToParentBlockJoinQuery; import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.search.ESToParentBlockJoinQuery; import org.elasticsearch.search.fetch.subphase.InnerHitsContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.sort.FieldSortBuilder; @@ -85,8 +86,8 @@ public class NestedQueryBuilderTests extends AbstractQueryTestCase 0); + NestedQueryBuilder queryBuilder = new NestedQueryBuilder("nested1", new MatchAllQueryBuilder(), ScoreMode.Avg); + ESToParentBlockJoinQuery query = (ESToParentBlockJoinQuery) queryBuilder.toQuery(context); + + Query expectedChildQuery = new BooleanQuery.Builder() + .add(new MatchAllDocsQuery(), Occur.MUST) + // we automatically add a filter since the inner query might match non-nested docs + .add(new TermQuery(new Term("_type", "__nested1")), Occur.FILTER) + .build(); + assertEquals(expectedChildQuery, query.getChildQuery()); + + assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(query)); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested1")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested2")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested3")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested_missing")); + + queryBuilder = new NestedQueryBuilder("nested1", new TermQueryBuilder("nested1.foo", "bar"), ScoreMode.Avg); + query = (ESToParentBlockJoinQuery) queryBuilder.toQuery(context); + + // this time we do not add a filter since the inner query only matches inner docs + expectedChildQuery = new TermQuery(new Term("nested1.foo", "bar")); + assertEquals(expectedChildQuery, query.getChildQuery()); + + assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(query)); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested1")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested2")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested3")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested_missing")); + + queryBuilder = new NestedQueryBuilder("nested2", new TermQueryBuilder("nested2.foo", "bar"), ScoreMode.Avg); + query = (ESToParentBlockJoinQuery) queryBuilder.toQuery(context); + + // we need to add the filter again because of include_in_parent + expectedChildQuery = new BooleanQuery.Builder() + .add(new TermQuery(new Term("nested2.foo", "bar")), Occur.MUST) + .add(new TermQuery(new Term("_type", "__nested2")), Occur.FILTER) + .build(); + assertEquals(expectedChildQuery, query.getChildQuery()); + + assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(query)); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested1")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested2")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested3")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested_missing")); + + queryBuilder = new NestedQueryBuilder("nested3", new TermQueryBuilder("nested3.foo", "bar"), ScoreMode.Avg); + query = (ESToParentBlockJoinQuery) queryBuilder.toQuery(context); + + // we need to add the filter again because of include_in_root + expectedChildQuery = new BooleanQuery.Builder() + .add(new TermQuery(new Term("nested3.foo", "bar")), Occur.MUST) + .add(new TermQuery(new Term("_type", "__nested3")), Occur.FILTER) + .build(); + assertEquals(expectedChildQuery, query.getChildQuery()); + + assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(query)); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested1")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested2")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested3")); + assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(query, "nested_missing")); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java b/core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java deleted file mode 100644 index 2c3676f1317..00000000000 --- a/core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.search; - -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.common.lucene.search.Queries; -import org.elasticsearch.index.mapper.TypeFieldMapper; -import org.elasticsearch.test.ESTestCase; - -import static org.apache.lucene.search.BooleanClause.Occur.FILTER; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; - -public class DefaultSearchContextTests extends ESTestCase { - - public void testCreateSearchFilter() { - Query searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, null, randomBoolean()); - Query expectedQuery = new BooleanQuery.Builder() - .add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER) - .build(); - assertThat(searchFilter, equalTo(expectedQuery)); - - searchFilter = DefaultSearchContext.createSearchFilter(new String[]{"type1", "type2"}, new MatchAllDocsQuery(), randomBoolean()); - expectedQuery = new BooleanQuery.Builder() - .add(new TypeFieldMapper.TypesQuery(new BytesRef("type1"), new BytesRef("type2")), FILTER) - .add(new MatchAllDocsQuery(), FILTER) - .build(); - assertThat(searchFilter, equalTo(expectedQuery)); - - searchFilter = DefaultSearchContext.createSearchFilter(null, null, false); - assertThat(searchFilter, nullValue()); - - searchFilter = DefaultSearchContext.createSearchFilter(null, null, true); - expectedQuery = new BooleanQuery.Builder().add(Queries.newNonNestedFilter(), FILTER).build(); - assertThat(searchFilter, equalTo(expectedQuery)); - - searchFilter = DefaultSearchContext.createSearchFilter(null, new MatchAllDocsQuery(), true); - expectedQuery = new BooleanQuery.Builder() - .add(new MatchAllDocsQuery(), FILTER) - .add(Queries.newNonNestedFilter(), FILTER) - .build(); - assertThat(searchFilter, equalTo(expectedQuery)); - - searchFilter = DefaultSearchContext.createSearchFilter(null, new MatchAllDocsQuery(), false); - expectedQuery = new BooleanQuery.Builder() - .add(new MatchAllDocsQuery(), FILTER) - .build(); - assertThat(searchFilter, equalTo(expectedQuery)); - } - -} diff --git a/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java b/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java index 718893b42db..47164688ea5 100644 --- a/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java +++ b/core/src/test/java/org/elasticsearch/search/nested/SimpleNestedIT.java @@ -318,7 +318,7 @@ public class SimpleNestedIT extends ESIntegTestCase { assertThat(searchResponse.getHits().getTotalHits(), equalTo(1L)); Explanation explanation = searchResponse.getHits().getHits()[0].getExplanation(); assertThat(explanation.getValue(), equalTo(searchResponse.getHits().getHits()[0].getScore())); - assertThat(explanation.toString(), startsWith("0.36464313 = sum of:\n 0.36464313 = Score based on 2 child docs in range from 0 to 1")); + assertThat(explanation.toString(), startsWith("0.36464313 = Score based on 2 child docs in range from 0 to 1")); } public void testSimpleNestedSorting() throws Exception { diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index 7185ddbb9b5..61a8ac641c0 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -36,88 +36,55 @@ This will yield the following result: "matched": true, "explanation": { "value": 1.55077, - "description": "sum of:", + "description": "weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:", "details": [ { "value": 1.55077, - "description": "weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:", + "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", "details": [ { - "value": 1.55077, - "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", - "details": [ - { - "value": 1.3862944, - "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:", - "details": [ - { - "value": 1.0, - "description": "docFreq", - "details": [] - }, - { - "value": 5.0, - "description": "docCount", - "details": [] - } - ] - }, - { - "value": 1.1186441, - "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:", - "details": [ - { - "value": 1.0, - "description": "termFreq=1.0", - "details": [] - }, - { - "value": 1.2, - "description": "parameter k1", - "details": [] - }, - { - "value": 0.75, - "description": "parameter b", - "details": [] - }, - { - "value": 5.4, - "description": "avgFieldLength", - "details": [] - }, - { - "value": 4.0, - "description": "fieldLength", - "details": [] - } - ] - } - ] - } - ] - }, - { - "value": 0.0, - "description": "match on required clause, product of:", - "details": [ - { - "value": 0.0, - "description": "# clause", - "details": [] - }, - { - "value": 1.0, - "description": "*:*, product of:", + "value": 1.3862944, + "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:", "details": [ { "value": 1.0, - "description": "boost", + "description": "docFreq", "details": [] }, + { + "value": 5.0, + "description": "docCount", + "details": [] + } + ] + }, + { + "value": 1.1186441, + "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:", + "details": [ { "value": 1.0, - "description": "queryNorm", + "description": "termFreq=1.0", + "details": [] + }, + { + "value": 1.2, + "description": "parameter k1", + "details": [] + }, + { + "value": 0.75, + "description": "parameter b", + "details": [] + }, + { + "value": 5.4, + "description": "avgFieldLength", + "details": [] + }, + { + "value": 4.0, + "description": "fieldLength", "details": [] } ] diff --git a/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java b/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java index bec80036230..f77414e7d50 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java +++ b/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java @@ -115,7 +115,7 @@ public class TestSearchContext extends SearchContext { } @Override - public Query searchFilter(String[] types) { + public Query buildFilteredQuery(Query query) { return null; }