From 75a676c70bb4333bde6f6b1cd89940c82d8299ad Mon Sep 17 00:00:00 2001 From: Nirmal Chidambaram Date: Thu, 7 Jun 2018 07:34:39 +0000 Subject: [PATCH] Fail `span_multi` queries that exceeds boolean max clause limit (#30913) By default span_multi query will limit term expansions = boolean max clause. This will limit high heap usage in case of high cardinality term expansions. This applies only if top_terms_N is not used in inner multi query. --- .../query-dsl/span-multi-term-query.asciidoc | 13 ++-- .../query/SpanMultiTermQueryBuilder.java | 67 +++++++++++++++++-- .../query/SpanMultiTermQueryBuilderTests.java | 35 +++++++++- .../search/query/SearchQueryIT.java | 33 +++++++++ 4 files changed, 134 insertions(+), 14 deletions(-) diff --git a/docs/reference/query-dsl/span-multi-term-query.asciidoc b/docs/reference/query-dsl/span-multi-term-query.asciidoc index ff7af83451b..40bd1553298 100644 --- a/docs/reference/query-dsl/span-multi-term-query.asciidoc +++ b/docs/reference/query-dsl/span-multi-term-query.asciidoc @@ -37,10 +37,9 @@ GET /_search -------------------------------------------------- // CONSOLE -WARNING: By default `span_multi queries are rewritten to a `span_or` query -containing **all** the expanded terms. This can be expensive if the number of expanded -terms is large. To avoid an unbounded expansion you can set the -<> of the multi term query to `top_terms_*` -rewrite. Or, if you use `span_multi` on `prefix` query only, you can -activate the <> field option of the `text` field instead. This will -rewrite any prefix query on the field to a a single term query that matches the indexed prefix. \ No newline at end of file +WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the +boolean query limit (defaults to 1024).To avoid an unbounded expansion you can set the <> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only, +you can activate the <> field option of the `text` field instead. This will +rewrite any prefix query on the field to a a single term query that matches the indexed prefix. + diff --git a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java index b574cadc423..0576a68a85b 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java @@ -19,6 +19,9 @@ package org.elasticsearch.index.query; import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MultiTermQuery; @@ -26,11 +29,15 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.ScoringRewrite; +import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.elasticsearch.Version; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; @@ -42,6 +49,8 @@ import org.elasticsearch.index.mapper.TextFieldMapper; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; /** @@ -49,12 +58,10 @@ import java.util.Objects; * as a {@link SpanQueryBuilder} so it can be nested. */ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder - implements SpanQueryBuilder { + implements SpanQueryBuilder { public static final String NAME = "span_multi"; - private static final ParseField MATCH_FIELD = new ParseField("match"); - private final MultiTermQueryBuilder multiTermQueryBuilder; public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) { @@ -83,7 +90,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder> delegate = new ScoringRewrite>() { + + @Override + protected List getTopLevelBuilder() { + return new ArrayList(); + } + + @Override + protected Query build(List builder) { + return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()])); + } + + @Override + protected void checkMaxClauseCount(int count) { + if (count > maxExpansions) { + throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " + + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]"); + } + } + + @Override + protected void addClause(List topLevel, Term term, int docCount, float boost, TermContext states) { + SpanTermQuery q = new SpanTermQuery(term, states); + topLevel.add(q); + } + }; + } + @Override protected Query doToQuery(QueryShardContext context) throws IOException { Query subQuery = multiTermQueryBuilder.toQuery(context); @@ -190,10 +240,15 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder((MultiTermQuery) subQuery); + if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) { + ((SpanMultiTermQueryWrapper) spanQuery).setRewriteMethod(new + TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount())); + } } if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { return new SpanBoostQuery(spanQuery, boost); } + return spanQuery; } diff --git a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java index b7781682359..fae66dd68bc 100644 --- a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java @@ -34,7 +34,6 @@ import org.elasticsearch.Version; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.internal.SearchContext; @@ -238,4 +237,38 @@ public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase reqs = new ArrayList<>(); + int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); + try { + BooleanQuery.setMaxClauseCount(2); + for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) { + reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" + + Integer.toString(i) + " bar baz")); + } + indexRandom(true, false, reqs); + + QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery + ("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar")); + + expectThrows(ElasticsearchException.class, () -> + client().prepareSearch().setIndices("test").setQuery(queryBuilder).get()); + } finally { + BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount); + } + + } + }