Fail `span_multi` queries that exceed boolean max clause limit (#30913)
By default, a `span_multi` query now limits its term expansions to the boolean max clause count. This bounds heap usage when the inner query expands to a high-cardinality set of terms. The limit applies only if a `top_terms_N` rewrite is not used in the inner multi-term query.
This commit is contained in:
parent
b30aa3137d
commit
75a676c70b
|
@ -37,10 +37,9 @@ GET /_search
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
// CONSOLE
|
// CONSOLE
|
||||||
|
|
||||||
WARNING: By default `span_multi` queries are rewritten to a `span_or` query
|
WARNING: `span_multi` queries will fail with a too-many-clauses error if the number of terms that match the query exceeds the
|
||||||
containing **all** the expanded terms. This can be expensive if the number of expanded
|
boolean query limit (defaults to 1024). To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
|
||||||
terms is large. To avoid an unbounded expansion you can set the
|
rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
|
||||||
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
|
you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
|
||||||
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
|
|
||||||
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
|
|
||||||
rewrite any prefix query on the field to a single term query that matches the indexed prefix.
|
rewrite any prefix query on the field to a single term query that matches the indexed prefix.
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,9 @@
|
||||||
package org.elasticsearch.index.query;
|
package org.elasticsearch.index.query;
|
||||||
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.TermContext;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.BoostQuery;
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.MultiTermQuery;
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
|
@ -26,11 +29,15 @@ import org.apache.lucene.search.PrefixQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||||
|
import org.apache.lucene.search.ScoringRewrite;
|
||||||
|
import org.apache.lucene.search.TopTermsRewrite;
|
||||||
import org.apache.lucene.search.spans.SpanBoostQuery;
|
import org.apache.lucene.search.spans.SpanBoostQuery;
|
||||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||||
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.common.ParseField;
|
import org.elasticsearch.common.ParseField;
|
||||||
import org.elasticsearch.common.ParsingException;
|
import org.elasticsearch.common.ParsingException;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
|
@ -42,6 +49,8 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
|
||||||
import org.elasticsearch.index.query.support.QueryParsers;
|
import org.elasticsearch.index.query.support.QueryParsers;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -52,9 +61,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
||||||
implements SpanQueryBuilder {
|
implements SpanQueryBuilder {
|
||||||
|
|
||||||
public static final String NAME = "span_multi";
|
public static final String NAME = "span_multi";
|
||||||
|
|
||||||
private static final ParseField MATCH_FIELD = new ParseField("match");
|
private static final ParseField MATCH_FIELD = new ParseField("match");
|
||||||
|
|
||||||
private final MultiTermQueryBuilder multiTermQueryBuilder;
|
private final MultiTermQueryBuilder multiTermQueryBuilder;
|
||||||
|
|
||||||
public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
|
public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
|
||||||
|
@ -130,6 +137,49 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
||||||
return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
|
return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {
|
||||||
|
|
||||||
|
private MultiTermQuery multiTermQuery;
|
||||||
|
private final long maxExpansions;
|
||||||
|
|
||||||
|
TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
|
||||||
|
maxExpansions = max;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||||
|
multiTermQuery = query;
|
||||||
|
return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<SpanQuery> getTopLevelBuilder() {
|
||||||
|
return new ArrayList();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query build(List<SpanQuery> builder) {
|
||||||
|
return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void checkMaxClauseCount(int count) {
|
||||||
|
if (count > maxExpansions) {
|
||||||
|
throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
|
||||||
|
"exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
|
||||||
|
SpanTermQuery q = new SpanTermQuery(term, states);
|
||||||
|
topLevel.add(q);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query doToQuery(QueryShardContext context) throws IOException {
|
protected Query doToQuery(QueryShardContext context) throws IOException {
|
||||||
Query subQuery = multiTermQueryBuilder.toQuery(context);
|
Query subQuery = multiTermQueryBuilder.toQuery(context);
|
||||||
|
@ -190,10 +240,15 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
||||||
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
|
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
|
||||||
}
|
}
|
||||||
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
|
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
|
||||||
|
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
|
||||||
|
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
|
||||||
|
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
|
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
|
||||||
return new SpanBoostQuery(spanQuery, boost);
|
return new SpanBoostQuery(spanQuery, boost);
|
||||||
}
|
}
|
||||||
|
|
||||||
return spanQuery;
|
return spanQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -34,7 +34,6 @@ import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.compress.CompressedXContent;
|
import org.elasticsearch.common.compress.CompressedXContent;
|
||||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.index.mapper.MapperService;
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
@ -238,4 +237,38 @@ public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase<SpanMu
|
||||||
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
|
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
|
||||||
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
|
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testDefaultMaxRewriteBuilder() throws Exception {
|
||||||
|
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
|
||||||
|
toQuery(createShardContext());
|
||||||
|
|
||||||
|
if (query instanceof SpanBoostQuery) {
|
||||||
|
query = ((SpanBoostQuery)query).getQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(query instanceof SpanMultiTermQueryWrapper);
|
||||||
|
if (query instanceof SpanMultiTermQueryWrapper) {
|
||||||
|
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
|
||||||
|
assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTopNMultiTermsRewriteInsideSpan() throws Exception {
|
||||||
|
|
||||||
|
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
|
||||||
|
("top_terms_boost_2000")).
|
||||||
|
toQuery(createShardContext());
|
||||||
|
|
||||||
|
if (query instanceof SpanBoostQuery) {
|
||||||
|
query = ((SpanBoostQuery)query).getQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(query instanceof SpanMultiTermQueryWrapper);
|
||||||
|
if (query instanceof SpanMultiTermQueryWrapper) {
|
||||||
|
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
|
||||||
|
assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,9 @@
|
||||||
|
|
||||||
package org.elasticsearch.search.query;
|
package org.elasticsearch.search.query;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.util.English;
|
import org.apache.lucene.util.English;
|
||||||
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||||
|
@ -33,8 +35,12 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
|
||||||
import org.elasticsearch.index.query.MatchQueryBuilder;
|
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||||
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
||||||
import org.elasticsearch.index.query.Operator;
|
import org.elasticsearch.index.query.Operator;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilder;
|
||||||
import org.elasticsearch.index.query.QueryBuilders;
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
import org.elasticsearch.index.query.RangeQueryBuilder;
|
import org.elasticsearch.index.query.RangeQueryBuilder;
|
||||||
|
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
|
||||||
|
import org.elasticsearch.index.query.SpanNearQueryBuilder;
|
||||||
|
import org.elasticsearch.index.query.SpanTermQueryBuilder;
|
||||||
import org.elasticsearch.index.query.TermQueryBuilder;
|
import org.elasticsearch.index.query.TermQueryBuilder;
|
||||||
import org.elasticsearch.index.query.WrapperQueryBuilder;
|
import org.elasticsearch.index.query.WrapperQueryBuilder;
|
||||||
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
|
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
|
||||||
|
@ -52,6 +58,7 @@ import org.joda.time.DateTimeZone;
|
||||||
import org.joda.time.format.ISODateTimeFormat;
|
import org.joda.time.format.ISODateTimeFormat;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
@ -1819,4 +1826,30 @@ public class SearchQueryIT extends ESIntegTestCase {
|
||||||
assertHitCount(searchResponse, 1);
|
assertHitCount(searchResponse, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
|
||||||
|
Settings.Builder builder = Settings.builder();
|
||||||
|
builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();
|
||||||
|
|
||||||
|
createIndex("test", builder.build());
|
||||||
|
ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
|
||||||
|
int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||||
|
try {
|
||||||
|
BooleanQuery.setMaxClauseCount(2);
|
||||||
|
for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
|
||||||
|
reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
|
||||||
|
Integer.toString(i) + " bar baz"));
|
||||||
|
}
|
||||||
|
indexRandom(true, false, reqs);
|
||||||
|
|
||||||
|
QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
|
||||||
|
("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));
|
||||||
|
|
||||||
|
expectThrows(ElasticsearchException.class, () ->
|
||||||
|
client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
|
||||||
|
} finally {
|
||||||
|
BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue