Fail `span_multi` queries that exceed the boolean max clause limit (#30913)
By default, a `span_multi` query now limits its term expansions to the boolean max clause count. This bounds heap usage in case of high-cardinality term expansions. The limit applies only if a `top_terms_N` rewrite is not used in the inner multi term query.
This commit is contained in:
parent
b30aa3137d
commit
75a676c70b
|
@ -37,10 +37,9 @@ GET /_search
|
|||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
WARNING: By default `span_multi` queries are rewritten to a `span_or` query
|
||||
containing **all** the expanded terms. This can be expensive if the number of expanded
|
||||
terms is large. To avoid an unbounded expansion you can set the
|
||||
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
|
||||
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
|
||||
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
|
||||
rewrite any prefix query on the field to a single term query that matches the indexed prefix.
|
||||
WARNING: `span_multi` queries will fail with a too-many-clauses error if the number of terms that match the query exceeds the
|
||||
boolean query limit (defaults to 1024). To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
|
||||
rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
|
||||
you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
|
||||
rewrite any prefix query on the field to a single term query that matches the indexed prefix.
|
||||
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
@ -26,11 +29,15 @@ import org.apache.lucene.search.PrefixQuery;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||
import org.apache.lucene.search.ScoringRewrite;
|
||||
import org.apache.lucene.search.TopTermsRewrite;
|
||||
import org.apache.lucene.search.spans.SpanBoostQuery;
|
||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
|
@ -42,6 +49,8 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
|
|||
import org.elasticsearch.index.query.support.QueryParsers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
|
@ -49,12 +58,10 @@ import java.util.Objects;
|
|||
* as a {@link SpanQueryBuilder} so it can be nested.
|
||||
*/
|
||||
public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
|
||||
implements SpanQueryBuilder {
|
||||
implements SpanQueryBuilder {
|
||||
|
||||
public static final String NAME = "span_multi";
|
||||
|
||||
private static final ParseField MATCH_FIELD = new ParseField("match");
|
||||
|
||||
private final MultiTermQueryBuilder multiTermQueryBuilder;
|
||||
|
||||
public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
|
||||
|
@ -83,7 +90,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
|||
|
||||
@Override
|
||||
protected void doXContent(XContentBuilder builder, Params params)
|
||||
throws IOException {
|
||||
throws IOException {
|
||||
builder.startObject(NAME);
|
||||
builder.field(MATCH_FIELD.getPreferredName());
|
||||
multiTermQueryBuilder.toXContent(builder, params);
|
||||
|
@ -105,7 +112,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
|||
QueryBuilder query = parseInnerQueryBuilder(parser);
|
||||
if (query instanceof MultiTermQueryBuilder == false) {
|
||||
throw new ParsingException(parser.getTokenLocation(),
|
||||
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
|
||||
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
|
||||
}
|
||||
subQuery = (MultiTermQueryBuilder) query;
|
||||
} else {
|
||||
|
@ -124,12 +131,55 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
|||
|
||||
if (subQuery == null) {
|
||||
throw new ParsingException(parser.getTokenLocation(),
|
||||
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
|
||||
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
|
||||
}
|
||||
|
||||
return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
|
||||
}
|
||||
|
||||
public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {
|
||||
|
||||
private MultiTermQuery multiTermQuery;
|
||||
private final long maxExpansions;
|
||||
|
||||
TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
|
||||
maxExpansions = max;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||
multiTermQuery = query;
|
||||
return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
|
||||
}
|
||||
|
||||
final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {
|
||||
|
||||
@Override
|
||||
protected List<SpanQuery> getTopLevelBuilder() {
|
||||
return new ArrayList();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query build(List<SpanQuery> builder) {
|
||||
return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void checkMaxClauseCount(int count) {
|
||||
if (count > maxExpansions) {
|
||||
throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
|
||||
"exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
|
||||
SpanTermQuery q = new SpanTermQuery(term, states);
|
||||
topLevel.add(q);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query doToQuery(QueryShardContext context) throws IOException {
|
||||
Query subQuery = multiTermQueryBuilder.toQuery(context);
|
||||
|
@ -190,10 +240,15 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
|
|||
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
|
||||
}
|
||||
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
|
||||
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
|
||||
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
|
||||
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
|
||||
}
|
||||
}
|
||||
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
|
||||
return new SpanBoostQuery(spanQuery, boost);
|
||||
}
|
||||
|
||||
return spanQuery;
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,6 @@ import org.elasticsearch.Version;
|
|||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
|
@ -238,4 +237,38 @@ public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase<SpanMu
|
|||
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
|
||||
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
|
||||
}
|
||||
|
||||
public void testDefaultMaxRewriteBuilder() throws Exception {
|
||||
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
|
||||
toQuery(createShardContext());
|
||||
|
||||
if (query instanceof SpanBoostQuery) {
|
||||
query = ((SpanBoostQuery)query).getQuery();
|
||||
}
|
||||
|
||||
assertTrue(query instanceof SpanMultiTermQueryWrapper);
|
||||
if (query instanceof SpanMultiTermQueryWrapper) {
|
||||
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
|
||||
assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testTopNMultiTermsRewriteInsideSpan() throws Exception {
|
||||
|
||||
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
|
||||
("top_terms_boost_2000")).
|
||||
toQuery(createShardContext());
|
||||
|
||||
if (query instanceof SpanBoostQuery) {
|
||||
query = ((SpanBoostQuery)query).getQuery();
|
||||
}
|
||||
|
||||
assertTrue(query instanceof SpanMultiTermQueryWrapper);
|
||||
if (query instanceof SpanMultiTermQueryWrapper) {
|
||||
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
|
||||
assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
|
||||
package org.elasticsearch.search.query;
|
||||
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.util.English;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||
|
@ -33,8 +35,12 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
|
|||
import org.elasticsearch.index.query.MatchQueryBuilder;
|
||||
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
|
||||
import org.elasticsearch.index.query.Operator;
|
||||
import org.elasticsearch.index.query.QueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.index.query.RangeQueryBuilder;
|
||||
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
|
||||
import org.elasticsearch.index.query.SpanNearQueryBuilder;
|
||||
import org.elasticsearch.index.query.SpanTermQueryBuilder;
|
||||
import org.elasticsearch.index.query.TermQueryBuilder;
|
||||
import org.elasticsearch.index.query.WrapperQueryBuilder;
|
||||
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
|
||||
|
@ -52,6 +58,7 @@ import org.joda.time.DateTimeZone;
|
|||
import org.joda.time.format.ISODateTimeFormat;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Random;
|
||||
|
@ -1819,4 +1826,30 @@ public class SearchQueryIT extends ESIntegTestCase {
|
|||
assertHitCount(searchResponse, 1);
|
||||
}
|
||||
|
||||
public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
|
||||
Settings.Builder builder = Settings.builder();
|
||||
builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();
|
||||
|
||||
createIndex("test", builder.build());
|
||||
ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
|
||||
int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||
try {
|
||||
BooleanQuery.setMaxClauseCount(2);
|
||||
for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
|
||||
reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
|
||||
Integer.toString(i) + " bar baz"));
|
||||
}
|
||||
indexRandom(true, false, reqs);
|
||||
|
||||
QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
|
||||
("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));
|
||||
|
||||
expectThrows(ElasticsearchException.class, () ->
|
||||
client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
|
||||
} finally {
|
||||
BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue