Fail `span_multi` queries that exceeds boolean max clause limit (#30913)

By default span_multi query will limit term expansions = boolean max clause.
This will limit high heap usage in case of high cardinality term
expansions. This applies only if top_terms_N is not used in inner multi
query.
This commit is contained in:
Nirmal Chidambaram 2018-06-07 07:34:39 +00:00 committed by Jim Ferenczi
parent b30aa3137d
commit 75a676c70b
4 changed files with 134 additions and 14 deletions

View File

@ -37,10 +37,9 @@ GET /_search
--------------------------------------------------
// CONSOLE
WARNING: By default `span_multi queries are rewritten to a `span_or` query
containing **all** the expanded terms. This can be expensive if the number of expanded
terms is large. To avoid an unbounded expansion you can set the
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the
boolean query limit (defaults to 1024).To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.

View File

@ -19,6 +19,9 @@
package org.elasticsearch.index.query;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiTermQuery;
@ -26,11 +29,15 @@ import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
@ -42,6 +49,8 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.query.support.QueryParsers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
@ -49,12 +58,10 @@ import java.util.Objects;
* as a {@link SpanQueryBuilder} so it can be nested.
*/
public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
implements SpanQueryBuilder {
implements SpanQueryBuilder {
public static final String NAME = "span_multi";
private static final ParseField MATCH_FIELD = new ParseField("match");
private final MultiTermQueryBuilder multiTermQueryBuilder;
public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
@ -83,7 +90,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
@Override
protected void doXContent(XContentBuilder builder, Params params)
throws IOException {
throws IOException {
builder.startObject(NAME);
builder.field(MATCH_FIELD.getPreferredName());
multiTermQueryBuilder.toXContent(builder, params);
@ -105,7 +112,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
QueryBuilder query = parseInnerQueryBuilder(parser);
if (query instanceof MultiTermQueryBuilder == false) {
throw new ParsingException(parser.getTokenLocation(),
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
}
subQuery = (MultiTermQueryBuilder) query;
} else {
@ -124,12 +131,55 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
if (subQuery == null) {
throw new ParsingException(parser.getTokenLocation(),
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
}
return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
}
public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {
private MultiTermQuery multiTermQuery;
private final long maxExpansions;
TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
maxExpansions = max;
}
@Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
multiTermQuery = query;
return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
}
final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {
@Override
protected List<SpanQuery> getTopLevelBuilder() {
return new ArrayList();
}
@Override
protected Query build(List<SpanQuery> builder) {
return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()]));
}
@Override
protected void checkMaxClauseCount(int count) {
if (count > maxExpansions) {
throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " +
"exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
}
}
@Override
protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
SpanTermQuery q = new SpanTermQuery(term, states);
topLevel.add(q);
}
};
}
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
Query subQuery = multiTermQueryBuilder.toQuery(context);
@ -190,10 +240,15 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
}
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
}
}
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new SpanBoostQuery(spanQuery, boost);
}
return spanQuery;
}

View File

@ -34,7 +34,6 @@ import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.internal.SearchContext;
@ -238,4 +237,38 @@ public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase<SpanMu
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
}
public void testDefaultMaxRewriteBuilder() throws Exception {
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")).
toQuery(createShardContext());
if (query instanceof SpanBoostQuery) {
query = ((SpanBoostQuery)query).getQuery();
}
assertTrue(query instanceof SpanMultiTermQueryWrapper);
if (query instanceof SpanMultiTermQueryWrapper) {
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}
}
public void testTopNMultiTermsRewriteInsideSpan() throws Exception {
Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite
("top_terms_boost_2000")).
toQuery(createShardContext());
if (query instanceof SpanBoostQuery) {
query = ((SpanBoostQuery)query).getQuery();
}
assertTrue(query instanceof SpanMultiTermQueryWrapper);
if (query instanceof SpanMultiTermQueryWrapper) {
MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod();
assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}
}
}

View File

@ -19,7 +19,9 @@
package org.elasticsearch.search.query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.English;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
@ -33,8 +35,12 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
import org.elasticsearch.index.query.SpanNearQueryBuilder;
import org.elasticsearch.index.query.SpanTermQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
@ -52,6 +58,7 @@ import org.joda.time.DateTimeZone;
import org.joda.time.format.ISODateTimeFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Random;
@ -1819,4 +1826,30 @@ public class SearchQueryIT extends ESIntegTestCase {
assertHitCount(searchResponse, 1);
}
public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
Settings.Builder builder = Settings.builder();
builder.put(SETTING_NUMBER_OF_SHARDS, 1).build();
createIndex("test", builder.build());
ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
try {
BooleanQuery.setMaxClauseCount(2);
for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" +
Integer.toString(i) + " bar baz"));
}
indexRandom(true, false, reqs);
QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery
("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar"));
expectThrows(ElasticsearchException.class, () ->
client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
} finally {
BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
}
}
}