Fail `span_multi` queries that exceed the boolean max clause limit (#30913)

By default, a `span_multi` query now limits its term expansions to the
boolean max clause count. This bounds heap usage when the inner query
expands to a high-cardinality set of terms. The limit applies only when
the inner multi term query does not use a `top_terms_N` rewrite.
This commit is contained in:
Nirmal Chidambaram 2018-06-07 07:34:39 +00:00 committed by Jim Ferenczi
parent b30aa3137d
commit 75a676c70b
4 changed files with 134 additions and 14 deletions

View File

@ -37,10 +37,9 @@ GET /_search
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
WARNING: By default `span_multi` queries are rewritten to a `span_or` query WARNING: `span_multi` queries will hit a too many clauses failure if the number of terms that match the query exceeds the
containing **all** the expanded terms. This can be expensive if the number of expanded boolean query limit (defaults to 1024). To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,
terms is large. To avoid an unbounded expansion you can set the rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only,
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*` you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite. Or, if you use `span_multi` on `prefix` query only, you can rewrite any prefix query on the field to a single term query that matches the indexed prefix.
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a single term query that matches the indexed prefix.

View File

@ -19,6 +19,9 @@
package org.elasticsearch.index.query; package org.elasticsearch.index.query;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
@ -26,11 +29,15 @@ import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery; import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
@ -42,6 +49,8 @@ import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.query.support.QueryParsers; import org.elasticsearch.index.query.support.QueryParsers;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects; import java.util.Objects;
/** /**
@ -49,12 +58,10 @@ import java.util.Objects;
* as a {@link SpanQueryBuilder} so it can be nested. * as a {@link SpanQueryBuilder} so it can be nested.
*/ */
public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder> public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTermQueryBuilder>
implements SpanQueryBuilder { implements SpanQueryBuilder {
public static final String NAME = "span_multi"; public static final String NAME = "span_multi";
private static final ParseField MATCH_FIELD = new ParseField("match"); private static final ParseField MATCH_FIELD = new ParseField("match");
private final MultiTermQueryBuilder multiTermQueryBuilder; private final MultiTermQueryBuilder multiTermQueryBuilder;
public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) { public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) {
@ -83,7 +90,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
@Override @Override
protected void doXContent(XContentBuilder builder, Params params) protected void doXContent(XContentBuilder builder, Params params)
throws IOException { throws IOException {
builder.startObject(NAME); builder.startObject(NAME);
builder.field(MATCH_FIELD.getPreferredName()); builder.field(MATCH_FIELD.getPreferredName());
multiTermQueryBuilder.toXContent(builder, params); multiTermQueryBuilder.toXContent(builder, params);
@ -105,7 +112,7 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
QueryBuilder query = parseInnerQueryBuilder(parser); QueryBuilder query = parseInnerQueryBuilder(parser);
if (query instanceof MultiTermQueryBuilder == false) { if (query instanceof MultiTermQueryBuilder == false) {
throw new ParsingException(parser.getTokenLocation(), throw new ParsingException(parser.getTokenLocation(),
"[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query"); "[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query");
} }
subQuery = (MultiTermQueryBuilder) query; subQuery = (MultiTermQueryBuilder) query;
} else { } else {
@ -124,12 +131,55 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
if (subQuery == null) { if (subQuery == null) {
throw new ParsingException(parser.getTokenLocation(), throw new ParsingException(parser.getTokenLocation(),
"[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause"); "[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause");
} }
return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost); return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost);
} }
/**
 * Span rewrite method that turns a {@link MultiTermQuery} into a {@link SpanOrQuery} made of one
 * {@link SpanTermQuery} per collected term, and fails once the clause count reported to
 * {@code checkMaxClauseCount} is greater than the limit supplied at construction time.
 */
public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod {

    // Query handed to the most recent rewrite() call; kept only so the failure message can name it.
    // NOTE(review): this is overwritten on every rewrite(), so sharing one instance across
    // concurrent rewrites could make the error message name the wrong query -- confirm usage.
    private MultiTermQuery multiTermQuery;

    // Upper bound on the number of clauses; the caller in this file passes BooleanQuery.getMaxClauseCount().
    private final long maxExpansions;

    /**
     * @param max maximum number of clauses tolerated before the rewrite fails
     */
    TopTermSpanBooleanQueryRewriteWithMaxClause(long max) {
        maxExpansions = max;
    }

    /**
     * Remembers {@code query} for error reporting and hands the actual rewrite to {@link #delegate}.
     *
     * @throws ElasticsearchException if the delegate reports more clauses than {@code maxExpansions}
     */
    @Override
    public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
        multiTermQuery = query;
        // delegate.build() always returns a SpanOrQuery, so the narrowing cast is expected to hold.
        return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery);
    }

    final ScoringRewrite<List<SpanQuery>> delegate = new ScoringRewrite<List<SpanQuery>>() {

        @Override
        protected List<SpanQuery> getTopLevelBuilder() {
            // Diamond instead of a raw ArrayList keeps the list type-checked.
            return new ArrayList<>();
        }

        @Override
        protected Query build(List<SpanQuery> builder) {
            // toArray(T[]) already yields SpanQuery[], so no cast is required.
            return new SpanOrQuery(builder.toArray(new SpanQuery[builder.size()]));
        }

        @Override
        protected void checkMaxClauseCount(int count) {
            if (count > maxExpansions) {
                throw new ElasticsearchException("[" + multiTermQuery + " ] " +
                    "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
            }
        }

        @Override
        protected void addClause(List<SpanQuery> topLevel, Term term, int docCount, float boost, TermContext states) {
            // docCount and boost are not used: every term becomes a plain SpanTermQuery.
            topLevel.add(new SpanTermQuery(term, states));
        }
    };
}
@Override @Override
protected Query doToQuery(QueryShardContext context) throws IOException { protected Query doToQuery(QueryShardContext context) throws IOException {
Query subQuery = multiTermQueryBuilder.toQuery(context); Query subQuery = multiTermQueryBuilder.toQuery(context);
@ -190,10 +240,15 @@ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder<SpanMultiTer
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName()); + MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
} }
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery); spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) {
((SpanMultiTermQueryWrapper<MultiTermQuery>) spanQuery).setRewriteMethod(new
TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount()));
}
} }
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new SpanBoostQuery(spanQuery, boost); return new SpanBoostQuery(spanQuery, boost);
} }
return spanQuery; return spanQuery;
} }

View File

@ -34,7 +34,6 @@ import org.elasticsearch.Version;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
@ -238,4 +237,38 @@ public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase<SpanMu
assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value()); assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value());
assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001); assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001);
} }
/**
 * A span_multi query whose inner prefix query has no explicit rewrite must be built as a
 * {@link SpanMultiTermQueryWrapper} that uses the max-clause-limited rewrite method.
 */
public void testDefaultMaxRewriteBuilder() throws Exception {
    Query built = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b"))
        .toQuery(createShardContext());
    // Look through an optional boost wrapper to reach the span query itself.
    Query unwrapped = built instanceof SpanBoostQuery ? ((SpanBoostQuery) built).getQuery() : built;
    assertTrue(unwrapped instanceof SpanMultiTermQueryWrapper);
    // The assertion above already failed the test if the type is wrong, so the cast is safe here.
    MultiTermQuery.RewriteMethod method = ((SpanMultiTermQueryWrapper<?>) unwrapped).getRewriteMethod();
    assertTrue(method instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}
/**
 * When the inner prefix query asks for a {@code top_terms_boost_2000} rewrite, the span wrapper
 * must not be switched to the max-clause-limited rewrite method.
 */
public void testTopNMultiTermsRewriteInsideSpan() throws Exception {
    Query built = QueryBuilders
        .spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite("top_terms_boost_2000"))
        .toQuery(createShardContext());
    // Look through an optional boost wrapper to reach the span query itself.
    Query unwrapped = built instanceof SpanBoostQuery ? ((SpanBoostQuery) built).getQuery() : built;
    assertTrue(unwrapped instanceof SpanMultiTermQueryWrapper);
    // The assertion above already failed the test if the type is wrong, so the cast is safe here.
    MultiTermQuery.RewriteMethod method = ((SpanMultiTermQueryWrapper<?>) unwrapped).getRewriteMethod();
    assertFalse(method instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause);
}
} }

View File

@ -19,7 +19,9 @@
package org.elasticsearch.search.query; package org.elasticsearch.search.query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.English; import org.apache.lucene.util.English;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchPhaseExecutionException;
@ -33,8 +35,12 @@ import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.index.query.SpanMultiTermQueryBuilder;
import org.elasticsearch.index.query.SpanNearQueryBuilder;
import org.elasticsearch.index.query.SpanTermQueryBuilder;
import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
@ -52,6 +58,7 @@ import org.joda.time.DateTimeZone;
import org.joda.time.format.ISODateTimeFormat; import org.joda.time.format.ISODateTimeFormat;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Random; import java.util.Random;
@ -1819,4 +1826,30 @@ public class SearchQueryIT extends ESIntegTestCase {
assertHitCount(searchResponse, 1); assertHitCount(searchResponse, 1);
} }
/**
 * Lowers the boolean max clause count to 2, indexes one more distinct {@code foo<i>} term than
 * that limit, and expects a span_near query wrapping a {@code f*} wildcard span_multi clause to
 * fail with an {@link ElasticsearchException}.
 */
public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException {
    // Build the settings once; the original built them twice and threw the first result away.
    createIndex("test", Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).build());
    ArrayList<IndexRequestBuilder> reqs = new ArrayList<>();
    int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
    try {
        BooleanQuery.setMaxClauseCount(2);
        // One document more than the limit, each contributing a different term that matches "f*".
        for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) {
            reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i))
                .setSource("body", "foo" + i + " bar baz"));
        }
        indexRandom(true, false, reqs);

        QueryBuilder queryBuilder = new SpanNearQueryBuilder(
            new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery("body", "f*")), 0)
            .addClause(new SpanTermQueryBuilder("body", "bar"));

        expectThrows(ElasticsearchException.class, () ->
            client().prepareSearch().setIndices("test").setQuery(queryBuilder).get());
    } finally {
        // The max clause count is a static setting, so always put the previous value back.
        BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount);
    }
}
} }