SOLR-10921: raise lucene BooleanQuery.maxClauseCount, add higher level checking via QueryUtils.build

This commit is contained in:
yonik 2017-06-22 16:05:19 -04:00
parent 5de15ff403
commit 98276481e4
15 changed files with 157 additions and 139 deletions

View File

@ -194,6 +194,11 @@ Bug Fixes
* SOLR-10886: Using V2Request.process(solrClient) method throws NPE if the API returns an error (Cao Manh Dat)
* SOLR-10921: Work around the static Lucene BooleanQuery.maxClauseCount that causes Solr's maxBooleanClauses
setting behavior to be "last core wins". This patch sets BooleanQuery.maxClauseCount to its maximum value,
thus disabling the global check, and replaces it with specific checks where desired via
QueryUtils.build(). (yonik)
Optimizations
----------------------

View File

@ -367,6 +367,9 @@ public class SolrConfig extends Config implements MapSerializable {
public static final Map<String, SolrPluginInfo> classVsSolrPluginInfo;
static {
// Raise the Lucene static limit so we can control this with higher granularity. See SOLR-10921
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE-1);
Map<String, SolrPluginInfo> map = new HashMap<>();
for (SolrPluginInfo plugin : plugins) map.put(plugin.clazz.getName(), plugin);
classVsSolrPluginInfo = Collections.unmodifiableMap(map);

View File

@ -68,7 +68,6 @@ import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -250,17 +249,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
static int boolean_query_max_clause_count = Integer.MIN_VALUE;
// only change the BooleanQuery maxClauseCount once for ALL cores...
void booleanQueryMaxClauseCount() {
synchronized(SolrCore.class) {
if (boolean_query_max_clause_count == Integer.MIN_VALUE) {
boolean_query_max_clause_count = solrConfig.booleanQueryMaxClauseCount;
BooleanQuery.setMaxClauseCount(boolean_query_max_clause_count);
} else if (boolean_query_max_clause_count != solrConfig.booleanQueryMaxClauseCount ) {
log.debug("BooleanQuery.maxClauseCount={}, ignoring {}", boolean_query_max_clause_count, solrConfig.booleanQueryMaxClauseCount);
}
}
}
/**
* The SolrResourceLoader used to load all resources for this core.
@ -931,8 +919,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
this.maxWarmingSearchers = config.maxWarmingSearchers;
this.slowQueryThresholdMillis = config.slowQueryThresholdMillis;
booleanQueryMaxClauseCount();
final CountDownLatch latch = new CountDownLatch(1);
try {

View File

@ -56,6 +56,7 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SyntaxError;
@ -657,7 +658,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
}
}
BooleanQuery bq = booleanBuilder.build();
BooleanQuery bq = QueryUtils.build(booleanBuilder,parser);
if (bq.clauses().size() == 1) { // Unwrap single SHOULD query
BooleanClause clause = bq.clauses().iterator().next();
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
@ -910,7 +911,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
Query subq = ft.getFieldQuery(this.parser, rawq.sfield, externalVal);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
normal = booleanBuilder.build();
normal = QueryUtils.build(booleanBuilder, parser);
}
}
}

View File

@ -67,6 +67,7 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.query.SolrRangeQuery;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.Sorting;
import org.apache.solr.uninverting.UninvertingReader;
import org.slf4j.Logger;
@ -754,12 +755,13 @@ public abstract class FieldType extends FieldProperties {
/** @lucene.experimental */
public Query getSetQuery(QParser parser, SchemaField field, Collection<String> externalVals) {
if (!field.indexed()) {
// TODO: if the field isn't indexed, this feels like the wrong query type to use?
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String externalVal : externalVals) {
Query subq = getFieldQuery(parser, field, externalVal);
builder.add(subq, BooleanClause.Occur.SHOULD);
}
return builder.build();
return QueryUtils.build(builder, parser);
}
List<BytesRef> lst = new ArrayList<>(externalVals.size());

View File

@ -113,7 +113,7 @@ public class DisMaxQParser extends QParser {
addBoostQuery(query, solrParams);
addBoostFunctions(query, solrParams);
return query.build();
return QueryUtils.build(query, this);
}
protected void addBoostFunctions(BooleanQuery.Builder query, SolrParams solrParams) throws SyntaxError {
@ -252,7 +252,7 @@ public class DisMaxQParser extends QParser {
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery) dis);
boolean mmAutoRelax = params.getBool(DisMaxParams.MM_AUTORELAX, false);
SolrPluginUtils.setMinShouldMatch(t, minShouldMatch, mmAutoRelax);
query = t.build();
query = QueryUtils.build(t, this);
}
return query;
}

View File

@ -192,7 +192,7 @@ public class ExtendedDismaxQParser extends QParser {
//
// create a boosted query (scores multiplied by boosts)
//
Query topQuery = query.build();
Query topQuery = QueryUtils.build(query, this);
List<ValueSource> boosts = getMultiplicativeBoosts();
if (boosts.size()>1) {
ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
@ -282,7 +282,7 @@ public class ExtendedDismaxQParser extends QParser {
BooleanQuery.Builder t = new BooleanQuery.Builder();
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)query);
SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch, config.mmAutoRelax);
query = t.build();
query = QueryUtils.build(t, this);
}
return query;
}
@ -1163,7 +1163,7 @@ public class ExtendedDismaxQParser extends QParser {
for (Query sub : lst) {
q.add(sub, BooleanClause.Occur.SHOULD);
}
return q.build();
return QueryUtils.build(q, parser);
}
} else {
@ -1225,7 +1225,7 @@ public class ExtendedDismaxQParser extends QParser {
}
q.add(newBooleanClause(new DisjunctionMaxQuery(subs, a.tie), BooleanClause.Occur.SHOULD));
}
return q.build();
return QueryUtils.build(q, parser);
} else {
return new DisjunctionMaxQuery(lst, a.tie);
}
@ -1234,7 +1234,7 @@ public class ExtendedDismaxQParser extends QParser {
for (Query sub : lst) {
q.add(sub, BooleanClause.Occur.SHOULD);
}
return q.build();
return QueryUtils.build(q, parser);
}
} else {
// verify that a fielded query is actually on a field that exists... if not,

View File

@ -22,6 +22,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import java.util.Collection;
@ -129,4 +130,14 @@ public class QueryUtils {
return new BoostQuery(newBq, boost);
}
/** @lucene.experimental throw exception if max boolean clauses are exceeded */
public static BooleanQuery build(BooleanQuery.Builder builder, QParser parser) {
int configuredMax = parser != null ? parser.getReq().getCore().getSolrConfig().booleanQueryMaxClauseCount : BooleanQuery.getMaxClauseCount();
BooleanQuery bq = builder.build();
if (bq.clauses().size() > configuredMax) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Too many clauses in boolean query: encountered=" + bq.clauses().size() + " configured in solrconfig.xml via maxBooleanClauses=" + configuredMax);
}
return bq;
}
}

View File

@ -46,6 +46,7 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.util.SolrPluginUtils;
import static org.apache.solr.common.params.CommonParams.ID;
@ -161,7 +162,7 @@ public class CloudMLTQParser extends QParser {
newQ.add(q, clause.getOccur());
}
boostedMLTQuery = newQ.build();
boostedMLTQuery = QueryUtils.build(newQ, this);
}
// exclude current document from results

View File

@ -33,6 +33,7 @@ import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.SolrPluginUtils;
@ -134,7 +135,7 @@ public class SimpleMLTQParser extends QParser {
newQ.add(q, clause.getOccur());
}
boostedMLTQuery = newQ.build();
boostedMLTQuery = QueryUtils.build(newQ, this);
}
// exclude current document from results

View File

@ -358,6 +358,16 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
String q = sb.toString();
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
try {
ignoreException("Too many clauses");
assertJQ(req("q",q)
,"/response/numFound==6");
fail();
} catch (Exception e) {
// expect "too many clauses" exception... see SOLR-10921
assertTrue(e.getMessage().contains("many clauses"));
}
assertJQ(req("q","*:*", "fq", q)
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "false")

View File

@ -104,7 +104,9 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
}
/** Use SimpleOrderedMap rather than Map to match responses from shards */
/**
* Use SimpleOrderedMap rather than Map to match responses from shards
*/
public static Object fromJSON(String json) throws IOException {
JSONParser parser = new JSONParser(json);
ObjectBuilder ob = new ObjectBuilder(parser) {
@ -520,4 +522,22 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
}
// Unlike solrconfig.xml this test using solrconfig-tlog.xml should not fail with too-many-exceptions (see TestSolrQueryParser.testManyClauses)
@Test
public void testManyClauses() throws Exception {
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
StringBuilder sb = new StringBuilder("id:(");
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
sb.append('z').append(i).append(' ');
}
sb.append(a);
sb.append(")");
String q = sb.toString();
ignoreException("Too many clauses");
assertJQ(req("q", q)
, "/response/numFound==");
}
}

View File

@ -389,18 +389,11 @@
Query section - these settings control query time things like caches
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>
<!-- Max Boolean Clauses
Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded.
** WARNING **
This option actually modifies a global Lucene property that
will affect all SolrCores. If multiple solrconfig.xml files
disagree on this property, the value at any given moment will
be based on the last SolrCore to be initialized.
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
-->
<maxBooleanClauses>1024</maxBooleanClauses>

View File

@ -389,22 +389,14 @@
Query section - these settings control query time things like caches
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>
<!-- Max Boolean Clauses
Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded.
** WARNING **
This option actually modifies a global Lucene property that
will affect all SolrCores. If multiple solrconfig.xml files
disagree on this property, the value at any given moment will
be based on the last SolrCore to be initialized.
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Solr Internal Query Caches
There are two implementations of cache available for Solr,

View File

@ -395,18 +395,11 @@
Query section - these settings control query time things like caches
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>
<!-- Max Boolean Clauses
Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded.
** WARNING **
This option actually modifies a global Lucene property that
will affect all SolrCores. If multiple solrconfig.xml files
disagree on this property, the value at any given moment will
be based on the last SolrCore to be initialized.
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
-->
<maxBooleanClauses>1024</maxBooleanClauses>