SOLR-13336: add maxBooleanClauses (default to 1024) setting to solr.xml, reverting previous effective value of Integer.MAX_VALUE-1, to restrict risk of pathological query expansion.

(cherry picked from commit d90034f0d61cd1525e10d07cf064a8647dc08cc9)
This commit is contained in:
Chris Hostetter 2019-04-15 10:27:08 -07:00
parent a96c75f031
commit 59a3c45d9c
16 changed files with 155 additions and 70 deletions

View File

@ -30,6 +30,14 @@ Jetty 9.4.14.v20181114
Upgrade Notes
----------------------
* Solr's default behavior when dealing with 'maxBooleanClauses' has changed to reduce the risk of exponential
query expansion when dealing with pathological query strings. A default upper limit of 1024 clauses
(The same default prior to Solr 7.0) is now enforced at the node level, and can be overridden in solr.xml.
The identically named solrconfig.xml setting is still available for limiting the size of 'explicit' boolean
query strings, but this per-collection limit is still restricted by the upper-bound of the global limit
in solr.xml. See SOLR-13336 for more details.
* When requesting the status of an async request via REQUESTSTATUS collections API, the response will
include the list of internal async requests (if any) in the "success" or "failed" keys (in addition
to them being included outside those keys for backwards compatibility). See SOLR-12708 for more
@ -152,6 +160,9 @@ Bug Fixes
processing concurrent requests during shutdown. This primarily affected tests, but may have also caused
odd errors/delays when restart/shutting down solr nodes. (hossman)
* SOLR-13336: add maxBooleanClauses (default to 1024) setting to solr.xml, reverting previous effective
value of Integer.MAX_VALUE-1, to restrict risk of pathological query expansion. (hossman)
Improvements
----------------------

View File

@ -45,6 +45,7 @@ import org.apache.http.client.CredentialsProvider;
import org.apache.http.config.Lookup;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.store.Directory;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
@ -321,6 +322,9 @@ public class CoreContainer {
this.solrHome = loader.getInstancePath().toString();
containerHandlers.put(PublicKeyHandler.PATH, new PublicKeyHandler());
this.cfg = requireNonNull(config);
if (null != this.cfg.getBooleanQueryMaxClauseCount()) {
BooleanQuery.setMaxClauseCount(this.cfg.getBooleanQueryMaxClauseCount());
}
this.coresLocator = locator;
this.containerProperties = new Properties(properties);
this.asyncSolrCoreLoad = asyncSolrCoreLoad;

View File

@ -35,6 +35,8 @@ public class NodeConfig {
private final Path solrDataHome;
private final Integer booleanQueryMaxClauseCount;
private final Path configSetBaseDirectory;
private final String sharedLibDirectory;
@ -75,7 +77,8 @@ public class NodeConfig {
private final PluginInfo transientCacheConfig;
private NodeConfig(String nodeName, Path coreRootDirectory, Path solrDataHome, Path configSetBaseDirectory, String sharedLibDirectory,
private NodeConfig(String nodeName, Path coreRootDirectory, Path solrDataHome, Integer booleanQueryMaxClauseCount,
Path configSetBaseDirectory, String sharedLibDirectory,
PluginInfo shardHandlerFactoryConfig, UpdateShardHandlerConfig updateShardHandlerConfig,
String coreAdminHandlerClass, String collectionsAdminHandlerClass,
String healthCheckHandlerClass, String infoHandlerClass, String configSetsHandlerClass,
@ -86,6 +89,7 @@ public class NodeConfig {
this.nodeName = nodeName;
this.coreRootDirectory = coreRootDirectory;
this.solrDataHome = solrDataHome;
this.booleanQueryMaxClauseCount = booleanQueryMaxClauseCount;
this.configSetBaseDirectory = configSetBaseDirectory;
this.sharedLibDirectory = sharedLibDirectory;
this.shardHandlerFactoryConfig = shardHandlerFactoryConfig;
@ -126,6 +130,15 @@ public class NodeConfig {
return solrDataHome;
}
/**
* Returns the node-level maximum boolean clause count held by this config, which may be null.
* If null, the lucene default will not be overridden
*
* @return the configured clause limit, or null if none was set
* @see org.apache.lucene.search.BooleanQuery#setMaxClauseCount
*/
public Integer getBooleanQueryMaxClauseCount() {
return booleanQueryMaxClauseCount;
}
public PluginInfo getShardHandlerFactoryPluginInfo() {
return shardHandlerFactoryConfig;
}
@ -217,6 +230,7 @@ public class NodeConfig {
private Path coreRootDirectory;
private Path solrDataHome;
private Integer booleanQueryMaxClauseCount;
private Path configSetBaseDirectory;
private String sharedLibDirectory = "lib";
private PluginInfo shardHandlerFactoryConfig;
@ -288,6 +302,11 @@ public class NodeConfig {
}
return this;
}
/**
* Records the node-level maximum boolean clause count to be passed to the NodeConfig built by this builder.
*
* @param booleanQueryMaxClauseCount the clause limit; may be null, in which case the built config reports null
* @return this builder, to allow call chaining
*/
public NodeConfigBuilder setBooleanQueryMaxClauseCount(Integer booleanQueryMaxClauseCount) {
this.booleanQueryMaxClauseCount = booleanQueryMaxClauseCount;
return this;
}
public NodeConfigBuilder setConfigSetBaseDirectory(String configSetBaseDirectory) {
this.configSetBaseDirectory = loader.getInstancePath().resolve(configSetBaseDirectory);
@ -392,7 +411,8 @@ public class NodeConfig {
}
public NodeConfig build() {
return new NodeConfig(nodeName, coreRootDirectory, solrDataHome, configSetBaseDirectory, sharedLibDirectory, shardHandlerFactoryConfig,
return new NodeConfig(nodeName, coreRootDirectory, solrDataHome, booleanQueryMaxClauseCount,
configSetBaseDirectory, sharedLibDirectory, shardHandlerFactoryConfig,
updateShardHandlerConfig, coreAdminHandlerClass, collectionsAdminHandlerClass, healthCheckHandlerClass, infoHandlerClass, configSetsHandlerClass,
logWatcherConfig, cloudConfig, coreLoadThreads, replayUpdatesThreads, transientCacheSize, useSchemaCache, managementPath, loader, solrProperties,
backupRepositoryPlugins, metricsConfig, transientCacheConfig);

View File

@ -208,6 +208,8 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
getRequestParams();
initLibs();
luceneMatchVersion = SolrConfig.parseLuceneVersionString(getVal("luceneMatchVersion", true));
log.info("Using Lucene MatchVersion: {}", luceneMatchVersion);
String indexConfigPrefix;
// Old indexDefaults and mainIndex sections are deprecated and fails fast for luceneMatchVersion=>LUCENE_4_0_0.
@ -235,8 +237,12 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
indexConfig = new SolrIndexConfig(this, "indexConfig", null);
booleanQueryMaxClauseCount = getInt("query/maxBooleanClauses", BooleanQuery.getMaxClauseCount());
log.info("Using Lucene MatchVersion: {}", luceneMatchVersion);
if (BooleanQuery.getMaxClauseCount() < booleanQueryMaxClauseCount) {
log.warn("solrconfig.xml: <maxBooleanClauses> of {} is greater than global limit of {} "+
"and will have no effect", booleanQueryMaxClauseCount, BooleanQuery.getMaxClauseCount());
log.warn("set 'maxBooleanClauses' in solr.xml to increase global limit");
}
// Warn about deprecated / discontinued parameters
// boolToFilterOptimizer has had no effect since 3.1
if (get("query/boolTofilterOptimizer", null) != null)
@ -395,22 +401,11 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
public static final Map<String, SolrPluginInfo> classVsSolrPluginInfo;
static {
// Raise the Lucene static limit so we can control this with higher granularity. See SOLR-10921
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE-1);
Map<String, SolrPluginInfo> map = new HashMap<>();
for (SolrPluginInfo plugin : plugins) map.put(plugin.clazz.getName(), plugin);
classVsSolrPluginInfo = Collections.unmodifiableMap(map);
}
{
// non-static setMaxClauseCount because the test framework sometimes reverts the value on us and
// the static setting above is only executed once. This re-sets the value every time a SolrConfig
// object is created. See SOLR-10921
BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE-1);
}
public static class SolrPluginInfo {
public final Class clazz;

View File

@ -257,6 +257,9 @@ public class SolrXmlConfig {
case "solrDataHome":
builder.setSolrDataHome(value);
break;
case "maxBooleanClauses":
builder.setBooleanQueryMaxClauseCount(parseInt(name, value));
break;
case "managementPath":
builder.setManagementPath(value);
break;

View File

@ -369,10 +369,15 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
<!-- Maximum number of clauses allowed when parsing a boolean query string.
This limit only impacts boolean queries specified by a user as part of a query string,
and provides per-collection controls on how complex user specified boolean queries can
be. Query strings that specify more clauses than this will result in an error.
If this per-collection limit is greater than the global `maxBooleanClauses` limit
specified in `solr.xml`, it will have no effect, as that setting also limits the size
of user specified boolean queries.
-->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>

View File

@ -27,6 +27,7 @@
<str name="shareSchema">${shareSchema:true}</str>
<int name="transientCacheSize">66</int>
<int name="replayUpdatesThreads">100</int>
<int name="maxBooleanClauses">42</int>
<solrcloud>
<int name="distribUpdateConnTimeout">22</int>

View File

@ -68,7 +68,8 @@ public class TestSolrXml extends SolrTestCaseJ4 {
CloudConfig ccfg = cfg.getCloudConfig();
UpdateShardHandlerConfig ucfg = cfg.getUpdateShardHandlerConfig();
PluginInfo[] backupRepoConfigs = cfg.getBackupRepositoryPlugins();
assertEquals("maxBooleanClauses", (Integer) 42, cfg.getBooleanQueryMaxClauseCount());
assertEquals("core admin handler class", "testAdminHandler", cfg.getCoreAdminHandlerClass());
assertEquals("collection handler class", "testCollectionsHandler", cfg.getCollectionsHandlerClass());
assertEquals("info handler class", "testInfoHandler", cfg.getInfoHandlerClass());
@ -127,6 +128,7 @@ public class TestSolrXml extends SolrTestCaseJ4 {
public void testExplicitNullGivesDefaults() throws IOException {
String solrXml = "<solr>" +
"<null name=\"maxBooleanClauses\"/>" +
"<solrcloud>" +
"<str name=\"host\">host</str>" +
"<int name=\"hostPort\">8983</int>" +
@ -135,6 +137,7 @@ public class TestSolrXml extends SolrTestCaseJ4 {
"</solrcloud></solr>";
NodeConfig cfg = SolrXmlConfig.fromString(loader, solrXml);
assertNull("maxBooleanClauses", cfg.getBooleanQueryMaxClauseCount()); // default is null
assertEquals("leaderVoteWait", 180000, cfg.getCloudConfig().getLeaderVoteWait());
}

View File

@ -47,11 +47,13 @@ import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;
import static org.hamcrest.core.StringContains.containsString;
public class TestSolrQueryParser extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
System.setProperty("solr.max.booleanClauses", "42"); // lower for testing
initCore("solrconfig.xml", "schema12.xml");
createIndex();
}
@ -341,30 +343,62 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
}
@Test
public void testManyClauses() throws Exception {
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
StringBuilder sb = new StringBuilder("id:(");
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
public void testManyClauses_Solr() throws Exception {
final String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
// this should exceed our solrconfig.xml level (solr specific) maxBooleanClauses limit
// even though it's not long enough to trip the Lucene level (global) limit
final String too_long = "id:(" + a + a + a + a + a + ")";
final String expectedMsg = "Too many clauses";
ignoreException(expectedMsg);
SolrException e = expectThrows(SolrException.class, "expected SolrException",
() -> assertJQ(req("q", too_long), "/response/numFound==6"));
assertThat(e.getMessage(), containsString(expectedMsg));
// but should still work as a filter query since TermsQuery can be used...
assertJQ(req("q","*:*", "fq", too_long)
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", too_long, "sow", "false")
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", too_long, "sow", "true")
,"/response/numFound==6");
}
@Test
public void testManyClauses_Lucene() throws Exception {
final int numZ = BooleanQuery.getMaxClauseCount();
final String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
final StringBuilder sb = new StringBuilder("id:(");
for (int i = 0; i < numZ; i++) {
sb.append('z').append(i).append(' ');
}
sb.append(a);
sb.append(")");
// this should trip the lucene level global BooleanQuery.getMaxClauseCount() limit,
// causing a parsing error, before Solr even gets a chance to enforce its lower level limit
final String way_too_long = sb.toString();
String q = sb.toString();
final String expectedMsg = "too many boolean clauses";
ignoreException(expectedMsg);
SolrException e = expectThrows(SolrException.class, "expected SolrException",
() -> assertJQ(req("q", way_too_long), "/response/numFound==6"));
assertThat(e.getMessage(), containsString(expectedMsg));
assertNotNull(e.getCause());
assertEquals(SyntaxError.class, e.getCause().getClass());
assertNotNull(e.getCause().getCause());
assertEquals(BooleanQuery.TooManyClauses.class, e.getCause().getCause().getClass());
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
{
ignoreException("Too many clauses");
SolrException e = expectThrows(SolrException.class, "exoected too many clauses exception",
() -> assertJQ(req("q", q), "/response/numFound==6"));
assertTrue(e.getMessage().contains("many clauses"));
}
assertJQ(req("q","*:*", "fq", q)
// but should still work as a filter query since TermsQuery can be used...
assertJQ(req("q","*:*", "fq", way_too_long)
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "false")
assertJQ(req("q","*:*", "fq", way_too_long, "sow", "false")
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "true")
assertJQ(req("q","*:*", "fq", way_too_long, "sow", "true")
,"/response/numFound==6");
}

View File

@ -1407,22 +1407,4 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
} // end method loop
}
// Unlike solrconfig.xml this test using solrconfig-tlog.xml should not fail with too-many-exceptions (see TestSolrQueryParser.testManyClauses)
@Test
public void testManyClauses() throws Exception {
String a = "1 a 2 b 3 c 10 d 11 12 "; // 10 terms
StringBuilder sb = new StringBuilder("id:(");
for (int i = 0; i < 1024; i++) { // historically, the max number of boolean clauses defaulted to 1024
sb.append('z').append(i).append(' ');
}
sb.append(a);
sb.append(")");
String q = sb.toString();
ignoreException("Too many clauses");
assertJQ(req("q", q)
, "/response/numFound==");
}
}

View File

@ -369,10 +369,15 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
<!-- Maximum number of clauses allowed when parsing a boolean query string.
This limit only impacts boolean queries specified by a user as part of a query string,
and provides per-collection controls on how complex user specified boolean queries can
be. Query strings that specify more clauses than this will result in an error.
If this per-collection limit is greater than the global `maxBooleanClauses` limit
specified in `solr.xml`, it will have no effect, as that setting also limits the size
of user specified boolean queries.
-->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>

View File

@ -372,10 +372,15 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<query>
<!-- Maximum number of clauses in each BooleanQuery, an exception
is thrown if exceeded. It is safe to increase or remove this setting,
since it is purely an arbitrary limit to try and catch user errors where
large boolean queries may not be the best implementation choice.
<!-- Maximum number of clauses allowed when parsing a boolean query string.
This limit only impacts boolean queries specified by a user as part of a query string,
and provides per-collection controls on how complex user specified boolean queries can
be. Query strings that specify more clauses than this will result in an error.
If this per-collection limit is greater than the global `maxBooleanClauses` limit
specified in `solr.xml`, it will have no effect, as that setting also limits the size
of user specified boolean queries.
-->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>

View File

@ -28,6 +28,8 @@
<solr>
<int name="maxBooleanClauses">${solr.max.booleanClauses:1024}</int>
<solrcloud>
<str name="host">${host:}</str>

View File

@ -28,6 +28,8 @@ You can find `solr.xml` in your `$SOLR_HOME` directory (usually `server/solr`) i
----
<solr>
<int name="maxBooleanClauses">${solr.max.booleanClauses:1024}</int>
<solrcloud>
<str name="host">${host:}</str>
<int name="hostPort">${jetty.port:8983}</int>
@ -92,6 +94,18 @@ Defines how many cores with `transient=true` that can be loaded before swapping
`configSetBaseDir`::
The directory under which configSets for Solr cores can be found. Defaults to `$SOLR_HOME/configsets`.
[[global-maxbooleanclauses]]
`maxBooleanClauses`::
Sets the maximum number of clauses allowed in any boolean query.
+
This global limit provides a safety constraint on the number of clauses allowed in any boolean queries against any collection -- regardless of whether those clauses were explicitly specified in a query string, or were the result of query expansion/re-writing from a more complex type of query based on the terms in the index.
+
In default configurations this property uses the value of the `solr.max.booleanClauses` system property if specified. This is the same system property used in the default configset for the <<query-settings-in-solrconfig#maxbooleanclauses,`<maxBooleanClauses>` setting of `solrconfig.xml`>> making it easy for Solr administrators to increase both values (in all collections) without needing to search through and update all of their configs.
+
[source,xml]
----
<int name="maxBooleanClauses">${solr.max.booleanClauses:1024}</int>
----
=== The <solrcloud> Element

View File

@ -124,20 +124,19 @@ If you want auto-warming of your cache, include a `regenerator` attribute with t
=== maxBooleanClauses
This sets the maximum number of clauses allowed in a boolean query. This can affect range or prefix queries that expand to a query with a large number of boolean terms. If this limit is exceeded, an exception is thrown.
Sets the maximum number of clauses allowed when parsing a boolean query string.
In default configurations this property uses the value of the `solr.max.booleanClauses` system property where present. This provides an easy way for users to change this value in all collections without needing to search through and update all of their configs.
This limit only impacts boolean queries specified by a user as part of a query string, and provides per-collection controls on how complex user specified boolean queries can be. Query strings that specify more clauses than this will result in an error.
If this per-collection limit is greater than <<format-of-solr-xml#global-maxbooleanclauses,the global `maxBooleanClauses` limit specified in `solr.xml`>> it will have no effect, as that setting also limits the size of user specified boolean queries.
In default configurations this property uses the value of the `solr.max.booleanClauses` system property if specified. This is the same system property used in the <<format-of-solr-xml#global-maxbooleanclauses,global `maxBooleanClauses` setting in the default `solr.xml`>> making it easy for Solr administrators to increase both values (in all collections) without needing to search through and update all of their configs.
[source,xml]
----
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>
----
[WARNING]
====
This option modifies a global property that effects all Solr cores. If multiple `solrconfig.xml` files disagree on this property, the value at any point in time will be based on the last Solr core that was initialized.
====
=== enableLazyFieldLoading
If this parameter is set to true, then fields that are not directly requested will be loaded lazily as needed. This can boost performance if the most common queries only need a small subset of fields, especially if infrequently accessed fields are large in size.

View File

@ -2800,7 +2800,9 @@ public class StreamExpressionTest extends SolrCloudTestCase {
SolrClientCache cache = new SolrClientCache();
StreamContext streamContext = new StreamContext();
streamContext.setSolrClientCache(cache);
String longQuery = "\"id:(" + IntStream.range(0, 4000).mapToObj(i -> "a").collect(Collectors.joining(" ", "", "")) + ")\"";
// use filter() to allow being parsed as 'terms in set' query instead of a (weighted/scored) BooleanQuery
// so we don't trip too many boolean clauses
String longQuery = "\"filter(id:(" + IntStream.range(0, 4000).mapToObj(i -> "a").collect(Collectors.joining(" ", "", "")) + "))\"";
try {
assertSuccess("significantTerms("+COLLECTIONORALIAS+", q="+longQuery+", field=\"test_t\", limit=3, minTermLength=1, maxDocFreq=\".5\")", streamContext);