Circuit break the number of inline scripts compiled per minute

When compiling many dynamically changing scripts, parameterized
scripts (<https://www.elastic.co/guide/en/elasticsearch/reference/master/modules-scripting-using.html#prefer-params>)
should be preferred. This enforces a limit to the number of scripts that
can be compiled within a minute. A new dynamic setting is added -
`script.max_compilations_per_minute`, which defaults to 15.

If more dynamic scripts are sent, a user will get the following
exception:

```json
{
  "error" : {
    "root_cause" : [
      {
        "type" : "circuit_breaking_exception",
        "reason" : "[script] Too many dynamic script compilations within one minute, max: [15/min]; please use on-disk, indexed, or scripts with parameters instead",
        "bytes_wanted" : 0,
        "bytes_limit" : 0
      }
    ],
    "type" : "search_phase_execution_exception",
    "reason" : "all shards failed",
    "phase" : "query",
    "grouped" : true,
    "failed_shards" : [
      {
        "shard" : 0,
        "index" : "i",
        "node" : "a5V1eXcZRYiIk8lecjZ4Jw",
        "reason" : {
          "type" : "general_script_exception",
          "reason" : "Failed to compile inline script [\"aaaaaaaaaaaaaaaa\"] using lang [painless]",
          "caused_by" : {
            "type" : "circuit_breaking_exception",
            "reason" : "[script] Too many dynamic script compilations within one minute, max: [15/min]; please use on-disk, indexed, or scripts with parameters instead",
            "bytes_wanted" : 0,
            "bytes_limit" : 0
          }
        }
      }
    ],
    "caused_by" : {
      "type" : "general_script_exception",
      "reason" : "Failed to compile inline script [\"aaaaaaaaaaaaaaaa\"] using lang [painless]",
      "caused_by" : {
        "type" : "circuit_breaking_exception",
        "reason" : "[script] Too many dynamic script compilations within one minute, max: [15/min]; please use on-disk, indexed, or scripts with parameters instead",
        "bytes_wanted" : 0,
        "bytes_limit" : 0
      }
    }
  },
  "status" : 500
}
```

This also fixes a bug in `ScriptService` where requests being executed
concurrently on a single node could cause a script to be compiled
multiple times (many in the case of a powerful node with many shards)
due to no synchronization between checking the cache and compiling the
script. There is now synchronization so that a script being compiled
will only be compiled once regardless of the number of concurrent
searches on a node.

Relates to 
This commit is contained in:
Lee Hinman 2016-07-26 13:36:29 -06:00
parent a91bb29585
commit 2be52eff09
19 changed files with 250 additions and 26 deletions
core/src
main/java/org/elasticsearch
test/java/org/elasticsearch/script
docs
modules
lang-expression
lang-groovy
build.gradle
src/test/resources/rest-api-spec/test/lang_groovy
lang-mustache
lang-painless
plugins
lang-javascript
lang-python
qa/smoke-test-reindex-with-painless
test/framework/src/main/java/org/elasticsearch/test

View File

@ -300,6 +300,7 @@ public final class ClusterSettings extends AbstractScopedSettings {
ScriptService.SCRIPT_CACHE_EXPIRE_SETTING,
ScriptService.SCRIPT_AUTO_RELOAD_ENABLED_SETTING,
ScriptService.SCRIPT_MAX_SIZE_IN_BYTES,
ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE,
IndicesService.INDICES_CACHE_CLEAN_INTERVAL_SETTING,
IndicesFieldDataCache.INDICES_FIELDDATA_CACHE_SIZE_KEY,
IndicesRequestCache.INDICES_CACHE_QUERY_SIZE,

View File

@ -294,6 +294,7 @@ public class Node implements Closeable {
// this is as early as we can validate settings at this point. we already pass them to ScriptModule as well as ThreadPool
// so we might be late here already
final SettingsModule settingsModule = new SettingsModule(this.settings, additionalSettings, additionalSettingsFilter);
scriptModule.registerClusterSettingsListeners(settingsModule.getClusterSettings());
resourcesToClose.add(resourceWatcherService);
final NetworkService networkService = new NetworkService(settings,
getCustomNameResolvers(pluginsService.filterPlugins(DiscoveryPlugin.class)));

View File

@ -19,6 +19,7 @@
package org.elasticsearch.script;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
@ -45,8 +46,8 @@ public class ScriptModule {
* Build from {@linkplain ScriptPlugin}s. Convenient for normal use but not great for tests. See
* {@link ScriptModule#ScriptModule(Settings, Environment, ResourceWatcherService, List, List)} for easier use in tests.
*/
public static ScriptModule create(Settings settings, Environment environment, ResourceWatcherService resourceWatcherService,
List<ScriptPlugin> scriptPlugins) {
public static ScriptModule create(Settings settings, Environment environment,
ResourceWatcherService resourceWatcherService, List<ScriptPlugin> scriptPlugins) {
Map<String, NativeScriptFactory> factoryMap = scriptPlugins.stream().flatMap(x -> x.getNativeScripts().stream())
.collect(Collectors.toMap(NativeScriptFactory::getName, Function.identity()));
NativeScriptEngineService nativeScriptEngineService = new NativeScriptEngineService(settings, factoryMap);
@ -61,8 +62,9 @@ public class ScriptModule {
/**
* Build {@linkplain ScriptEngineService} and {@linkplain ScriptContext.Plugin}.
*/
public ScriptModule(Settings settings, Environment environment, ResourceWatcherService resourceWatcherService,
List<ScriptEngineService> scriptEngineServices, List<ScriptContext.Plugin> customScriptContexts) {
public ScriptModule(Settings settings, Environment environment,
ResourceWatcherService resourceWatcherService, List<ScriptEngineService> scriptEngineServices,
List<ScriptContext.Plugin> customScriptContexts) {
ScriptContextRegistry scriptContextRegistry = new ScriptContextRegistry(customScriptContexts);
ScriptEngineRegistry scriptEngineRegistry = new ScriptEngineRegistry(scriptEngineServices);
scriptSettings = new ScriptSettings(scriptEngineRegistry, scriptContextRegistry);
@ -87,4 +89,11 @@ public class ScriptModule {
public ScriptService getScriptService() {
return scriptService;
}
/**
* Allow the script service to register any settings update handlers on the cluster settings
*/
public void registerClusterSettingsListeners(ClusterSettings clusterSettings) {
scriptService.registerClusterSettingsListeners(clusterSettings);
}
}

View File

@ -36,6 +36,7 @@ import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.breaker.CircuitBreakingException;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.cache.Cache;
import org.elasticsearch.common.cache.CacheBuilder;
@ -47,6 +48,7 @@ import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
@ -86,6 +88,8 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
Setting.boolSetting("script.auto_reload_enabled", true, Property.NodeScope);
public static final Setting<Integer> SCRIPT_MAX_SIZE_IN_BYTES =
Setting.intSetting("script.max_size_in_bytes", 65535, Property.NodeScope);
public static final Setting<Integer> SCRIPT_MAX_COMPILATIONS_PER_MINUTE =
Setting.intSetting("script.max_compilations_per_minute", 15, 0, Property.Dynamic, Property.NodeScope);
private final String defaultLang;
@ -106,6 +110,11 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
private ClusterState clusterState;
private int totalCompilesPerMinute;
private long lastInlineCompileTime;
private double scriptsPerMinCounter;
private double compilesAllowedPerNano;
public ScriptService(Settings settings, Environment env,
ResourceWatcherService resourceWatcherService, ScriptEngineRegistry scriptEngineRegistry,
ScriptContextRegistry scriptContextRegistry, ScriptSettings scriptSettings) throws IOException {
@ -165,6 +174,13 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
// automatic reload is disable just load scripts once
fileWatcher.init();
}
this.lastInlineCompileTime = System.nanoTime();
this.setMaxCompilationsPerMinute(SCRIPT_MAX_COMPILATIONS_PER_MINUTE.get(settings));
}
void registerClusterSettingsListeners(ClusterSettings clusterSettings) {
clusterSettings.addSettingsUpdateConsumer(SCRIPT_MAX_COMPILATIONS_PER_MINUTE, this::setMaxCompilationsPerMinute);
}
@Override
@ -188,7 +204,12 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
return scriptEngineService;
}
void setMaxCompilationsPerMinute(Integer newMaxPerMinute) {
this.totalCompilesPerMinute = newMaxPerMinute;
// Reset the counter to allow new compilations
this.scriptsPerMinCounter = totalCompilesPerMinute;
this.compilesAllowedPerNano = ((double) totalCompilesPerMinute) / TimeValue.timeValueMinutes(1).nanos();
}
/**
* Checks if a script can be executed and compiles it if needed, or returns the previously compiled and cached script.
@ -224,6 +245,38 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
return compileInternal(script, params);
}
/**
* Check whether there have been too many compilations within the last minute, throwing a circuit breaking exception if so.
* This is a variant of the token bucket algorithm: https://en.wikipedia.org/wiki/Token_bucket
*
* It can be thought of as a bucket with water, every time the bucket is checked, water is added proportional to the amount of time that
* elapsed since the last time it was checked. If there is enough water, some is removed and the request is allowed. If there is not
* enough water the request is denied. Just like a normal bucket, if water is added that overflows the bucket, the extra water/capacity
* is discarded - there can never be more water in the bucket than the size of the bucket.
*/
void checkCompilationLimit() {
long now = System.nanoTime();
long timePassed = now - lastInlineCompileTime;
lastInlineCompileTime = now;
scriptsPerMinCounter += ((double) timePassed) * compilesAllowedPerNano;
// It's been over the time limit anyway, readjust the bucket to be level
if (scriptsPerMinCounter > totalCompilesPerMinute) {
scriptsPerMinCounter = totalCompilesPerMinute;
}
// If there is enough tokens in the bucket, allow the request and decrease the tokens by 1
if (scriptsPerMinCounter >= 1) {
scriptsPerMinCounter -= 1.0;
} else {
// Otherwise reject the request
throw new CircuitBreakingException("[script] Too many dynamic script compilations within one minute, max: [" +
totalCompilesPerMinute + "/min]; please use on-disk, indexed, or scripts with parameters instead; " +
"this limit can be changed by the [" + SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey() + "] setting");
}
}
/**
* Compiles a script straight-away, or returns the previously compiled and cached script,
* without checking if it can be executed based on settings.
@ -271,28 +324,44 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
CacheKey cacheKey = new CacheKey(scriptEngineService, type == ScriptType.INLINE ? null : name, code, params);
CompiledScript compiledScript = cache.get(cacheKey);
if (compiledScript == null) {
//Either an un-cached inline script or indexed script
//If the script type is inline the name will be the same as the code for identification in exceptions
try {
// but give the script engine the chance to be better, give it separate name + source code
// for the inline case, then its anonymous: null.
String actualName = (type == ScriptType.INLINE) ? null : name;
compiledScript = new CompiledScript(type, name, lang, scriptEngineService.compile(actualName, code, params));
} catch (ScriptException good) {
// TODO: remove this try-catch completely, when all script engines have good exceptions!
throw good; // its already good
} catch (Exception exception) {
throw new GeneralScriptException("Failed to compile " + type + " script [" + name + "] using lang [" + lang + "]", exception);
}
//Since the cache key is the script content itself we don't need to
//invalidate/check the cache if an indexed script changes.
scriptMetrics.onCompilation();
cache.put(cacheKey, compiledScript);
if (compiledScript != null) {
return compiledScript;
}
return compiledScript;
// Synchronize so we don't compile scripts many times during multiple shards all compiling a script
synchronized (this) {
// Retrieve it again in case it has been put by a different thread
compiledScript = cache.get(cacheKey);
if (compiledScript == null) {
try {
// Either an un-cached inline script or indexed script
// If the script type is inline the name will be the same as the code for identification in exceptions
// but give the script engine the chance to be better, give it separate name + source code
// for the inline case, then its anonymous: null.
String actualName = (type == ScriptType.INLINE) ? null : name;
if (logger.isTraceEnabled()) {
logger.trace("compiling script, type: [{}], lang: [{}], params: [{}]", type, lang, params);
}
// Check whether too many compilations have happened
checkCompilationLimit();
compiledScript = new CompiledScript(type, name, lang, scriptEngineService.compile(actualName, code, params));
} catch (ScriptException good) {
// TODO: remove this try-catch completely, when all script engines have good exceptions!
throw good; // its already good
} catch (Exception exception) {
throw new GeneralScriptException("Failed to compile " + type + " script [" + name + "] using lang [" + lang + "]", exception);
}
// Since the cache key is the script content itself we don't need to
// invalidate/check the cache if an indexed script changes.
scriptMetrics.onCompilation();
cache.put(cacheKey, compiledScript);
}
return compiledScript;
}
}
private String validateScriptLanguage(String scriptLang) {

View File

@ -26,6 +26,7 @@ import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.breaker.CircuitBreakingException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.Streams;
@ -80,6 +81,7 @@ public class ScriptServiceTests extends ESTestCase {
baseSettings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(Environment.PATH_CONF_SETTING.getKey(), genericConfigFolder)
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 10000)
.build();
resourceWatcherService = new ResourceWatcherService(baseSettings, null);
scriptEngineService = new TestEngineService();
@ -123,6 +125,30 @@ public class ScriptServiceTests extends ESTestCase {
};
}
public void testCompilationCircuitBreaking() throws Exception {
buildScriptService(Settings.EMPTY);
scriptService.setMaxCompilationsPerMinute(1);
scriptService.checkCompilationLimit(); // should pass
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
scriptService.setMaxCompilationsPerMinute(2);
scriptService.checkCompilationLimit(); // should pass
scriptService.checkCompilationLimit(); // should pass
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
int count = randomIntBetween(5, 50);
scriptService.setMaxCompilationsPerMinute(count);
for (int i = 0; i < count; i++) {
scriptService.checkCompilationLimit(); // should pass
}
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
scriptService.setMaxCompilationsPerMinute(0);
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
scriptService.setMaxCompilationsPerMinute(Integer.MAX_VALUE);
int largeLimit = randomIntBetween(1000, 10000);
for (int i = 0; i < largeLimit; i++) {
scriptService.checkCompilationLimit();
}
}
public void testNotSupportedDisableDynamicSetting() throws IOException {
try {
buildScriptService(Settings.builder().put(ScriptService.DISABLE_DYNAMIC_SCRIPTING_SETTING, randomUnicodeOfLength(randomIntBetween(1, 10))).build());

View File

@ -23,6 +23,7 @@ integTest {
cluster {
setting 'script.inline', 'true'
setting 'script.stored', 'true'
setting 'script.max_compilations_per_minute', '1000'
Closure configFile = {
extraConfigFile it, "src/test/cluster/config/$it"
}

View File

@ -72,5 +72,18 @@ memory on a node. The memory usage is based on the content length of the request
A constant that all in flight requests estimations are multiplied with to determine a
final estimation. Defaults to 1
[[http-circuit-breaker]]
[[script-compilation-circuit-breaker]]
[float]
==== Script compilation circuit breaker
Slightly different than the previous memory-based circuit breaker, the script
compilation circuit breaker limits the number of inline script compilations
within a period of time.
See the "prefer-parameters" section of the <<modules-scripting-using,scripting>>
documentation for more information.
`script.max_compilations_per_minute`::
Limit for the number of unique dynamic scripts within a minute that are
allowed to be compiled. Defaults to 15.

View File

@ -103,6 +103,12 @@ Instead, pass it in as a named parameter:
The first version has to be recompiled every time the multiplier changes. The
second version is only compiled once.
If you compile too many unique scripts within a small amount of time,
Elasticsearch will reject the new dynamic scripts with a
`circuit_breaking_exception` error. By default, up to 15 inline scripts per
minute will be compiled. You can change this setting dynamically by setting
`script.max_compilations_per_minute`.
========================================

View File

@ -35,3 +35,8 @@ dependencyLicenses {
mapping from: /asm-.*/, to: 'asm'
}
integTest {
cluster {
setting 'script.max_compilations_per_minute', '1000'
}
}

View File

@ -30,6 +30,7 @@ integTest {
cluster {
setting 'script.inline', 'true'
setting 'script.stored', 'true'
setting 'script.max_compilations_per_minute', '1000'
}
}

View File

@ -0,0 +1,72 @@
setup:
- do:
indices.create:
index: test
- do:
index:
index: test
type: test
id: 1
body: { foo: bar }
refresh: wait_for
---
teardown:
- do:
cluster.put_settings:
body:
transient:
script.max_compilations_per_minute: null
---
"circuit breaking with too many scripts":
- do:
cluster.put_settings:
body:
transient:
script.max_compilations_per_minute: 1
- do:
search:
index: test
type: test
body:
size: 1
script_fields:
myfield:
script:
inline: "\"aoeu\""
lang: groovy
- match: { hits.total: 1 }
- do:
catch: /Too many dynamic script compilations within one minute/
search:
index: test
type: test
body:
size: 1
script_fields:
myfield:
script:
inline: "\"aoeuaoeu\""
lang: groovy
---
"no bad settings":
- do:
catch: /Failed to parse value \[-1\] for setting \[script.max_compilations_per_minute\] must be >= 0/
cluster.put_settings:
body:
transient:
script.max_compilations_per_minute: -1
- do:
catch: /Failed to parse value \[99999999999\] for setting \[script.max_compilations_per_minute\]/
cluster.put_settings:
body:
transient:
script.max_compilations_per_minute: 99999999999

View File

@ -31,6 +31,7 @@ integTest {
cluster {
setting 'script.inline', 'true'
setting 'script.stored', 'true'
setting 'script.max_compilations_per_minute', '1000'
setting 'path.scripts', "${project.buildDir}/resources/test/templates"
}
}

View File

@ -46,3 +46,9 @@ dependencies {
ant.references['regenerate.classpath'] = new Path(ant.project, configurations.regenerate.asPath)
ant.importBuild 'ant.xml'
integTest {
cluster {
setting 'script.max_compilations_per_minute', '1000'
}
}

View File

@ -30,5 +30,6 @@ integTest {
cluster {
setting 'script.inline', 'true'
setting 'script.stored', 'true'
setting 'script.max_compilations_per_minute', '1000'
}
}

View File

@ -30,6 +30,7 @@ integTest {
cluster {
setting 'script.inline', 'true'
setting 'script.stored', 'true'
setting 'script.max_compilations_per_minute', '1000'
}
}

View File

@ -18,3 +18,9 @@
*/
apply plugin: 'elasticsearch.rest-test'
integTest {
cluster {
setting 'script.max_compilations_per_minute', '1000'
}
}

View File

@ -30,6 +30,7 @@ import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.discovery.DiscoveryModule;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.transport.MockTcpTransportPlugin;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ShardOperationFailedException;
@ -1631,6 +1632,7 @@ public abstract class ESIntegTestCase extends ESTestCase {
// from failing on nodes without enough disk space
.put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "1b")
.put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "1b")
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 1000)
.put("script.stored", "true")
.put("script.inline", "true")
// by default we never cache below 10k docs in a segment,

View File

@ -177,6 +177,7 @@ public abstract class ESSingleNodeTestCase extends ESTestCase {
.put("node.name", nodeName())
.put("script.inline", "true")
.put("script.stored", "true")
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 1000)
.put(EsExecutors.PROCESSORS_SETTING.getKey(), 1) // limit the number of threads created
.put(NetworkModule.HTTP_ENABLED.getKey(), false)
.put("discovery.type", "local")

View File

@ -316,6 +316,8 @@ public final class InternalTestCluster extends TestCluster {
// from failing on nodes without enough disk space
builder.put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "1b");
builder.put(DiskThresholdDecider.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "1b");
// Some tests make use of scripting quite a bit, so increase the limit for integration tests
builder.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 1000);
if (TEST_NIGHTLY) {
builder.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), RandomInts.randomIntBetween(random, 5, 10));
builder.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING.getKey(), RandomInts.randomIntBetween(random, 5, 10));