ScriptService: Replace max compilation per minute setting with max compilation rate (#26399)

The current script service has a script compilation limit for a one
minute window. This is set to a small default value of 15. Instead of
increasing that default value, this commit introduces a new setting 
that allows to configure a rate per time unit, so that the script service can deal with bursts better.

The new setting is named `script.max_compilations_rate`,
requires a nonnegative number and a positive time value.

The default is `75/5m`, which is equivalent to the existing 15 per minute.
This commit is contained in:
Alexander Reelsen 2017-09-01 10:15:27 +02:00 committed by GitHub
parent 53848877c8
commit 80d0a32f8e
16 changed files with 112 additions and 47 deletions

View File

@ -329,7 +329,11 @@ class ClusterFormationTasks {
esConfig['cluster.routing.allocation.disk.watermark.flood_stage'] = '1b'
}
// increase script compilation limit since tests can rapid-fire script compilations
if (Version.fromString(node.nodeVersion).major > 6) {
esConfig['script.max_compilations_rate'] = '2048/1m'
} else {
esConfig['script.max_compilations_per_minute'] = 2048
}
esConfig.putAll(node.config.settings)
Task writeConfig = project.tasks.create(name: name, type: DefaultTask, dependsOn: setup)

View File

@ -316,7 +316,7 @@ public final class ClusterSettings extends AbstractScopedSettings {
ScriptService.SCRIPT_CACHE_SIZE_SETTING,
ScriptService.SCRIPT_CACHE_EXPIRE_SETTING,
ScriptService.SCRIPT_MAX_SIZE_IN_BYTES,
ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE,
ScriptService.SCRIPT_MAX_COMPILATIONS_RATE,
ScriptService.TYPES_ALLOWED_SETTING,
ScriptService.CONTEXTS_ALLOWED_SETTING,
IndicesService.INDICES_CACHE_CLEAN_INTERVAL_SETTING,

View File

@ -39,6 +39,7 @@ import org.elasticsearch.common.cache.Cache;
import org.elasticsearch.common.cache.CacheBuilder;
import org.elasticsearch.common.cache.RemovalListener;
import org.elasticsearch.common.cache.RemovalNotification;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
@ -60,14 +61,47 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
static final String DISABLE_DYNAMIC_SCRIPTING_SETTING = "script.disable_dynamic";
// a parsing function that requires a non negative int and a timevalue as arguments split by a slash
// this allows you to easily define rates
static final Function<String, Tuple<Integer, TimeValue>> MAX_COMPILATION_RATE_FUNCTION =
(String value) -> {
if (value.contains("/") == false || value.startsWith("/") || value.endsWith("/")) {
throw new IllegalArgumentException("parameter must contain a positive integer and a timevalue, i.e. 10/1m, but was [" +
value + "]");
}
int idx = value.indexOf("/");
String count = value.substring(0, idx);
String time = value.substring(idx + 1);
try {
int rate = Integer.parseInt(count);
if (rate < 0) {
throw new IllegalArgumentException("rate [" + rate + "] must be positive");
}
TimeValue timeValue = TimeValue.parseTimeValue(time, "script.max_compilations_rate");
if (timeValue.nanos() <= 0) {
throw new IllegalArgumentException("time value [" + time + "] must be positive");
}
// protect against a too hard to check limit, like less than a minute
if (timeValue.seconds() < 60) {
throw new IllegalArgumentException("time value [" + time + "] must be at least on a one minute resolution");
}
return Tuple.tuple(rate, timeValue);
} catch (NumberFormatException e) {
// the number format exception message is so confusing, that it makes more sense to wrap it with a useful one
throw new IllegalArgumentException("could not parse [" + count + "] as integer in value [" + value + "]", e);
}
};
public static final Setting<Integer> SCRIPT_CACHE_SIZE_SETTING =
Setting.intSetting("script.cache.max_size", 100, 0, Property.NodeScope);
public static final Setting<TimeValue> SCRIPT_CACHE_EXPIRE_SETTING =
Setting.positiveTimeSetting("script.cache.expire", TimeValue.timeValueMillis(0), Property.NodeScope);
public static final Setting<Integer> SCRIPT_MAX_SIZE_IN_BYTES =
Setting.intSetting("script.max_size_in_bytes", 65535, Property.NodeScope);
public static final Setting<Integer> SCRIPT_MAX_COMPILATIONS_PER_MINUTE =
Setting.intSetting("script.max_compilations_per_minute", 15, 0, Property.Dynamic, Property.NodeScope);
// public Setting(String key, Function<Settings, String> defaultValue, Function<String, T> parser, Property... properties) {
public static final Setting<Tuple<Integer, TimeValue>> SCRIPT_MAX_COMPILATIONS_RATE =
new Setting<>("script.max_compilations_rate", "75/5m", MAX_COMPILATION_RATE_FUNCTION, Property.Dynamic, Property.NodeScope);
public static final String ALLOW_NONE = "none";
@ -88,9 +122,9 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
private ClusterState clusterState;
private int totalCompilesPerMinute;
private Tuple<Integer, TimeValue> rate;
private long lastInlineCompileTime;
private double scriptsPerMinCounter;
private double scriptsPerTimeWindow;
private double compilesAllowedPerNano;
public ScriptService(Settings settings, Map<String, ScriptEngine> engines, Map<String, ScriptContext<?>> contexts) {
@ -188,11 +222,11 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
this.cache = cacheBuilder.removalListener(new ScriptCacheRemovalListener()).build();
this.lastInlineCompileTime = System.nanoTime();
this.setMaxCompilationsPerMinute(SCRIPT_MAX_COMPILATIONS_PER_MINUTE.get(settings));
this.setMaxCompilationRate(SCRIPT_MAX_COMPILATIONS_RATE.get(settings));
}
void registerClusterSettingsListeners(ClusterSettings clusterSettings) {
clusterSettings.addSettingsUpdateConsumer(SCRIPT_MAX_COMPILATIONS_PER_MINUTE, this::setMaxCompilationsPerMinute);
clusterSettings.addSettingsUpdateConsumer(SCRIPT_MAX_COMPILATIONS_RATE, this::setMaxCompilationRate);
}
@Override
@ -208,11 +242,16 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
return scriptEngine;
}
void setMaxCompilationsPerMinute(Integer newMaxPerMinute) {
this.totalCompilesPerMinute = newMaxPerMinute;
/**
* This configures the maximum script compilations per five minute window.
*
* @param newRate the new expected maximum number of compilations per five minute window
*/
void setMaxCompilationRate(Tuple<Integer, TimeValue> newRate) {
this.rate = newRate;
// Reset the counter to allow new compilations
this.scriptsPerMinCounter = totalCompilesPerMinute;
this.compilesAllowedPerNano = ((double) totalCompilesPerMinute) / TimeValue.timeValueMinutes(1).nanos();
this.scriptsPerTimeWindow = rate.v1();
this.compilesAllowedPerNano = ((double) rate.v1()) / newRate.v2().nanos();
}
/**
@ -325,22 +364,22 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust
long timePassed = now - lastInlineCompileTime;
lastInlineCompileTime = now;
scriptsPerMinCounter += (timePassed) * compilesAllowedPerNano;
scriptsPerTimeWindow += (timePassed) * compilesAllowedPerNano;
// It's been over the time limit anyway, readjust the bucket to be level
if (scriptsPerMinCounter > totalCompilesPerMinute) {
scriptsPerMinCounter = totalCompilesPerMinute;
if (scriptsPerTimeWindow > rate.v1()) {
scriptsPerTimeWindow = rate.v1();
}
// If there is enough tokens in the bucket, allow the request and decrease the tokens by 1
if (scriptsPerMinCounter >= 1) {
scriptsPerMinCounter -= 1.0;
if (scriptsPerTimeWindow >= 1) {
scriptsPerTimeWindow -= 1.0;
} else {
scriptMetrics.onCompilationLimit();
// Otherwise reject the request
throw new CircuitBreakingException("[script] Too many dynamic script compilations within one minute, max: [" +
totalCompilesPerMinute + "/min]; please use on-disk, indexed, or scripts with parameters instead; " +
"this limit can be changed by the [" + SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey() + "] setting");
throw new CircuitBreakingException("[script] Too many dynamic script compilations within, max: [" +
rate.v1() + "/" + rate.v2() +"]; please use indexed, or scripts with parameters instead; " +
"this limit can be changed by the [" + SCRIPT_MAX_COMPILATIONS_RATE.getKey() + "] setting");
}
}

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.script;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest;
import org.elasticsearch.cluster.ClusterName;
@ -26,7 +27,9 @@ import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.breaker.CircuitBreakingException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.env.Environment;
@ -39,7 +42,9 @@ import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import static org.elasticsearch.script.ScriptService.MAX_COMPILATION_RATE_FUNCTION;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.sameInstance;
@ -55,7 +60,7 @@ public class ScriptServiceTests extends ESTestCase {
public void setup() throws IOException {
baseSettings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 10000)
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE.getKey(), "10000/1m")
.build();
Map<String, Function<Map<String, Object>, Object>> scripts = new HashMap<>();
for (int i = 0; i < 20; ++i) {
@ -82,30 +87,58 @@ public class ScriptServiceTests extends ESTestCase {
};
}
// even though circuit breaking is allowed to be configured per minute, we actually weigh this over five minutes
// simply by multiplying by five, so even setting it to one, requires five compilations to break
public void testCompilationCircuitBreaking() throws Exception {
buildScriptService(Settings.EMPTY);
scriptService.setMaxCompilationsPerMinute(1);
scriptService.setMaxCompilationRate(Tuple.tuple(1, TimeValue.timeValueMinutes(1)));
scriptService.checkCompilationLimit(); // should pass
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
scriptService.setMaxCompilationsPerMinute(2);
scriptService.setMaxCompilationRate(Tuple.tuple(2, TimeValue.timeValueMinutes(1)));
scriptService.checkCompilationLimit(); // should pass
scriptService.checkCompilationLimit(); // should pass
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
int count = randomIntBetween(5, 50);
scriptService.setMaxCompilationsPerMinute(count);
scriptService.setMaxCompilationRate(Tuple.tuple(count, TimeValue.timeValueMinutes(1)));
for (int i = 0; i < count; i++) {
scriptService.checkCompilationLimit(); // should pass
}
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
scriptService.setMaxCompilationsPerMinute(0);
scriptService.setMaxCompilationRate(Tuple.tuple(0, TimeValue.timeValueMinutes(1)));
expectThrows(CircuitBreakingException.class, () -> scriptService.checkCompilationLimit());
scriptService.setMaxCompilationsPerMinute(Integer.MAX_VALUE);
scriptService.setMaxCompilationRate(Tuple.tuple(Integer.MAX_VALUE, TimeValue.timeValueMinutes(1)));
int largeLimit = randomIntBetween(1000, 10000);
for (int i = 0; i < largeLimit; i++) {
scriptService.checkCompilationLimit();
}
}
public void testMaxCompilationRateSetting() throws Exception {
assertThat(MAX_COMPILATION_RATE_FUNCTION.apply("10/1m"), is(Tuple.tuple(10, TimeValue.timeValueMinutes(1))));
assertThat(MAX_COMPILATION_RATE_FUNCTION.apply("10/60s"), is(Tuple.tuple(10, TimeValue.timeValueMinutes(1))));
assertException("10/m", ElasticsearchParseException.class, "failed to parse [m]");
assertException("6/1.6m", ElasticsearchParseException.class, "failed to parse [1.6m], fractional time values are not supported");
assertException("foo/bar", IllegalArgumentException.class, "could not parse [foo] as integer in value [foo/bar]");
assertException("6.0/1m", IllegalArgumentException.class, "could not parse [6.0] as integer in value [6.0/1m]");
assertException("6/-1m", IllegalArgumentException.class, "time value [-1m] must be positive");
assertException("6/0m", IllegalArgumentException.class, "time value [0m] must be positive");
assertException("10", IllegalArgumentException.class,
"parameter must contain a positive integer and a timevalue, i.e. 10/1m, but was [10]");
assertException("anything", IllegalArgumentException.class,
"parameter must contain a positive integer and a timevalue, i.e. 10/1m, but was [anything]");
assertException("/1m", IllegalArgumentException.class,
"parameter must contain a positive integer and a timevalue, i.e. 10/1m, but was [/1m]");
assertException("10/", IllegalArgumentException.class,
"parameter must contain a positive integer and a timevalue, i.e. 10/1m, but was [10/]");
assertException("-1/1m", IllegalArgumentException.class, "rate [-1] must be positive");
assertException("10/5s", IllegalArgumentException.class, "time value [5s] must be at least on a one minute resolution");
}
private void assertException(String rate, Class<? extends Exception> clazz, String message) {
Exception e = expectThrows(clazz, () -> MAX_COMPILATION_RATE_FUNCTION.apply(rate));
assertThat(e.getMessage(), is(message));
}
public void testNotSupportedDisableDynamicSetting() throws IOException {
try {
buildScriptService(Settings.builder().put(ScriptService.DISABLE_DYNAMIC_SCRIPTING_SETTING, randomUnicodeOfLength(randomIntBetween(1, 10))).build());

View File

@ -95,6 +95,7 @@ public abstract class AbstractSortTestCase<T extends SortBuilder<T>> extends EST
public static void afterClass() throws Exception {
namedWriteableRegistry = null;
xContentRegistry = null;
scriptService = null;
}
/** Returns random sort that is put under test */

View File

@ -20,7 +20,6 @@
apply plugin: 'elasticsearch.docs-test'
integTestCluster {
setting 'script.max_compilations_per_minute', '1000'
/* Enable regexes in painless so our tests don't complain about example
* snippets that use them. */
setting 'script.painless.regex.enabled', 'true'

View File

@ -83,7 +83,8 @@ within a period of time.
See the "prefer-parameters" section of the <<modules-scripting-using,scripting>>
documentation for more information.
`script.max_compilations_per_minute`::
`script.max_compilations_rate`::
Limit for the number of unique dynamic scripts within a minute that are
allowed to be compiled. Defaults to 15.
Limit for the number of unique dynamic scripts within a certain interval
that are allowed to be compiled. Defaults to 75/5m, meaning 75 every 5
minutes.

View File

@ -104,7 +104,7 @@ If you compile too many unique scripts within a small amount of time,
Elasticsearch will reject the new dynamic scripts with a
`circuit_breaking_exception` error. By default, up to 15 inline scripts per
minute will be compiled. You can change this setting dynamically by setting
`script.max_compilations_per_minute`.
`script.max_compilations_rate`.
========================================

View File

@ -35,6 +35,3 @@ dependencyLicenses {
mapping from: /asm-.*/, to: 'asm'
}
integTestCluster {
setting 'script.max_compilations_per_minute', '1000'
}

View File

@ -27,6 +27,3 @@ dependencies {
compile "com.github.spullara.mustache.java:compiler:0.9.3"
}
integTestCluster {
setting 'script.max_compilations_per_minute', '1000'
}

View File

@ -37,10 +37,6 @@ test {
jvmArg '-XX:-OmitStackTraceInFastThrow'
}
integTestCluster {
setting 'script.max_compilations_per_minute', '1000'
}
/* Build Javadoc for the Java classes in Painless's public API that are in the
* Painless plugin */
task apiJavadoc(type: Javadoc) {

View File

@ -30,5 +30,4 @@ dependencies {
integTestCluster {
plugin ':plugins:ingest-geoip'
setting 'script.max_compilations_per_minute', '1000'
}

View File

@ -21,7 +21,6 @@ apply plugin: 'elasticsearch.standalone-rest-test'
apply plugin: 'elasticsearch.rest-test'
integTestCluster {
setting 'script.max_compilations_per_minute', '1000'
// Whitelist reindexing from the local node so we can test it.
setting 'reindex.remote.whitelist', '127.0.0.1:*'
}

View File

@ -1694,7 +1694,7 @@ public abstract class ESIntegTestCase extends ESTestCase {
.put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "1b")
.put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "1b")
.put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), "1b")
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 2048)
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE.getKey(), "2048/1m")
// by default we never cache below 10k docs in a segment,
// bypass this limit so that caching gets some testing in
// integration tests that usually create few documents

View File

@ -170,7 +170,7 @@ public abstract class ESSingleNodeTestCase extends ESTestCase {
// This needs to tie into the ESIntegTestCase#indexSettings() method
.put(Environment.PATH_SHARED_DATA_SETTING.getKey(), createTempDir().getParent())
.put("node.name", "node_s_0")
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 1000)
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE.getKey(), "1000/1m")
.put(EsExecutors.PROCESSORS_SETTING.getKey(), 1) // limit the number of threads created
.put(NetworkModule.HTTP_ENABLED.getKey(), false)
.put("transport.type", getTestTransportType())

View File

@ -337,7 +337,7 @@ public final class InternalTestCluster extends TestCluster {
builder.put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "1b");
builder.put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), "1b");
// Some tests make use of scripting quite a bit, so increase the limit for integration tests
builder.put(ScriptService.SCRIPT_MAX_COMPILATIONS_PER_MINUTE.getKey(), 1000);
builder.put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE.getKey(), "1000/1m");
builder.put(OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING.getKey(), random.nextBoolean());
if (TEST_NIGHTLY) {
builder.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), RandomNumbers.randomIntBetween(random, 5, 10));