[7.x][ML] Remove index.unassigned.node_left.delayed_timeout setting from M… () ()

This setting was introduced with the purpose of reducing the time taken by
tests that shut nodes down, such as `MlDistributedFailureIT` and
`NetworkDisruptionIT`. However, it is unfortunate to have to set the setting
to an explicit value in production. In addition, and most importantly, dynamically
choosing the value for this setting makes it impossible to adopt static index template configs
that we register via `IndexTemplateRegistry`, which we need to use in order to start
registering ILM policies for the ML indices.

This commit removes this setting from our templates. I ran the tests a few times and did
not see the execution time differ significantly.

Backport of 
This commit is contained in:
Dimitris Athanasiou 2020-01-31 20:28:29 +02:00 committed by GitHub
parent 5ca51562ec
commit 55b5c8f703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 16 deletions
x-pack/plugin/ml/src
main/java/org/elasticsearch/xpack/ml
test/java/org/elasticsearch/xpack/ml

@ -22,7 +22,6 @@ import org.elasticsearch.cluster.metadata.IndexTemplateMetaData;
import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodeRole;
import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Module; import org.elasticsearch.common.inject.Module;
@ -894,13 +893,6 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
@Override @Override
public UnaryOperator<Map<String, IndexTemplateMetaData>> getIndexTemplateMetaDataUpgrader() { public UnaryOperator<Map<String, IndexTemplateMetaData>> getIndexTemplateMetaDataUpgrader() {
return templates -> { return templates -> {
final TimeValue delayedNodeTimeOutSetting;
// Whether we are using native process is a good way to detect whether we are in dev / test mode:
if (MachineLearningField.AUTODETECT_PROCESS.get(settings)) {
delayedNodeTimeOutSetting = UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(settings);
} else {
delayedNodeTimeOutSetting = TimeValue.timeValueNanos(0);
}
try (XContentBuilder auditMapping = ElasticsearchMappings.auditMessageMapping()) { try (XContentBuilder auditMapping = ElasticsearchMappings.auditMessageMapping()) {
IndexTemplateMetaData notificationMessageTemplate = IndexTemplateMetaData notificationMessageTemplate =
@ -912,8 +904,7 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
// Our indexes are small and one shard puts the // Our indexes are small and one shard puts the
// least possible burden on Elasticsearch // least possible burden on Elasticsearch
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1") .put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1"))
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
.build(); .build();
templates.put(AuditorField.NOTIFICATIONS_INDEX, notificationMessageTemplate); templates.put(AuditorField.NOTIFICATIONS_INDEX, notificationMessageTemplate);
} catch (IOException e) { } catch (IOException e) {
@ -928,8 +919,7 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
// Our indexes are small and one shard puts the // Our indexes are small and one shard puts the
// least possible burden on Elasticsearch // least possible burden on Elasticsearch
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1") .put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1"))
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
.version(Version.CURRENT.id) .version(Version.CURRENT.id)
.putMapping(SINGLE_MAPPING_NAME, Strings.toString(docMapping)) .putMapping(SINGLE_MAPPING_NAME, Strings.toString(docMapping))
.build(); .build();
@ -947,7 +937,6 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
// least possible burden on Elasticsearch // least possible burden on Elasticsearch
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1") .put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting)
.put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(), .put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(),
AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW)) AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW))
.version(Version.CURRENT.id) .version(Version.CURRENT.id)
@ -964,8 +953,7 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
.patterns(Collections.singletonList(AnomalyDetectorsIndex.jobStateIndexPattern())) .patterns(Collections.singletonList(AnomalyDetectorsIndex.jobStateIndexPattern()))
// TODO review these settings // TODO review these settings
.settings(Settings.builder() .settings(Settings.builder()
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1") .put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1"))
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
.putMapping(SINGLE_MAPPING_NAME, Strings.toString(stateMapping)) .putMapping(SINGLE_MAPPING_NAME, Strings.toString(stateMapping))
.version(Version.CURRENT.id) .version(Version.CURRENT.id)
.build(); .build();
@ -981,7 +969,6 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
.patterns(Collections.singletonList(AnomalyDetectorsIndex.jobResultsIndexPrefix() + "*")) .patterns(Collections.singletonList(AnomalyDetectorsIndex.jobResultsIndexPrefix() + "*"))
.settings(Settings.builder() .settings(Settings.builder()
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1") .put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting)
// Sacrifice durability for performance: in the event of power // Sacrifice durability for performance: in the event of power
// failure we can lose the last 5 seconds of changes, but it's // failure we can lose the last 5 seconds of changes, but it's
// much faster // much faster

@ -68,6 +68,8 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet(); client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet();
awaitJobOpenedAndAssigned(job.getId(), null); awaitJobOpenedAndAssigned(job.getId(), null);
setMlIndicesDelayedNodeLeftTimeoutToZero();
ensureGreen(); // replicas must be assigned, otherwise we could lose a whole index ensureGreen(); // replicas must be assigned, otherwise we could lose a whole index
internalCluster().stopRandomDataNode(); internalCluster().stopRandomDataNode();
ensureStableCluster(3); ensureStableCluster(3);
@ -109,6 +111,9 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
OpenJobAction.Request openJobRequest = new OpenJobAction.Request(job.getId()); OpenJobAction.Request openJobRequest = new OpenJobAction.Request(job.getId());
client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet(); client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet();
awaitJobOpenedAndAssigned(job.getId(), null); awaitJobOpenedAndAssigned(job.getId(), null);
setMlIndicesDelayedNodeLeftTimeoutToZero();
StartDatafeedAction.Request startDataFeedRequest = new StartDatafeedAction.Request(config.getId(), 0L); StartDatafeedAction.Request startDataFeedRequest = new StartDatafeedAction.Request(config.getId(), 0L);
client().execute(StartDatafeedAction.INSTANCE, startDataFeedRequest); client().execute(StartDatafeedAction.INSTANCE, startDataFeedRequest);

@ -418,6 +418,8 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
assertEquals(JobState.OPENED, statsResponse.getResponse().results().get(0).getState()); assertEquals(JobState.OPENED, statsResponse.getResponse().results().get(0).getState());
}, 20, TimeUnit.SECONDS); }, 20, TimeUnit.SECONDS);
setMlIndicesDelayedNodeLeftTimeoutToZero();
StartDatafeedAction.Request startDatafeedRequest = new StartDatafeedAction.Request(config.getId(), 0L); StartDatafeedAction.Request startDatafeedRequest = new StartDatafeedAction.Request(config.getId(), 0L);
client().execute(StartDatafeedAction.INSTANCE, startDatafeedRequest).get(); client().execute(StartDatafeedAction.INSTANCE, startDatafeedRequest).get();
} }

@ -52,6 +52,9 @@ public class NetworkDisruptionIT extends BaseMlIntegTestCase {
OpenJobAction.Request openJobRequest = new OpenJobAction.Request(job.getId()); OpenJobAction.Request openJobRequest = new OpenJobAction.Request(job.getId());
AcknowledgedResponse openJobResponse = client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet(); AcknowledgedResponse openJobResponse = client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet();
assertTrue(openJobResponse.isAcknowledged()); assertTrue(openJobResponse.isAcknowledged());
setMlIndicesDelayedNodeLeftTimeoutToZero();
ensureGreen(); ensureGreen();
// Record which node the job starts off on // Record which node the job starts off on

@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.support;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse; import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.bulk.BulkResponse;
@ -17,6 +18,7 @@ import org.elasticsearch.analysis.common.CommonAnalysisPlugin;
import org.elasticsearch.client.Client; import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.unit.TimeValue;
@ -402,4 +404,13 @@ public abstract class BaseMlIntegTestCase extends ESIntegTestCase {
}); });
return jobNode.get(); return jobNode.get();
} }
/**
 * Sets the delayed node-left allocation timeout to 0 on every index matching {@code .ml-*},
 * so that tests which shut nodes down are not slowed by delayed shard allocation.
 */
protected void setMlIndicesDelayedNodeLeftTimeoutToZero() {
    final Settings noAllocationDelay = Settings.builder()
        .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), 0)
        .build();
    final UpdateSettingsRequest updateRequest = new UpdateSettingsRequest(".ml-*").settings(noAllocationDelay);
    // Block until the settings update has completed before the test carries on.
    client().admin().indices().updateSettings(updateRequest).actionGet();
}
} }