This setting (`UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING`) was introduced with the purpose of reducing the time taken by tests that shut nodes down, such as `MlDistributedFailureIT` and `NetworkDisruptionIT`. However, it is unfortunate to have to set it to an explicit value in production. In addition, and most importantly, dynamically choosing the value for this setting makes it impossible to adopt static index template configs that we register via `IndexTemplateRegistry`, which we need to use in order to start registering ILM policies for the ML indices. This commit removes this setting from our templates. I ran the tests a few times and could not see execution time differing significantly. Backport of #51740
This commit is contained in:
parent
5ca51562ec
commit
55b5c8f703
|
@ -22,7 +22,6 @@ import org.elasticsearch.cluster.metadata.IndexTemplateMetaData;
|
|||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
||||
import org.elasticsearch.cluster.routing.UnassignedInfo;
|
||||
import org.elasticsearch.cluster.service.ClusterService;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.inject.Module;
|
||||
|
@ -894,13 +893,6 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
|
|||
@Override
|
||||
public UnaryOperator<Map<String, IndexTemplateMetaData>> getIndexTemplateMetaDataUpgrader() {
|
||||
return templates -> {
|
||||
final TimeValue delayedNodeTimeOutSetting;
|
||||
// Whether we are using native process is a good way to detect whether we are in dev / test mode:
|
||||
if (MachineLearningField.AUTODETECT_PROCESS.get(settings)) {
|
||||
delayedNodeTimeOutSetting = UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(settings);
|
||||
} else {
|
||||
delayedNodeTimeOutSetting = TimeValue.timeValueNanos(0);
|
||||
}
|
||||
|
||||
try (XContentBuilder auditMapping = ElasticsearchMappings.auditMessageMapping()) {
|
||||
IndexTemplateMetaData notificationMessageTemplate =
|
||||
|
@ -912,8 +904,7 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
|
|||
// Our indexes are small and one shard puts the
|
||||
// least possible burden on Elasticsearch
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
|
||||
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1"))
|
||||
.build();
|
||||
templates.put(AuditorField.NOTIFICATIONS_INDEX, notificationMessageTemplate);
|
||||
} catch (IOException e) {
|
||||
|
@ -928,8 +919,7 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
|
|||
// Our indexes are small and one shard puts the
|
||||
// least possible burden on Elasticsearch
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
|
||||
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1"))
|
||||
.version(Version.CURRENT.id)
|
||||
.putMapping(SINGLE_MAPPING_NAME, Strings.toString(docMapping))
|
||||
.build();
|
||||
|
@ -947,7 +937,6 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
|
|||
// least possible burden on Elasticsearch
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
|
||||
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting)
|
||||
.put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(),
|
||||
AnomalyDetectorsIndex.CONFIG_INDEX_MAX_RESULTS_WINDOW))
|
||||
.version(Version.CURRENT.id)
|
||||
|
@ -964,8 +953,7 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
|
|||
.patterns(Collections.singletonList(AnomalyDetectorsIndex.jobStateIndexPattern()))
|
||||
// TODO review these settings
|
||||
.settings(Settings.builder()
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
|
||||
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting))
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1"))
|
||||
.putMapping(SINGLE_MAPPING_NAME, Strings.toString(stateMapping))
|
||||
.version(Version.CURRENT.id)
|
||||
.build();
|
||||
|
@ -981,7 +969,6 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
|
|||
.patterns(Collections.singletonList(AnomalyDetectorsIndex.jobResultsIndexPrefix() + "*"))
|
||||
.settings(Settings.builder()
|
||||
.put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-1")
|
||||
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), delayedNodeTimeOutSetting)
|
||||
// Sacrifice durability for performance: in the event of power
|
||||
// failure we can lose the last 5 seconds of changes, but it's
|
||||
// much faster
|
||||
|
|
|
@ -68,6 +68,8 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
|
|||
client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet();
|
||||
awaitJobOpenedAndAssigned(job.getId(), null);
|
||||
|
||||
setMlIndicesDelayedNodeLeftTimeoutToZero();
|
||||
|
||||
ensureGreen(); // replicas must be assigned, otherwise we could lose a whole index
|
||||
internalCluster().stopRandomDataNode();
|
||||
ensureStableCluster(3);
|
||||
|
@ -109,6 +111,9 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
|
|||
OpenJobAction.Request openJobRequest = new OpenJobAction.Request(job.getId());
|
||||
client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet();
|
||||
awaitJobOpenedAndAssigned(job.getId(), null);
|
||||
|
||||
setMlIndicesDelayedNodeLeftTimeoutToZero();
|
||||
|
||||
StartDatafeedAction.Request startDataFeedRequest = new StartDatafeedAction.Request(config.getId(), 0L);
|
||||
client().execute(StartDatafeedAction.INSTANCE, startDataFeedRequest);
|
||||
|
||||
|
|
|
@ -418,6 +418,8 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
|
|||
assertEquals(JobState.OPENED, statsResponse.getResponse().results().get(0).getState());
|
||||
}, 20, TimeUnit.SECONDS);
|
||||
|
||||
setMlIndicesDelayedNodeLeftTimeoutToZero();
|
||||
|
||||
StartDatafeedAction.Request startDatafeedRequest = new StartDatafeedAction.Request(config.getId(), 0L);
|
||||
client().execute(StartDatafeedAction.INSTANCE, startDatafeedRequest).get();
|
||||
}
|
||||
|
|
|
@ -52,6 +52,9 @@ public class NetworkDisruptionIT extends BaseMlIntegTestCase {
|
|||
OpenJobAction.Request openJobRequest = new OpenJobAction.Request(job.getId());
|
||||
AcknowledgedResponse openJobResponse = client().execute(OpenJobAction.INSTANCE, openJobRequest).actionGet();
|
||||
assertTrue(openJobResponse.isAcknowledged());
|
||||
|
||||
setMlIndicesDelayedNodeLeftTimeoutToZero();
|
||||
|
||||
ensureGreen();
|
||||
|
||||
// Record which node the job starts off on
|
||||
|
|
|
@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.support;
|
|||
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
|
||||
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
|
||||
import org.elasticsearch.action.bulk.BulkItemResponse;
|
||||
import org.elasticsearch.action.bulk.BulkRequestBuilder;
|
||||
import org.elasticsearch.action.bulk.BulkResponse;
|
||||
|
@ -17,6 +18,7 @@ import org.elasticsearch.analysis.common.CommonAnalysisPlugin;
|
|||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.metadata.MetaData;
|
||||
import org.elasticsearch.cluster.routing.UnassignedInfo;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
|
@ -402,4 +404,13 @@ public abstract class BaseMlIntegTestCase extends ESIntegTestCase {
|
|||
});
|
||||
return jobNode.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the delayed node-left allocation timeout to 0 on the ML indices to make sure tests are not delayed
|
||||
*/
|
||||
protected void setMlIndicesDelayedNodeLeftTimeoutToZero() {
|
||||
client().admin().indices().updateSettings(new UpdateSettingsRequest(".ml-*")
|
||||
.settings(Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), 0).build()))
|
||||
.actionGet();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue