[ML] Rename `xpack.ml.allocation_enabled` boolean setting to `node.ml`.
Remove the `node.attr.xpack.ml.allocation_enabled` node attribute and instead use the `node.attr.max_running_jobs` attribute to indicate that a node is an ml node (in addition to indicating how many jobs can be hosted on the node).

Original commit: elastic/x-pack-elasticsearch@7979bc55b4
This commit is contained in:
parent
165c0d0e4f
commit
9100fa6333
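
For context before the diff hunks, a minimal sketch of the new contract. This is illustrative only and not code from this commit: the class and method names are invented, the literal keys "node.ml" and "max_running_jobs" are taken from the description above, and the default of 10 open jobs is an assumption that matches what the integration test below expects. A node opted in with `node.ml` publishes its `max_running_jobs` limit as a node attribute, and that attribute alone marks the node as an ml node.

import org.elasticsearch.common.settings.Settings;

// Illustrative only: a rough mirror of what MachineLearning#additionalSettings()
// now contributes for an ml node.
final class MlNodeSettingsSketch {

    static Settings additionalSettings(Settings nodeSettings) {
        // The real default for node.ml follows xpack.ml.enabled; true is assumed here.
        boolean mlNode = nodeSettings.getAsBoolean("node.ml", true);
        if (mlNode == false) {
            // Non ml nodes advertise nothing, so jobs will never be assigned to them.
            return Settings.EMPTY;
        }
        int maxRunningJobs = nodeSettings.getAsInt("max_running_jobs", 10);
        // Publishing the limit as a node attribute both marks the node as an ml node
        // and tells the job allocator how many jobs the node may host.
        return Settings.builder()
                .put("node.attr.max_running_jobs", maxRunningJobs)
                .build();
    }
}

A node started with `node.ml: false` never gets the attribute, which is exactly the signal OpenJobAction uses further down to skip it when picking a node for a job.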
@@ -149,9 +149,8 @@ public class MachineLearning extends Plugin implements ActionPlugin {
     public static final Setting<Boolean> USE_NATIVE_PROCESS_OPTION = Setting.boolSetting("useNativeProcess", true, Property.NodeScope,
             Property.Deprecated);

-    public static final String ALLOCATION_ENABLED_ATTR = "xpack.ml.allocation_enabled";
-    public static final Setting<Boolean> ALLOCATION_ENABLED = Setting.boolSetting("node.attr." + ALLOCATION_ENABLED_ATTR,
-            XPackSettings.MACHINE_LEARNING_ENABLED, Setting.Property.NodeScope);
+    public static final Setting<Boolean> ML_ENABLED =
+            Setting.boolSetting("node.ml", XPackSettings.MACHINE_LEARNING_ENABLED, Setting.Property.NodeScope);
     public static final Setting<Integer> CONCURRENT_JOB_ALLOCATIONS =
             Setting.intSetting("xpack.ml.node_concurrent_job_allocations", 2, 0, Property.Dynamic, Property.NodeScope);

@@ -175,7 +174,7 @@ public class MachineLearning extends Plugin implements ActionPlugin {
     public List<Setting<?>> getSettings() {
         return Collections.unmodifiableList(
                 Arrays.asList(USE_NATIVE_PROCESS_OPTION,
-                        ALLOCATION_ENABLED,
+                        ML_ENABLED,
                         CONCURRENT_JOB_ALLOCATIONS,
                         ProcessCtrl.DONT_PERSIST_MODEL_STATE_SETTING,
                         ProcessCtrl.MAX_ANOMALY_RECORDS_SETTING,

@@ -186,25 +185,18 @@ public class MachineLearning extends Plugin implements ActionPlugin {
     @Override
     public Settings additionalSettings() {
-        Settings.Builder additionalSettings = Settings.builder();
-        Boolean allocationEnabled = settings.getAsBoolean(ALLOCATION_ENABLED.getKey(), null);
-        if (enabled == false) {
-            if (allocationEnabled != null) {
-                // if the ml plugin has been disabled the ml allocation enabled node attribute shouldn't be set,
-                // otherwise other nodes will allocate jobs to this node and that will fail, because ml hasn't been loaded.
-                throw new IllegalArgumentException("Can't specify [" + ALLOCATION_ENABLED.getKey() + "] to true when [" +
-                        XPackSettings.MACHINE_LEARNING_ENABLED.getKey() + "] has been set to false");
-            }
+        if (enabled == false || this.transportClientMode) {
             return super.additionalSettings();
         }
-        if (allocationEnabled == null) {
-            // Make sure that we explicitly set allocation enabled node attribute if it has been specified in the node
-            // settings. So we can always rely on it during assigning job tasks to nodes.
-            additionalSettings.put(ALLOCATION_ENABLED.getKey(), ALLOCATION_ENABLED.get(settings));
+        Settings.Builder additionalSettings = Settings.builder();
+        additionalSettings.put(super.additionalSettings());
+        Boolean allocationEnabled = ML_ENABLED.get(settings);
+        if (allocationEnabled != null && allocationEnabled) {
+            // Copy max_running_jobs setting to node attribute, so that we use this information when assigning job tasks to nodes:
+            additionalSettings.put("node.attr." + AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.getKey(),
+                    AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.get(settings));
         }
-        // Add max running job limit as node attribute so that we use this information assigning job tasks to nodes
-        additionalSettings.put("node.attr." + AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.getKey(),
-                AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.get(settings));
         return additionalSettings.build();
     }

@@ -376,8 +376,8 @@ public class OpenJobAction extends Action<OpenJobAction.Request, PersistentActio
         PersistentTasksInProgress persistentTasksInProgress = clusterState.getMetaData().custom(PersistentTasksInProgress.TYPE);
         for (DiscoveryNode node : clusterState.getNodes()) {
             Map<String, String> nodeAttributes = node.getAttributes();
-            String allocationEnabled = nodeAttributes.get(MachineLearning.ALLOCATION_ENABLED_ATTR);
-            if ("true".equals(allocationEnabled) == false) {
+            String maxNumberOfOpenJobsStr = nodeAttributes.get(AutodetectProcessManager.MAX_RUNNING_JOBS_PER_NODE.getKey());
+            if (maxNumberOfOpenJobsStr == null) {
                 String reason = "Not opening job [" + jobId + "] on node [" + node + "], because this node isn't a ml node.";
                 logger.debug(reason);
                 reasons.add(reason);

@@ -410,7 +410,7 @@ public class OpenJobAction extends Action<OpenJobAction.Request, PersistentActio
                 continue;
             }

-            long maxNumberOfOpenJobs = Long.parseLong(node.getAttributes().get(MAX_RUNNING_JOBS_PER_NODE.getKey()));
+            long maxNumberOfOpenJobs = Long.parseLong(maxNumberOfOpenJobsStr);
            long available = maxNumberOfOpenJobs - numberOfAssignedJobs;
             if (available == 0) {
                 String reason = "Not opening job [" + jobId + "] on node [" + node + "], because this node is full. " +
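
To see the selection rule from the two hunks above in one place, here is a condensed, hypothetical rewrite; the class, method, and parameter names are invented and the real logic lives in OpenJobAction. A node without the max_running_jobs attribute is not an ml node and is skipped; otherwise the attribute value caps how many jobs it may host.

import java.util.Map;

// Illustrative only: condensed form of the per-node check performed while choosing
// where to open a job. numberOfAssignedJobs would come from counting this node's
// entries in PersistentTasksInProgress.
final class MlNodeCapacitySketch {

    static boolean canOpenJob(Map<String, String> nodeAttributes, long numberOfAssignedJobs) {
        String maxNumberOfOpenJobsStr = nodeAttributes.get("max_running_jobs");
        if (maxNumberOfOpenJobsStr == null) {
            // The attribute is only set when node.ml is true, so its absence means
            // this is not an ml node and it must be skipped.
            return false;
        }
        long maxNumberOfOpenJobs = Long.parseLong(maxNumberOfOpenJobsStr);
        // The node is eligible only while it still has spare capacity.
        return maxNumberOfOpenJobs - numberOfAssignedJobs > 0;
    }
}
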
@@ -15,7 +15,6 @@ import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.common.transport.TransportAddress;
 import org.elasticsearch.test.ESTestCase;
-import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.job.config.Job;
 import org.elasticsearch.xpack.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.MlMetadata;

@@ -96,7 +95,6 @@ public class OpenJobActionTests extends ESTestCase {

     public void testSelectLeastLoadedMlNode() {
         Map<String, String> nodeAttr = new HashMap<>();
-        nodeAttr.put(MachineLearning.ALLOCATION_ENABLED_ATTR, "true");
         nodeAttr.put(MAX_RUNNING_JOBS_PER_NODE.getKey(), "10");
         DiscoveryNodes nodes = DiscoveryNodes.builder()
                 .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300),

@@ -128,7 +126,6 @@ public class OpenJobActionTests extends ESTestCase {
         int maxRunningJobsPerNode = randomIntBetween(1, 100);

         Map<String, String> nodeAttr = new HashMap<>();
-        nodeAttr.put(MachineLearning.ALLOCATION_ENABLED_ATTR, "true");
         nodeAttr.put(MAX_RUNNING_JOBS_PER_NODE.getKey(), String.valueOf(maxRunningJobsPerNode));
         DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
         Map<Long, PersistentTaskInProgress<?>> taskMap = new HashMap<>();

@@ -152,14 +149,11 @@ public class OpenJobActionTests extends ESTestCase {
     }

     public void testSelectLeastLoadedMlNode_noMlNodes() {
-        Map<String, String> nodeAttr = new HashMap<>();
-        nodeAttr.put(MachineLearning.ALLOCATION_ENABLED_ATTR, "false");
-        nodeAttr.put(MAX_RUNNING_JOBS_PER_NODE.getKey(), "10");
         DiscoveryNodes nodes = DiscoveryNodes.builder()
                 .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300),
-                        nodeAttr, Collections.emptySet(), Version.CURRENT))
+                        Collections.emptyMap(), Collections.emptySet(), Version.CURRENT))
                 .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301),
-                        nodeAttr, Collections.emptySet(), Version.CURRENT))
+                        Collections.emptyMap(), Collections.emptySet(), Version.CURRENT))
                 .build();

         PersistentTaskInProgress<OpenJobAction.Request> task =

@@ -175,7 +169,6 @@ public class OpenJobActionTests extends ESTestCase {

     public void testSelectLeastLoadedMlNode_maxConcurrentOpeningJobs() {
         Map<String, String> nodeAttr = new HashMap<>();
-        nodeAttr.put(MachineLearning.ALLOCATION_ENABLED_ATTR, "true");
         nodeAttr.put(MAX_RUNNING_JOBS_PER_NODE.getKey(), "10");
         DiscoveryNodes nodes = DiscoveryNodes.builder()
                 .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300),

@@ -140,11 +140,11 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
     public void testDedicatedMlNode() throws Exception {
         internalCluster().ensureAtMostNumDataNodes(0);
         // start 2 non ml node that will never get a job allocated. (but ml apis are accessable from this node)
-        internalCluster().startNode(Settings.builder().put(MachineLearning.ALLOCATION_ENABLED.getKey(), false));
-        internalCluster().startNode(Settings.builder().put(MachineLearning.ALLOCATION_ENABLED.getKey(), false));
+        internalCluster().startNode(Settings.builder().put(MachineLearning.ML_ENABLED.getKey(), false));
+        internalCluster().startNode(Settings.builder().put(MachineLearning.ML_ENABLED.getKey(), false));
         // start ml node
         if (randomBoolean()) {
-            internalCluster().startNode(Settings.builder().put(MachineLearning.ALLOCATION_ENABLED.getKey(), true));
+            internalCluster().startNode(Settings.builder().put(MachineLearning.ML_ENABLED.getKey(), true));
         } else {
             // the default is based on 'xpack.ml.enabled', which is enabled in base test class.
             internalCluster().startNode();

@@ -165,14 +165,13 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {

             DiscoveryNode node = clusterState.nodes().resolveNode(task.getExecutorNode());
             Map<String, String> expectedNodeAttr = new HashMap<>();
-            expectedNodeAttr.put(MachineLearning.ALLOCATION_ENABLED_ATTR, "true");
             expectedNodeAttr.put(MAX_RUNNING_JOBS_PER_NODE.getKey(), "10");
             assertEquals(expectedNodeAttr, node.getAttributes());
             assertEquals(JobState.OPENED, task.getStatus());
         });

         logger.info("stop the only running ml node");
-        internalCluster().stopRandomNode(settings -> settings.getAsBoolean(MachineLearning.ALLOCATION_ENABLED.getKey(), true));
+        internalCluster().stopRandomNode(settings -> settings.getAsBoolean(MachineLearning.ML_ENABLED.getKey(), true));
         ensureStableCluster(2);
         assertBusy(() -> {
             // job should get and remain in a failed state:

@@ -186,7 +185,7 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
         });

         logger.info("start ml node");
-        internalCluster().startNode(Settings.builder().put(MachineLearning.ALLOCATION_ENABLED.getKey(), true));
+        internalCluster().startNode(Settings.builder().put(MachineLearning.ML_ENABLED.getKey(), true));
         ensureStableCluster(3);
         assertBusy(() -> {
             // job should be re-opened:

@@ -197,7 +196,6 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
             assertNotNull(task.getExecutorNode());
             DiscoveryNode node = clusterState.nodes().resolveNode(task.getExecutorNode());
             Map<String, String> expectedNodeAttr = new HashMap<>();
-            expectedNodeAttr.put(MachineLearning.ALLOCATION_ENABLED_ATTR, "true");
             expectedNodeAttr.put(MAX_RUNNING_JOBS_PER_NODE.getKey(), "10");
             assertEquals(expectedNodeAttr, node.getAttributes());
             assertEquals(JobState.OPENED, task.getStatus());

@@ -211,12 +209,12 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
         // start non ml node, but that will hold the indices
         logger.info("Start non ml node:");
         String nonMlNode = internalCluster().startNode(Settings.builder()
-                .put(MachineLearning.ALLOCATION_ENABLED.getKey(), false));
+                .put(MachineLearning.ML_ENABLED.getKey(), false));
         logger.info("Starting ml nodes");
         internalCluster().startNodes(numMlNodes, Settings.builder()
                 .put("node.data", false)
                 .put("node.master", false)
-                .put(MachineLearning.ALLOCATION_ENABLED.getKey(), true).build());
+                .put(MachineLearning.ML_ENABLED.getKey(), true).build());
         ensureStableCluster(numMlNodes + 1);

         int maxConcurrentJobAllocations = randomIntBetween(1, 4);

@@ -273,7 +271,7 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
             Runnable r = () -> {
                 try {
                     internalCluster()
-                            .stopRandomNode(settings -> settings.getAsBoolean(MachineLearning.ALLOCATION_ENABLED.getKey(), false));
+                            .stopRandomNode(settings -> settings.getAsBoolean(MachineLearning.ML_ENABLED.getKey(), false));
                 } catch (IOException e) {
                     logger.error("error stopping node", e);
                 }

@@ -294,7 +292,7 @@ public class BasicDistributedJobsIT extends BaseMlIntegTestCase {
         internalCluster().startNodes(numMlNodes, Settings.builder()
                 .put("node.data", false)
                 .put("node.master", false)
-                .put(MachineLearning.ALLOCATION_ENABLED.getKey(), true).build());
+                .put(MachineLearning.ML_ENABLED.getKey(), true).build());

         ensureStableCluster(1 + numMlNodes);
         assertBusy(() -> {