diff --git a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc
index c3b8dc2e1f6..7d5133b6a9d 100644
--- a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc
@@ -113,9 +113,12 @@ This is a possible response:
     "version": "7.0.0",
     "build_hash": "99a07c016d5a73"
   },
-  "limits" : { }
+  "limits" : {
+    "effective_max_model_memory_limit": "28961mb"
+  }
 }
 ----
 // TESTRESPONSE[s/"upgrade_mode": false/"upgrade_mode": $body.upgrade_mode/]
 // TESTRESPONSE[s/"version": "7.0.0",/"version": "$body.native_code.version",/]
 // TESTRESPONSE[s/"build_hash": "99a07c016d5a73"/"build_hash": "$body.native_code.build_hash"/]
+// TESTRESPONSE[s/"effective_max_model_memory_limit": "28961mb"/"effective_max_model_memory_limit": "$body.limits.effective_max_model_memory_limit"/]
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java
index d1d828de1f2..2735e0d7379 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportMlInfoAction.java
@@ -10,6 +10,8 @@ import org.apache.logging.log4j.Logger;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.HandledTransportAction;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.unit.ByteSizeUnit;
@@ -22,9 +24,11 @@ import org.elasticsearch.xpack.core.ml.MachineLearningField;
 import org.elasticsearch.xpack.core.ml.MlMetadata;
 import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
 import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
 import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig;
 import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.process.NativeController;
 import org.elasticsearch.xpack.ml.process.NativeControllerHolder;
 
@@ -119,11 +123,50 @@ public class TransportMlInfoAction extends HandledTransportAction<MlInfoAction.Request, MlInfoAction.Response> {
         }
     }
 
+    static ByteSizeValue calculateEffectiveMaxModelMemoryLimit(int maxMachineMemoryPercent, DiscoveryNodes nodes) {
+
+        long maxMlMemory = -1;
+
+        for (DiscoveryNode node : nodes) {
+
+            Map<String, String> nodeAttributes = node.getAttributes();
+            String machineMemoryStr = nodeAttributes.get(MachineLearning.MACHINE_MEMORY_NODE_ATTR);
+            if (machineMemoryStr == null) {
+                continue;
+            }
+            long machineMemory;
+            try {
+                machineMemory = Long.parseLong(machineMemoryStr);
+            } catch (NumberFormatException e) {
+                continue;
+            }
+            maxMlMemory = Math.max(maxMlMemory, machineMemory * maxMachineMemoryPercent / 100);
+        }
+
+        if (maxMlMemory <= 0) {
+            // This implies there are currently no ML nodes in the cluster, so we
+            // have no idea what the effective limit would be if one were added
+            return null;
+        }
+
+        maxMlMemory -= Math.max(Job.PROCESS_MEMORY_OVERHEAD.getBytes(), DataFrameAnalyticsConfig.PROCESS_MEMORY_OVERHEAD.getBytes());
+        maxMlMemory -= MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes();
+        return new ByteSizeValue(Math.max(0L, maxMlMemory) / 1024 / 1024, ByteSizeUnit.MB);
+    }
+
     private Map<String, Object> limits() {
         Map<String, Object> limits = new HashMap<>();
+        ByteSizeValue effectiveMaxModelMemoryLimit = calculateEffectiveMaxModelMemoryLimit(
+            clusterService.getClusterSettings().get(MachineLearning.MAX_MACHINE_MEMORY_PERCENT), clusterService.state().getNodes());
         ByteSizeValue maxModelMemoryLimit = clusterService.getClusterSettings().get(MachineLearningField.MAX_MODEL_MEMORY_LIMIT);
         if (maxModelMemoryLimit != null && maxModelMemoryLimit.getBytes() > 0) {
-            limits.put("max_model_memory_limit", maxModelMemoryLimit);
+            limits.put("max_model_memory_limit", maxModelMemoryLimit.getStringRep());
+            if (effectiveMaxModelMemoryLimit == null || effectiveMaxModelMemoryLimit.compareTo(maxModelMemoryLimit) > 0) {
+                effectiveMaxModelMemoryLimit = maxModelMemoryLimit;
+            }
+        }
+        if (effectiveMaxModelMemoryLimit != null) {
+            limits.put("effective_max_model_memory_limit", effectiveMaxModelMemoryLimit.getStringRep());
         }
         return limits;
     }
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java
new file mode 100644
index 00000000000..553c0beaa99
--- /dev/null
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportMlInfoActionTests.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.ml.action;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.common.transport.TransportAddress;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.ml.MachineLearning;
+
+import java.net.InetAddress;
+import java.util.Collections;
+
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.hamcrest.Matchers.nullValue;
+
+public class TransportMlInfoActionTests extends ESTestCase {
+
+    public void testCalculateEffectiveMaxModelMemoryLimit() {
+
+        int mlMemoryPercent = randomIntBetween(5, 90);
+        long highestMlMachineMemory = -1;
+
+        DiscoveryNodes.Builder builder = DiscoveryNodes.builder();
+        for (int i = randomIntBetween(1, 10); i > 0; --i) {
+            String nodeName = "_node_name" + i;
+            String nodeId = "_node_id" + i;
+            TransportAddress ta = new TransportAddress(InetAddress.getLoopbackAddress(), 9300 + i);
+            if (randomBoolean()) {
+                // Not an ML node
+                builder.add(new DiscoveryNode(nodeName, nodeId, ta, Collections.emptyMap(), Collections.emptySet(), Version.CURRENT));
+            } else {
+                // ML node
+                long machineMemory = randomLongBetween(2000000000L, 100000000000L);
+                highestMlMachineMemory = Math.max(machineMemory, highestMlMachineMemory);
+                builder.add(new DiscoveryNode(nodeName, nodeId, ta,
+                    Collections.singletonMap(MachineLearning.MACHINE_MEMORY_NODE_ATTR, String.valueOf(machineMemory)),
+                    Collections.emptySet(), Version.CURRENT));
+            }
+        }
+        DiscoveryNodes nodes = builder.build();
+
+        ByteSizeValue effectiveMaxModelMemoryLimit =
+            TransportMlInfoAction.calculateEffectiveMaxModelMemoryLimit(mlMemoryPercent, nodes);
+
+        if (highestMlMachineMemory < 0) {
+            assertThat(effectiveMaxModelMemoryLimit, nullValue());
+        } else {
+            assertThat(effectiveMaxModelMemoryLimit, notNullValue());
+            assertThat(effectiveMaxModelMemoryLimit.getBytes()
+                    + Math.max(Job.PROCESS_MEMORY_OVERHEAD.getBytes(), DataFrameAnalyticsConfig.PROCESS_MEMORY_OVERHEAD.getBytes())
+                    + MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD.getBytes(),
+                lessThanOrEqualTo(highestMlMachineMemory * mlMemoryPercent / 100));
+        }
+    }
+}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml
index 585ab61fb05..d3e3b002234 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/ml_info.yml
@@ -15,7 +15,9 @@ teardown:
   - match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
   - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
-  - match: { limits: {} }
+  - is_false: limits.max_model_memory_limit
+  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
   - match: { upgrade_mode: false }
 
   - do:
@@ -32,6 +34,8 @@ teardown:
   - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
   - match: { limits.max_model_memory_limit: "512mb" }
+  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
   - match: { upgrade_mode: false }
 
   - do:
@@ -48,4 +52,42 @@ teardown:
   - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
   - match: { defaults.datafeeds.scroll_size: 1000 }
   - match: { limits.max_model_memory_limit: "6gb" }
+  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
+  - match: { upgrade_mode: false }
+
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            xpack.ml.max_model_memory_limit: "6gb"
+
+  - do:
+      ml.info: {}
+  - match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
+  - match: { defaults.anomaly_detectors.model_memory_limit: "1gb" }
+  - match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
+  - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
+  - match: { defaults.datafeeds.scroll_size: 1000 }
+  - match: { limits.max_model_memory_limit: "6gb" }
+  # We cannot assert an exact value for the next one as it will vary depending on the test machine
+  - match: { limits.effective_max_model_memory_limit: "/\\d+[kmg]?b/" }
+  - match: { upgrade_mode: false }
+
+  - do:
+      cluster.put_settings:
+        body:
+          persistent:
+            xpack.ml.max_model_memory_limit: "1mb"
+
+  - do:
+      ml.info: {}
+  - match: { defaults.anomaly_detectors.categorization_analyzer.tokenizer: "ml_classic" }
+  - match: { defaults.anomaly_detectors.model_memory_limit: "1mb" }
+  - match: { defaults.anomaly_detectors.categorization_examples_limit: 4 }
+  - match: { defaults.anomaly_detectors.model_snapshot_retention_days: 1 }
+  - match: { defaults.datafeeds.scroll_size: 1000 }
+  - match: { limits.max_model_memory_limit: "1mb" }
+  # This time we can assert an exact value for the next one because the hard limit is so low
+  - match: { limits.effective_max_model_memory_limit: "1mb" }
   - match: { upgrade_mode: false }
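
For reference, the arithmetic behind effective_max_model_memory_limit can be sketched outside the Elasticsearch codebase. The snippet below is a minimal, self-contained illustration, not code from this patch: the class EffectiveLimitSketch, the helper effectiveLimitMb, and the 100 MiB / 30 MiB overhead figures are made-up placeholders standing in for Job.PROCESS_MEMORY_OVERHEAD, DataFrameAnalyticsConfig.PROCESS_MEMORY_OVERHEAD and MachineLearning.NATIVE_EXECUTABLE_CODE_OVERHEAD, whose real values live in the plugin.

public class EffectiveLimitSketch {

    // Mirrors calculateEffectiveMaxModelMemoryLimit: take the largest "percent of
    // machine memory" figure across the ML nodes, then subtract the per-process
    // and native code overheads. Overhead values are illustrative placeholders.
    static long effectiveLimitMb(long[] mlNodeMachineMemoryBytes, int maxMachineMemoryPercent,
                                 long processOverheadBytes, long nativeCodeOverheadBytes) {
        long maxMlMemory = -1;
        for (long machineMemory : mlNodeMachineMemoryBytes) {
            maxMlMemory = Math.max(maxMlMemory, machineMemory * maxMachineMemoryPercent / 100);
        }
        if (maxMlMemory <= 0) {
            return -1; // no ML nodes: the transport action returns null and omits the field
        }
        maxMlMemory -= processOverheadBytes;
        maxMlMemory -= nativeCodeOverheadBytes;
        return Math.max(0L, maxMlMemory) / 1024 / 1024;
    }

    public static void main(String[] args) {
        // One 64 GiB ML node, 30% of machine memory usable by ML, with assumed
        // overheads of 100 MiB for the process and 30 MiB for native code.
        long[] nodes = { 64L * 1024 * 1024 * 1024 };
        long limitMb = effectiveLimitMb(nodes, 30, 100L * 1024 * 1024, 30L * 1024 * 1024);
        System.out.println(limitMb + "mb"); // same string style as the API field, e.g. "28961mb"
    }
}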