diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
index 2fc23acd134..0a1a18eeb44 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java
@@ -29,6 +29,7 @@ import org.apache.lucene.util.BytesRef;
import org.elasticsearch.client.RequestConverters.EndpointBuilder;
import org.elasticsearch.client.core.PageParams;
import org.elasticsearch.client.ml.CloseJobRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -701,12 +702,17 @@ final class MLRequestConverters {
return request;
}
- static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException {
- String endpoint = new EndpointBuilder()
- .addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage")
- .build();
- Request request = new Request(HttpPost.METHOD_NAME, endpoint);
- request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE));
+ static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException {
+ EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics");
+ if (explainRequest.getId() != null) {
+ endpoint.addPathPart(explainRequest.getId());
+ }
+ endpoint.addPathPartAsIs("_explain");
+
+ Request request = new Request(HttpPost.METHOD_NAME, endpoint.build());
+ if (explainRequest.getConfig() != null) {
+ request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE));
+ }
return request;
}
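
Reviewer note: a minimal sketch, not part of the patch, of the two endpoint shapes the converter now produces. The job id "foo" and the config variable are illustrative assumptions; MLRequestConverters is package-private, so this only compiles from within the same package (as the tests below do).

    // Id-based request: the id becomes a path part and no request body is set.
    Request byId = MLRequestConverters.explainDataFrameAnalytics(
        new ExplainDataFrameAnalyticsRequest("foo"));   // POST /_ml/data_frame/analytics/foo/_explain

    // Config-based request: no id in the path, the config is serialized as the body.
    Request byConfig = MLRequestConverters.explainDataFrameAnalytics(
        new ExplainDataFrameAnalyticsRequest(config));  // POST /_ml/data_frame/analytics/_explain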
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
index 2ddc8839f96..468cd535c01 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java
@@ -22,6 +22,8 @@ import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.ml.CloseJobRequest;
import org.elasticsearch.client.ml.CloseJobResponse;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -34,7 +36,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
-import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -2249,46 +2250,46 @@ public final class MachineLearningClient {
}
/**
- * Estimates memory usage for the given Data Frame Analytics
+ * Explains the given Data Frame Analytics
*
* For additional info
- * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html">
- * Estimate Memory Usage for Data Frame Analytics documentation</a>
+ * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html">
+ * Explain Data Frame Analytics documentation</a>
*
- * @param request The {@link PutDataFrameAnalyticsRequest}
+ * @param request The {@link ExplainDataFrameAnalyticsRequest}
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
- * @return {@link EstimateMemoryUsageResponse} response object
+ * @return {@link ExplainDataFrameAnalyticsResponse} response object
* @throws IOException when there is a serialization issue sending the request or receiving the response
*/
- public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request,
- RequestOptions options) throws IOException {
+ public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request,
+ RequestOptions options) throws IOException {
return restHighLevelClient.performRequestAndParseEntity(
request,
- MLRequestConverters::estimateMemoryUsage,
+ MLRequestConverters::explainDataFrameAnalytics,
options,
- EstimateMemoryUsageResponse::fromXContent,
+ ExplainDataFrameAnalyticsResponse::fromXContent,
Collections.emptySet());
}
/**
- * Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion
+ * Explains the given Data Frame Analytics asynchronously and notifies listener upon completion
*
* For additional info
- * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html">
- * Estimate Memory Usage for Data Frame Analytics documentation</a>
+ * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html">
+ * Explain Data Frame Analytics documentation</a>
*
- * @param request The {@link PutDataFrameAnalyticsRequest}
+ * @param request The {@link ExplainDataFrameAnalyticsRequest}
* @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
* @param listener Listener to be notified upon request completion
* @return cancellable that may be used to cancel the request
*/
- public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options,
- ActionListener&lt;EstimateMemoryUsageResponse&gt; listener) {
+ public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options,
+ ActionListener&lt;ExplainDataFrameAnalyticsResponse&gt; listener) {
return restHighLevelClient.performRequestAsyncAndParseEntity(
request,
- MLRequestConverters::estimateMemoryUsage,
+ MLRequestConverters::explainDataFrameAnalytics,
options,
- EstimateMemoryUsageResponse::fromXContent,
+ ExplainDataFrameAnalyticsResponse::fromXContent,
listener,
Collections.emptySet());
}
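
For reviewers, a hedged sketch of how the renamed entry points are called; `client` (a RestHighLevelClient), `config`, and `listener` are assumptions rather than part of the patch:

    // Explain an existing job by (hypothetical) id.
    ExplainDataFrameAnalyticsResponse byId = client.machineLearning().explainDataFrameAnalytics(
        new ExplainDataFrameAnalyticsRequest("my-job-id"), RequestOptions.DEFAULT);

    // Explain a not-yet-created config; same endpoint, config sent as the request body.
    ExplainDataFrameAnalyticsResponse byConfig = client.machineLearning().explainDataFrameAnalytics(
        new ExplainDataFrameAnalyticsRequest(config), RequestOptions.DEFAULT);

    // Async variant: returns a Cancellable and notifies an ActionListener<ExplainDataFrameAnalyticsResponse>.
    Cancellable cancellable = client.machineLearning().explainDataFrameAnalyticsAsync(
        new ExplainDataFrameAnalyticsRequest(config), RequestOptions.DEFAULT, listener);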
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java
new file mode 100644
index 00000000000..880e87b2eea
--- /dev/null
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.Validatable;
+import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.common.Nullable;
+
+import java.util.Objects;
+
+/**
+ * Request to explain the following about a data frame analytics job:
+ *
+ * - field selection: which fields are included or not in the analysis
+ * - memory estimation: how much memory the job is estimated to require
+ *
+ */
+public class ExplainDataFrameAnalyticsRequest implements Validatable {
+
+ private final String id;
+ private final DataFrameAnalyticsConfig config;
+
+ public ExplainDataFrameAnalyticsRequest(String id) {
+ this.id = Objects.requireNonNull(id);
+ this.config = null;
+ }
+
+ public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) {
+ this.id = null;
+ this.config = Objects.requireNonNull(config);
+ }
+
+ @Nullable
+ public String getId() {
+ return id;
+ }
+
+ @Nullable
+ public DataFrameAnalyticsConfig getConfig() {
+ return config;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o;
+ return Objects.equals(id, other.id) && Objects.equals(config, other.config);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(id, config);
+ }
+}
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java
new file mode 100644
index 00000000000..5879ffc7154
--- /dev/null
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+public class ExplainDataFrameAnalyticsResponse implements ToXContentObject {
+
+ public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");
+
+ public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
+ public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");
+
+ public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException {
+ return PARSER.parse(parser, null);
+ }
+
+ @SuppressWarnings("unchecked")
+ static final ConstructingObjectParser&lt;ExplainDataFrameAnalyticsResponse, Void&gt; PARSER =
+ new ConstructingObjectParser<>(
+ TYPE.getPreferredName(), true,
+ args -> new ExplainDataFrameAnalyticsResponse((List&lt;FieldSelection&gt;) args[0], (MemoryEstimation) args[1]));
+
+ static {
+ PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
+ PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
+ }
+
+ private final List&lt;FieldSelection&gt; fieldSelection;
+ private final MemoryEstimation memoryEstimation;
+
+ public ExplainDataFrameAnalyticsResponse(List&lt;FieldSelection&gt; fieldSelection, MemoryEstimation memoryEstimation) {
+ this.fieldSelection = Objects.requireNonNull(fieldSelection);
+ this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
+ builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) return true;
+ if (other == null || getClass() != other.getClass()) return false;
+
+ ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other;
+ return Objects.equals(fieldSelection, that.fieldSelection)
+ && Objects.equals(memoryEstimation, that.memoryEstimation);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(fieldSelection, memoryEstimation);
+ }
+
+ public MemoryEstimation getMemoryEstimation() {
+ return memoryEstimation;
+ }
+
+ public List&lt;FieldSelection&gt; getFieldSelection() {
+ return fieldSelection;
+ }
+}
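
For context, a minimal sketch of the wire format the parser above accepts; the JSON literal is an assumption derived from the declared field names, not captured server output:

    String json = "{"
        + "\"field_selection\": [{\"name\": \"total\", \"mapping_types\": [\"long\"],"
        + "\"is_included\": true, \"is_required\": false, \"feature_type\": \"numerical\"}],"
        + "\"memory_estimation\": {\"expected_memory_without_disk\": \"128mb\","
        + "\"expected_memory_with_disk\": \"32mb\"}}";
    try (XContentParser parser = JsonXContent.jsonXContent.createParser(
            NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
        // Lenient parsing: the parsers are constructed with ignoreUnknownFields=true.
        ExplainDataFrameAnalyticsResponse response = ExplainDataFrameAnalyticsResponse.fromXContent(parser);
    }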
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java
new file mode 100644
index 00000000000..4483b6fa5e0
--- /dev/null
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.dataframe.explain;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+public class FieldSelection implements ToXContentObject {
+
+ private static final ParseField NAME = new ParseField("name");
+ private static final ParseField MAPPING_TYPES = new ParseField("mapping_types");
+ private static final ParseField IS_INCLUDED = new ParseField("is_included");
+ private static final ParseField IS_REQUIRED = new ParseField("is_required");
+ private static final ParseField FEATURE_TYPE = new ParseField("feature_type");
+ private static final ParseField REASON = new ParseField("reason");
+
+ public enum FeatureType {
+ CATEGORICAL, NUMERICAL;
+
+ public static FeatureType fromString(String value) {
+ return FeatureType.valueOf(value.toUpperCase(Locale.ROOT));
+ }
+
+ @Override
+ public String toString() {
+ return name().toLowerCase(Locale.ROOT);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ public static ConstructingObjectParser&lt;FieldSelection, Void&gt; PARSER = new ConstructingObjectParser&lt;&gt;("field_selection", true,
+ a -> new FieldSelection((String) a[0], new HashSet&lt;&gt;((List&lt;String&gt;) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4],
+ (String) a[5]));
+
+ static {
+ PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME);
+ PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES);
+ PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED);
+ PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED);
+ PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+ if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+ return FeatureType.fromString(p.text());
+ }
+ throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+ }, FEATURE_TYPE, ObjectParser.ValueType.STRING);
+ PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON);
+ }
+
+ private final String name;
+ private final Set&lt;String&gt; mappingTypes;
+ private final boolean isIncluded;
+ private final boolean isRequired;
+ private final FeatureType featureType;
+ private final String reason;
+
+ public static FieldSelection included(String name, Set&lt;String&gt; mappingTypes, boolean isRequired, FeatureType featureType) {
+ return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null);
+ }
+
+ public static FieldSelection excluded(String name, Set&lt;String&gt; mappingTypes, String reason) {
+ return new FieldSelection(name, mappingTypes, false, false, null, reason);
+ }
+
+ FieldSelection(String name, Set&lt;String&gt; mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType,
+ @Nullable String reason) {
+ this.name = Objects.requireNonNull(name);
+ this.mappingTypes = Collections.unmodifiableSet(mappingTypes);
+ this.isIncluded = isIncluded;
+ this.isRequired = isRequired;
+ this.featureType = featureType;
+ this.reason = reason;
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ builder.field(NAME.getPreferredName(), name);
+ builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes);
+ builder.field(IS_INCLUDED.getPreferredName(), isIncluded);
+ builder.field(IS_REQUIRED.getPreferredName(), isRequired);
+ if (featureType != null) {
+ builder.field(FEATURE_TYPE.getPreferredName(), featureType);
+ }
+ if (reason != null) {
+ builder.field(REASON.getPreferredName(), reason);
+ }
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ FieldSelection that = (FieldSelection) o;
+ return Objects.equals(name, that.name)
+ && Objects.equals(mappingTypes, that.mappingTypes)
+ && isIncluded == that.isIncluded
+ && isRequired == that.isRequired
+ && Objects.equals(featureType, that.featureType)
+ && Objects.equals(reason, that.reason);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public Set&lt;String&gt; getMappingTypes() {
+ return mappingTypes;
+ }
+
+ public boolean isIncluded() {
+ return isIncluded;
+ }
+
+ public boolean isRequired() {
+ return isRequired;
+ }
+
+ @Nullable
+ public FeatureType getFeatureType() {
+ return featureType;
+ }
+
+ @Nullable
+ public String getReason() {
+ return reason;
+ }
+}
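
A short hedged sketch of the two factory methods above; the field names, mapping types, and reason text are illustrative only:

    // Field kept for analysis (numerical feature), not required by the analysis itself.
    FieldSelection kept = FieldSelection.included(
        "price", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL);

    // Field dropped from the analysis, with the reason reported back to the user.
    FieldSelection dropped = FieldSelection.excluded(
        "description", Collections.singleton("text"), "unsupported type; expected numerical or categorical");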
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java
similarity index 81%
rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java
rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java
index c97cc545cdb..9151b8ce5dd 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java
@@ -16,8 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-
-package org.elasticsearch.client.ml;
+package org.elasticsearch.client.ml.dataframe.explain;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
@@ -26,23 +25,19 @@ import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Objects;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
-public class EstimateMemoryUsageResponse implements ToXContentObject {
-
+public class MemoryEstimation implements ToXContentObject {
+
public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk");
public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk");
- static final ConstructingObjectParser&lt;EstimateMemoryUsageResponse, Void&gt; PARSER =
- new ConstructingObjectParser<>(
- "estimate_memory_usage_response",
- true,
- args -> new EstimateMemoryUsageResponse((ByteSizeValue) args[0], (ByteSizeValue) args[1]));
+ public static final ConstructingObjectParser&lt;MemoryEstimation, Void&gt; PARSER = new ConstructingObjectParser&lt;&gt;("memory_estimation", true,
+ a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1]));
static {
PARSER.declareField(
@@ -57,14 +52,10 @@ public class EstimateMemoryUsageResponse implements ToXContentObject {
ObjectParser.ValueType.VALUE);
}
- public static EstimateMemoryUsageResponse fromXContent(XContentParser parser) {
- return PARSER.apply(parser, null);
- }
-
private final ByteSizeValue expectedMemoryWithoutDisk;
private final ByteSizeValue expectedMemoryWithDisk;
- public EstimateMemoryUsageResponse(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
+ public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk;
this.expectedMemoryWithDisk = expectedMemoryWithDisk;
}
@@ -99,7 +90,7 @@ public class EstimateMemoryUsageResponse implements ToXContentObject {
return false;
}
- EstimateMemoryUsageResponse that = (EstimateMemoryUsageResponse) other;
+ MemoryEstimation that = (MemoryEstimation) other;
return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk)
&& Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk);
}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
index db59054cdb8..633e5363ff1 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java
@@ -25,6 +25,7 @@ import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpPut;
import org.elasticsearch.client.core.PageParams;
import org.elasticsearch.client.ml.CloseJobRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -788,14 +789,25 @@ public class MLRequestConvertersTests extends ESTestCase {
}
}
- public void testEstimateMemoryUsage() throws IOException {
- PutDataFrameAnalyticsRequest estimateRequest = new PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig());
- Request request = MLRequestConverters.estimateMemoryUsage(estimateRequest);
- assertEquals(HttpPost.METHOD_NAME, request.getMethod());
- assertEquals("/_ml/data_frame/analytics/_estimate_memory_usage", request.getEndpoint());
- try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) {
- DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser);
- assertThat(parsedConfig, equalTo(estimateRequest.getConfig()));
+ public void testExplainDataFrameAnalytics() throws IOException {
+ // Request with config
+ {
+ ExplainDataFrameAnalyticsRequest explainRequest = new ExplainDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig());
+ Request request = MLRequestConverters.explainDataFrameAnalytics(explainRequest);
+ assertEquals(HttpPost.METHOD_NAME, request.getMethod());
+ assertEquals("/_ml/data_frame/analytics/_explain", request.getEndpoint());
+ try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) {
+ DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser);
+ assertThat(parsedConfig, equalTo(explainRequest.getConfig()));
+ }
+ }
+ // Request with id
+ {
+ ExplainDataFrameAnalyticsRequest explainRequest = new ExplainDataFrameAnalyticsRequest("foo");
+ Request request = MLRequestConverters.explainDataFrameAnalytics(explainRequest);
+ assertEquals(HttpPost.METHOD_NAME, request.getMethod());
+ assertEquals("/_ml/data_frame/analytics/foo/_explain", request.getEndpoint());
+ assertNull(request.getEntity());
}
}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
index 361b3674550..efb62b3f526 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
@@ -32,6 +32,8 @@ import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.GetIndexRequest;
import org.elasticsearch.client.ml.CloseJobRequest;
import org.elasticsearch.client.ml.CloseJobResponse;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -44,7 +46,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
-import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -140,6 +141,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.Binar
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.client.ml.inference.TrainedModelConfig;
import org.elasticsearch.client.ml.inference.TrainedModelDefinition;
@@ -1996,8 +1999,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
highLevelClient().indices().create(new CreateIndexRequest(indexName).mapping(mapping), RequestOptions.DEFAULT);
}
- public void testEstimateMemoryUsage() throws IOException {
- String indexName = "estimate-test-index";
+ public void testExplainDataFrameAnalytics() throws IOException {
+ String indexName = "explain-df-test-index";
createIndex(indexName, mappingForSoftClassification());
BulkRequest bulk1 = new BulkRequest()
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
@@ -2007,8 +2010,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
highLevelClient().bulk(bulk1, RequestOptions.DEFAULT);
MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
- PutDataFrameAnalyticsRequest estimateMemoryUsageRequest =
- new PutDataFrameAnalyticsRequest(
+ ExplainDataFrameAnalyticsRequest explainRequest =
+ new ExplainDataFrameAnalyticsRequest(
DataFrameAnalyticsConfig.builder()
.setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build())
.setAnalysis(OutlierDetection.createDefault())
@@ -2019,11 +2022,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB);
// Data Frame has 10 rows, expect that the returned estimates fall within (1kB, 1GB) range.
- EstimateMemoryUsageResponse response1 =
- execute(
- estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync);
- assertThat(response1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
- assertThat(response1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
+ ExplainDataFrameAnalyticsResponse response1 = execute(explainRequest, machineLearningClient::explainDataFrameAnalytics,
+ machineLearningClient::explainDataFrameAnalyticsAsync);
+
+ MemoryEstimation memoryEstimation1 = response1.getMemoryEstimation();
+ assertThat(memoryEstimation1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
+ assertThat(memoryEstimation1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
+
+ List&lt;FieldSelection&gt; fieldSelection = response1.getFieldSelection();
+ assertThat(fieldSelection.size(), equalTo(3));
+ assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("dataset", "label", "p"));
BulkRequest bulk2 = new BulkRequest()
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
@@ -2033,15 +2041,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
highLevelClient().bulk(bulk2, RequestOptions.DEFAULT);
// Data Frame now has 100 rows, expect that the returned estimates will be greater than or equal to the previous ones.
- EstimateMemoryUsageResponse response2 =
+ ExplainDataFrameAnalyticsResponse response2 =
execute(
- estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync);
+ explainRequest, machineLearningClient::explainDataFrameAnalytics, machineLearningClient::explainDataFrameAnalyticsAsync);
+ MemoryEstimation memoryEstimation2 = response2.getMemoryEstimation();
assertThat(
- response2.getExpectedMemoryWithoutDisk(),
- allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithoutDisk()), lessThan(upperBound)));
+ memoryEstimation2.getExpectedMemoryWithoutDisk(),
+ allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithoutDisk()), lessThan(upperBound)));
assertThat(
- response2.getExpectedMemoryWithDisk(),
- allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithDisk()), lessThan(upperBound)));
+ memoryEstimation2.getExpectedMemoryWithDisk(),
+ allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithDisk()), lessThan(upperBound)));
}
public void testGetTrainedModels() throws Exception {
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
index da12420535f..8a118672d95 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java
@@ -36,6 +36,8 @@ import org.elasticsearch.client.core.PageParams;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.ml.CloseJobRequest;
import org.elasticsearch.client.ml.CloseJobResponse;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -48,7 +50,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
import org.elasticsearch.client.ml.DeleteJobRequest;
import org.elasticsearch.client.ml.DeleteJobResponse;
import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
-import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -155,6 +156,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.Confu
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
import org.elasticsearch.client.ml.inference.TrainedModelConfig;
import org.elasticsearch.client.ml.inference.TrainedModelDefinition;
@@ -213,6 +216,7 @@ import java.util.zip.GZIPOutputStream;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
@@ -3460,10 +3464,10 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
}
}
- public void testEstimateMemoryUsage() throws Exception {
- createIndex("estimate-test-source-index");
+ public void testExplainDataFrameAnalytics() throws Exception {
+ createIndex("explain-df-test-source-index");
BulkRequest bulkRequest =
- new BulkRequest("estimate-test-source-index")
+ new BulkRequest("explain-df-test-source-index")
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
for (int i = 0; i < 10; ++i) {
bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L));
@@ -3471,22 +3475,33 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
RestHighLevelClient client = highLevelClient();
client.bulk(bulkRequest, RequestOptions.DEFAULT);
{
- // tag::estimate-memory-usage-request
+ // tag::explain-data-frame-analytics-id-request
+ ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("existing_job_id"); // <1>
+ // end::explain-data-frame-analytics-id-request
+
+ // tag::explain-data-frame-analytics-config-request
DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
- .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
+ .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build())
.setAnalysis(OutlierDetection.createDefault())
.build();
- PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1>
- // end::estimate-memory-usage-request
+ request = new ExplainDataFrameAnalyticsRequest(config); // <1>
+ // end::explain-data-frame-analytics-config-request
- // tag::estimate-memory-usage-execute
- EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT);
- // end::estimate-memory-usage-execute
+ // tag::explain-data-frame-analytics-execute
+ ExplainDataFrameAnalyticsResponse response = client.machineLearning().explainDataFrameAnalytics(request,
+ RequestOptions.DEFAULT);
+ // end::explain-data-frame-analytics-execute
- // tag::estimate-memory-usage-response
- ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1>
- ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2>
- // end::estimate-memory-usage-response
+ // tag::explain-data-frame-analytics-response
+ List&lt;FieldSelection&gt; fieldSelection = response.getFieldSelection(); // <1>
+ MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // <2>
+ // end::explain-data-frame-analytics-response
+
+ assertThat(fieldSelection.size(), equalTo(2));
+ assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("timestamp", "total"));
+
+ ByteSizeValue expectedMemoryWithoutDisk = memoryEstimation.getExpectedMemoryWithoutDisk(); // <1>
+ ByteSizeValue expectedMemoryWithDisk = memoryEstimation.getExpectedMemoryWithDisk(); // <2>
// We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow.
ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB);
@@ -3496,14 +3511,14 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
}
{
DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
- .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
+ .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build())
.setAnalysis(OutlierDetection.createDefault())
.build();
- PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config);
- // tag::estimate-memory-usage-execute-listener
- ActionListener&lt;EstimateMemoryUsageResponse&gt; listener = new ActionListener&lt;EstimateMemoryUsageResponse&gt;() {
+ ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
+ // tag::explain-data-frame-analytics-execute-listener
+ ActionListener&lt;ExplainDataFrameAnalyticsResponse&gt; listener = new ActionListener&lt;ExplainDataFrameAnalyticsResponse&gt;() {
@Override
- public void onResponse(EstimateMemoryUsageResponse response) {
+ public void onResponse(ExplainDataFrameAnalyticsResponse response) {
// <1>
}
@@ -3512,15 +3527,15 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
// <2>
}
};
- // end::estimate-memory-usage-execute-listener
+ // end::explain-data-frame-analytics-execute-listener
// Replace the empty listener by a blocking listener in test
final CountDownLatch latch = new CountDownLatch(1);
listener = new LatchedActionListener<>(listener, latch);
- // tag::estimate-memory-usage-execute-async
- client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1>
- // end::estimate-memory-usage-execute-async
+ // tag::explain-data-frame-analytics-execute-async
+ client.machineLearning().explainDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1>
+ // end::explain-data-frame-analytics-execute-async
assertTrue(latch.await(30L, TimeUnit.SECONDS));
}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java
new file mode 100644
index 00000000000..7273a40e298
--- /dev/null
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfigTests;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+
+public class ExplainDataFrameAnalyticsRequestTests extends ESTestCase {
+
+ public void testIdConstructor() {
+ ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("foo");
+ assertThat(request.getId(), equalTo("foo"));
+ assertThat(request.getConfig(), is(nullValue()));
+ }
+
+ public void testConfigConstructor() {
+ DataFrameAnalyticsConfig config = DataFrameAnalyticsConfigTests.randomDataFrameAnalyticsConfig();
+
+ ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
+ assertThat(request.getId(), is(nullValue()));
+ assertThat(request.getConfig(), equalTo(config));
+ }
+}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java
new file mode 100644
index 00000000000..f4adbd09ba7
--- /dev/null
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimationTests;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsResponseTests extends AbstractXContentTestCase&lt;ExplainDataFrameAnalyticsResponse&gt; {
+
+ @Override
+ protected ExplainDataFrameAnalyticsResponse createTestInstance() {
+ int fieldSelectionCount = randomIntBetween(1, 5);
+ List&lt;FieldSelection&gt; fieldSelection = new ArrayList&lt;&gt;(fieldSelectionCount);
+ IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+ MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+ return new ExplainDataFrameAnalyticsResponse(fieldSelection, memoryEstimation);
+ }
+
+ @Override
+ protected ExplainDataFrameAnalyticsResponse doParseInstance(XContentParser parser) throws IOException {
+ return ExplainDataFrameAnalyticsResponse.fromXContent(parser);
+ }
+
+ @Override
+ protected boolean supportsUnknownFields() {
+ return true;
+ }
+}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
new file mode 100644
index 00000000000..e76f39b5b85
--- /dev/null
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.dataframe.explain;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class FieldSelectionTests extends AbstractXContentTestCase&lt;FieldSelection&gt; {
+
+ public static FieldSelection createRandom() {
+ Set&lt;String&gt; mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip")
+ .stream().collect(Collectors.toSet());
+ FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values());
+ String reason = randomBoolean() ? null : randomAlphaOfLength(20);
+ return new FieldSelection(randomAlphaOfLength(10),
+ mappingTypes,
+ randomBoolean(),
+ randomBoolean(),
+ featureType,
+ reason);
+ }
+
+ @Override
+ protected FieldSelection createTestInstance() {
+ return createRandom();
+ }
+
+ @Override
+ protected FieldSelection doParseInstance(XContentParser parser) throws IOException {
+ return FieldSelection.PARSER.apply(parser, null);
+ }
+
+ @Override
+ protected boolean supportsUnknownFields() {
+ return true;
+ }
+}
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java
similarity index 68%
rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java
rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java
index f8f2746204d..884736e573e 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java
@@ -7,7 +7,7 @@
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.elasticsearch.client.ml;
+package org.elasticsearch.client.ml.dataframe.explain;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.XContentParser;
@@ -24,22 +24,22 @@ import org.elasticsearch.test.AbstractXContentTestCase;
import java.io.IOException;
-public class EstimateMemoryUsageResponseTests extends AbstractXContentTestCase&lt;EstimateMemoryUsageResponse&gt; {
+public class MemoryEstimationTests extends AbstractXContentTestCase&lt;MemoryEstimation&gt; {
- public static EstimateMemoryUsageResponse randomResponse() {
- return new EstimateMemoryUsageResponse(
+ public static MemoryEstimation createRandom() {
+ return new MemoryEstimation(
randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null,
randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null);
}
@Override
- protected EstimateMemoryUsageResponse createTestInstance() {
- return randomResponse();
+ protected MemoryEstimation createTestInstance() {
+ return createRandom();
}
@Override
- protected EstimateMemoryUsageResponse doParseInstance(XContentParser parser) throws IOException {
- return EstimateMemoryUsageResponse.fromXContent(parser);
+ protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException {
+ return MemoryEstimation.PARSER.apply(parser, null);
}
@Override
diff --git a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc
deleted file mode 100644
index 8b7ae0f55c8..00000000000
--- a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc
+++ /dev/null
@@ -1,36 +0,0 @@
---
-:api: estimate-memory-usage
-:request: PutDataFrameAnalyticsRequest
-:response: EstimateMemoryUsageResponse
---
-[role="xpack"]
-[id="{upid}-{api}"]
-=== Estimate memory usage API
-
-Estimates memory usage of {dfanalytics}.
-Estimation results can be used when deciding the appropriate value for `model_memory_limit` setting later on.
-
-The API accepts an +{request}+ object and returns an +{response}+.
-
-[id="{upid}-{api}-request"]
-==== Estimate memory usage request
-
-["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-request]
---------------------------------------------------
-<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed
-
-include::../execution.asciidoc[]
-
-[id="{upid}-{api}-response"]
-==== Response
-
-The returned +{response}+ contains the memory usage estimates.
-
-["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
-include-tagged::{doc-tests-file}[{api}-response]
---------------------------------------------------
-<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk).
-<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
\ No newline at end of file
diff --git a/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc
new file mode 100644
index 00000000000..3c41531d222
--- /dev/null
+++ b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc
@@ -0,0 +1,48 @@
+--
+:api: explain-data-frame-analytics
+:request: ExplainDataFrameAnalyticsRequest
+:response: ExplainDataFrameAnalyticsResponse
+--
+[role="xpack"]
+[id="{upid}-{api}"]
+=== Explain {dfanalytics} API
+
+Explains the following about a {dataframe-analytics-config}:
+
+* field selection: which fields are included or not in the analysis
+* memory estimation: how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for the `model_memory_limit` setting later on.
+
+The API accepts an +{request}+ object and returns an +{response}+.
+
+[id="{upid}-{api}-request"]
+==== Explain {dfanalytics} request
+
+The request can be constructed with the id of an existing {dfanalytics-job}.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-id-request]
+--------------------------------------------------
+<1> Constructing a new request with the id of an existing {dfanalytics-job}
+
+It can also be constructed with a {dataframe-analytics-config} to explain it before creating it.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-config-request]
+--------------------------------------------------
+<1> Constructing a new request containing a {dataframe-analytics-config}
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Response
+
+The returned +{response}+ contains the field selection and the memory usage estimation.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-response]
+--------------------------------------------------
+<1> A list where each item explains whether a field was selected for analysis or not
+<2> The memory estimation for the {dfanalytics-job}
diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc
index 770866a0755..d691a3ac34b 100644
--- a/docs/java-rest/high-level/supported-apis.asciidoc
+++ b/docs/java-rest/high-level/supported-apis.asciidoc
@@ -300,7 +300,7 @@ The Java High Level REST Client supports the following Machine Learning APIs:
* <<{upid}-start-data-frame-analytics>>
* <<{upid}-stop-data-frame-analytics>>
* <<{upid}-evaluate-data-frame>>
-* <<{upid}-estimate-memory-usage>>
+* <<{upid}-explain-data-frame-analytics>>
* <<{upid}-get-trained-models>>
* <<{upid}-put-filter>>
* <<{upid}-get-filters>>
@@ -353,7 +353,7 @@ include::ml/delete-data-frame-analytics.asciidoc[]
include::ml/start-data-frame-analytics.asciidoc[]
include::ml/stop-data-frame-analytics.asciidoc[]
include::ml/evaluate-data-frame.asciidoc[]
-include::ml/estimate-memory-usage.asciidoc[]
+include::ml/explain-data-frame-analytics.asciidoc[]
include::ml/get-trained-models.asciidoc[]
include::ml/put-filter.asciidoc[]
include::ml/get-filters.asciidoc[]
diff --git a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc
deleted file mode 100644
index 64db472dfd1..00000000000
--- a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc
+++ /dev/null
@@ -1,80 +0,0 @@
-[role="xpack"]
-[testenv="platinum"]
-[[estimate-memory-usage-dfanalytics]]
-=== Estimate memory usage API
-
-[subs="attributes"]
-++++
-Estimate memory usage for {dfanalytics-jobs}
-++++
-
-Estimates memory usage for the given {dataframe-analytics-config}.
-
-experimental[]
-
-[[ml-estimate-memory-usage-dfanalytics-request]]
-==== {api-request-title}
-
-`POST _ml/data_frame/analytics/_estimate_memory_usage`
-
-[[ml-estimate-memory-usage-dfanalytics-prereq]]
-==== {api-prereq-title}
-
-* You must have `monitor_ml` privilege to use this API. For more
-information, see &lt;&lt;security-privileges&gt;&gt; and &lt;&lt;built-in-roles&gt;&gt;.
-
-[[ml-estimate-memory-usage-dfanalytics-desc]]
-==== {api-description-title}
-
-This API estimates memory usage for the given {dataframe-analytics-config} before the {dfanalytics-job} is even created.
-
-Serves as an advice on how to set `model_memory_limit` when creating {dfanalytics-job}.
-
-[[ml-estimate-memory-usage-dfanalytics-request-body]]
-==== {api-request-body-title}
-
-`data_frame_analytics_config`::
- (Required, object) Intended configuration of {dfanalytics-job}. For more information, see
- &lt;&lt;ml-dfanalytics-resources&gt;&gt;.
- Note that `id` and `dest` don't need to be provided in the context of this API.
-
-[[ml-estimate-memory-usage-dfanalytics-results]]
-==== {api-response-body-title}
-
-`expected_memory_without_disk`::
- (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
- (i.e. without overflowing to disk).
-
-`expected_memory_with_disk`::
- (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
- `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to
- limit the main memory needed to perform {dfanalytics}.
-
-[[ml-estimate-memory-usage-dfanalytics-example]]
-==== {api-examples-title}
-
-[source,console]
---------------------------------------------------
-POST _ml/data_frame/analytics/_estimate_memory_usage
-{
- "data_frame_analytics_config": {
- "source": {
- "index": "logdata"
- },
- "analysis": {
- "outlier_detection": {}
- }
- }
-}
---------------------------------------------------
-// TEST[skip:TBD]
-
-The API returns the following results:
-
-[source,console-result]
-----
-{
- "expected_memory_without_disk": "128MB",
- "expected_memory_with_disk": "32MB"
-}
-----
diff --git a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
new file mode 100644
index 00000000000..c9ee565e9b2
--- /dev/null
+++ b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc
@@ -0,0 +1,159 @@
+[role="xpack"]
+[testenv="platinum"]
+[[explain-dfanalytics]]
+=== Explain {dfanalytics} API
+
+[subs="attributes"]
+++++
+Explain {dfanalytics} API
+++++
+
+Explains a {dataframe-analytics-config}.
+
+experimental[]
+
+[[ml-explain-dfanalytics-request]]
+==== {api-request-title}
+
+`GET _ml/data_frame/analytics/_explain` +
+
+`POST _ml/data_frame/analytics/_explain` +
+
+`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_explain` +
+
+`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_explain`
+
+[[ml-explain-dfanalytics-prereq]]
+==== {api-prereq-title}
+
+* You must have `monitor_ml` privilege to use this API. For more
+information, see <<security-privileges>> and <<built-in-roles>>.
+
+[[ml-explain-dfanalytics-desc]]
+==== {api-description-title}
+
+This API provides explanations for a {dataframe-analytics-config} that either already exists or has not been created yet.
+The following explanations are provided:
+
+* which fields are included in the analysis or excluded from it, and why
+* how much memory the analysis is estimated to require. The estimate can be used when deciding the appropriate value for the `model_memory_limit` setting later on.
+
+[[ml-explain-dfanalytics-path-params]]
+==== {api-path-parms-title}
+
+`<data_frame_analytics_id>`::
+ (Optional, string) A string that uniquely identifies the existing
+ {dfanalytics-job} to explain. This identifier can contain lowercase alphanumeric
+ characters (a-z and 0-9), hyphens, and underscores. It must start and end with
+ alphanumeric characters.
+
+[[ml-explain-dfanalytics-request-body]]
+==== {api-request-body-title}
+
+`data_frame_analytics_config`::
+ (Optional, object) Intended configuration of {dfanalytics-job}. For more information, see
+ <>.
+ Note that `id` and `dest` don't need to be provided in the context of this API.
+
+[[ml-explain-dfanalytics-results]]
+==== {api-response-body-title}
+
+The API returns a response that contains the following:
+
+`field_selection`::
+ (array) An array of objects that explain the selection decision for each field, sorted by field name.
+ Each object in the array has the following properties:
+
+ `name`:::
+ (string) The field name.
+
+ `mapping_types`:::
+ (array of strings) The mapping types of the field.
+
+ `is_included`:::
+ (boolean) Whether the field is selected to be included in the analysis.
+
+ `is_required`:::
+ (boolean) Whether the field is required.
+
+ `feature_type`:::
+ (string) The feature type of this field for the analysis. May be `categorical` or `numerical`.
+
+ `reason`:::
+ (string) The reason a field is not selected to be included in the analysis. Only present for fields that are excluded.
+
+`memory_estimation`::
+ (object) An object containing the memory estimates. The object has the following properties:
+
+ `expected_memory_without_disk`:::
+ (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
+ (i.e. without overflowing to disk).
+
+ `expected_memory_with_disk`:::
+ (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
+ `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` because using disk allows
+ limiting the amount of main memory needed to perform {dfanalytics}.
+
+[[ml-explain-dfanalytics-example]]
+==== {api-examples-title}
+
+[source,console]
+--------------------------------------------------
+POST _ml/data_frame/analytics/_explain
+{
+ "data_frame_analytics_config": {
+ "source": {
+ "index": "houses_sold_last_10_yrs"
+ },
+ "analysis": {
+ "regression": {
+ "dependent_variable": "price"
+ }
+ }
+ }
+}
+--------------------------------------------------
+// TEST[skip:TBD]
+
+The API returns the following results:
+
+[source,console-result]
+----
+{
+ "field_selection": [
+ {
+ "field": "number_of_bedrooms",
+ "mappings_types": ["integer"],
+ "is_included": true,
+ "is_required": false,
+ "feature_type": "numerical"
+ },
+ {
+ "field": "postcode",
+ "mappings_types": ["text"],
+ "is_included": false,
+ "is_required": false,
+ "reason": "[postcode.keyword] is preferred because it is aggregatable"
+ },
+ {
+ "field": "postcode.keyword",
+ "mappings_types": ["keyword"],
+ "is_included": true,
+ "is_required": false,
+ "feature_type": "categorical"
+ },
+ {
+ "field": "price",
+ "mappings_types": ["float"],
+ "is_included": true,
+ "is_required": true,
+ "feature_type": "numerical"
+ }
+ ],
+ "memory_estimation": {
+ "expected_memory_without_disk": "128MB",
+ "expected_memory_with_disk": "32MB"
+ }
+}
+----
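+
+The API can also explain a {dfanalytics-job} that has already been created, in which case the
+configuration is taken from the stored job and no request body is needed. For example, assuming a
+{dfanalytics-job} with the hypothetical id `house_price_regression` exists:
+
+[source,console]
+--------------------------------------------------
+GET _ml/data_frame/analytics/house_price_regression/_explain
+--------------------------------------------------
+// TEST[skip:TBD]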
diff --git a/docs/reference/ml/df-analytics/apis/index.asciidoc b/docs/reference/ml/df-analytics/apis/index.asciidoc
index 30e909f3ffa..6bf63e7ddb8 100644
--- a/docs/reference/ml/df-analytics/apis/index.asciidoc
+++ b/docs/reference/ml/df-analytics/apis/index.asciidoc
@@ -5,16 +5,16 @@
You can use the following APIs to perform {ml} {dfanalytics} activities.
-* <>
+* <>
* <>
* <>
* <>
* <>
* <>
* <>
-* <>
+* <>
-See also <>.
+See also <>.
//CREATE
include::put-dfanalytics.asciidoc[]
@@ -23,7 +23,7 @@ include::delete-dfanalytics.asciidoc[]
//EVALUATE
include::evaluate-dfanalytics.asciidoc[]
//ESTIMATE_MEMORY_USAGE
-include::estimate-memory-usage-dfanalytics.asciidoc[]
+include::explain-dfanalytics.asciidoc[]
//GET
include::get-dfanalytics.asciidoc[]
include::get-dfanalytics-stats.asciidoc[]
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java
index 8caac9d6e20..d99dd1ec233 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java
@@ -79,6 +79,7 @@ import org.elasticsearch.xpack.core.ml.MachineLearningFeatureSetUsage;
import org.elasticsearch.xpack.core.ml.MlMetadata;
import org.elasticsearch.xpack.core.ml.MlTasks;
import org.elasticsearch.xpack.core.ml.action.CloseJobAction;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction;
import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction;
@@ -89,7 +90,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction;
import org.elasticsearch.xpack.core.ml.action.DeleteJobAction;
import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction;
import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction;
import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction;
import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction;
@@ -158,6 +158,10 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.P
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.SoftClassificationMetric;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding;
import org.elasticsearch.xpack.core.ml.inference.results.ClassificationInferenceResults;
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
import org.elasticsearch.xpack.core.ml.inference.results.RegressionInferenceResults;
@@ -171,10 +175,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.OutputAgg
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedMode;
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedSum;
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.tree.Tree;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding;
import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage;
import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage;
@@ -381,7 +381,7 @@ public class XPackClientPlugin extends Plugin implements ActionPlugin, NetworkPl
StartDataFrameAnalyticsAction.INSTANCE,
StopDataFrameAnalyticsAction.INSTANCE,
EvaluateDataFrameAction.INSTANCE,
- EstimateMemoryUsageAction.INSTANCE,
+ ExplainDataFrameAnalyticsAction.INSTANCE,
InternalInferModelAction.INSTANCE,
GetTrainedModelsAction.INSTANCE,
DeleteTrainedModelAction.INSTANCE,
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java
deleted file mode 100644
index 529db21cced..00000000000
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.core.ml.action;
-
-import org.elasticsearch.action.ActionResponse;
-import org.elasticsearch.action.ActionType;
-import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.ParseField;
-import org.elasticsearch.common.io.stream.StreamInput;
-import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.unit.ByteSizeValue;
-import org.elasticsearch.common.xcontent.ConstructingObjectParser;
-import org.elasticsearch.common.xcontent.ObjectParser;
-import org.elasticsearch.common.xcontent.ToXContentObject;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-
-import java.io.IOException;
-import java.util.Objects;
-
-import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
-
-public class EstimateMemoryUsageAction extends ActionType<EstimateMemoryUsageAction.Response> {
-
- public static final EstimateMemoryUsageAction INSTANCE = new EstimateMemoryUsageAction();
- public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/estimate_memory_usage";
-
- private EstimateMemoryUsageAction() {
- super(NAME, EstimateMemoryUsageAction.Response::new);
- }
-
- public static class Response extends ActionResponse implements ToXContentObject {
-
- public static final ParseField TYPE = new ParseField("memory_usage_estimation_result");
-
- public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk");
- public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk");
-
- static final ConstructingObjectParser<Response, Void> PARSER =
- new ConstructingObjectParser<>(
- TYPE.getPreferredName(),
- args -> new Response((ByteSizeValue) args[0], (ByteSizeValue) args[1]));
-
- static {
- PARSER.declareField(
- optionalConstructorArg(),
- (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()),
- EXPECTED_MEMORY_WITHOUT_DISK,
- ObjectParser.ValueType.VALUE);
- PARSER.declareField(
- optionalConstructorArg(),
- (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()),
- EXPECTED_MEMORY_WITH_DISK,
- ObjectParser.ValueType.VALUE);
- }
-
- private final ByteSizeValue expectedMemoryWithoutDisk;
- private final ByteSizeValue expectedMemoryWithDisk;
-
- public Response(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
- this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk;
- this.expectedMemoryWithDisk = expectedMemoryWithDisk;
- }
-
- public Response(StreamInput in) throws IOException {
- super(in);
- this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new);
- this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new);
- }
-
- public ByteSizeValue getExpectedMemoryWithoutDisk() {
- return expectedMemoryWithoutDisk;
- }
-
- public ByteSizeValue getExpectedMemoryWithDisk() {
- return expectedMemoryWithDisk;
- }
-
- @Override
- public void writeTo(StreamOutput out) throws IOException {
- out.writeOptionalWriteable(expectedMemoryWithoutDisk);
- out.writeOptionalWriteable(expectedMemoryWithDisk);
- }
-
- @Override
- public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
- builder.startObject();
- if (expectedMemoryWithoutDisk != null) {
- builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep());
- }
- if (expectedMemoryWithDisk != null) {
- builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep());
- }
- builder.endObject();
- return builder;
- }
-
- @Override
- public boolean equals(Object other) {
- if (this == other) {
- return true;
- }
- if (other == null || getClass() != other.getClass()) {
- return false;
- }
-
- Response that = (Response) other;
- return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk)
- && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk);
- }
-
- @Override
- public int hashCode() {
- return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk);
- }
- }
-}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java
new file mode 100644
index 00000000000..46888ea27a7
--- /dev/null
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.action;
+
+import org.elasticsearch.action.ActionResponse;
+import org.elasticsearch.action.ActionType;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+public class ExplainDataFrameAnalyticsAction extends ActionType<ExplainDataFrameAnalyticsAction.Response> {
+
+ public static final ExplainDataFrameAnalyticsAction INSTANCE = new ExplainDataFrameAnalyticsAction();
+ public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/explain";
+
+ private ExplainDataFrameAnalyticsAction() {
+ super(NAME, ExplainDataFrameAnalyticsAction.Response::new);
+ }
+
+ public static class Response extends ActionResponse implements ToXContentObject {
+
+ public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");
+
+ public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
+ public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");
+
+ static final ConstructingObjectParser<Response, Void> PARSER =
+ new ConstructingObjectParser<>(
+ TYPE.getPreferredName(),
+ args -> new Response((List<FieldSelection>) args[0], (MemoryEstimation) args[1]));
+
+ static {
+ PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
+ PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
+ }
+
+ private final List<FieldSelection> fieldSelection;
+ private final MemoryEstimation memoryEstimation;
+
+ public Response(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) {
+ this.fieldSelection = Objects.requireNonNull(fieldSelection);
+ this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
+ }
+
+ public Response(StreamInput in) throws IOException {
+ super(in);
+ this.fieldSelection = in.readList(FieldSelection::new);
+ this.memoryEstimation = new MemoryEstimation(in);
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeList(fieldSelection);
+ memoryEstimation.writeTo(out);
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
+ builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) return true;
+ if (other == null || getClass() != other.getClass()) return false;
+
+ Response that = (Response) other;
+ return Objects.equals(fieldSelection, that.fieldSelection)
+ && Objects.equals(memoryEstimation, that.memoryEstimation);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(fieldSelection, memoryEstimation);
+ }
+
+ public MemoryEstimation getMemoryEstimation() {
+ return memoryEstimation;
+ }
+
+ public List<FieldSelection> getFieldSelection() {
+ return fieldSelection;
+ }
+ }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java
index 6860162d793..5bce41d8a4a 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java
@@ -51,13 +51,14 @@ public class PutDataFrameAnalyticsAction extends ActionType<PutDataFrameAnalyticsAction.Response> {
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java
new file mode 100644
--- /dev/null
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java
+public class FieldSelection implements ToXContentObject, Writeable {
+
+ public static final ConstructingObjectParser<FieldSelection, Void> PARSER = new ConstructingObjectParser<>("field_selection",
+ a -> new FieldSelection((String) a[0], new HashSet<>((List<String>) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4],
+ (String) a[5]));
+
+ static {
+ PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME);
+ PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES);
+ PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED);
+ PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED);
+ PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+ if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+ return FeatureType.fromString(p.text());
+ }
+ throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+ }, FEATURE_TYPE, ObjectParser.ValueType.STRING);
+ PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON);
+ }
+
+ private final String name;
+ private final Set<String> mappingTypes;
+ private final boolean isIncluded;
+ private final boolean isRequired;
+ private final FeatureType featureType;
+ private final String reason;
+
+ public static FieldSelection included(String name, Set<String> mappingTypes, boolean isRequired, FeatureType featureType) {
+ return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null);
+ }
+
+ public static FieldSelection excluded(String name, Set<String> mappingTypes, String reason) {
+ return new FieldSelection(name, mappingTypes, false, false, null, reason);
+ }
+
+ FieldSelection(String name, Set<String> mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType,
+ @Nullable String reason) {
+ this.name = Objects.requireNonNull(name);
+ this.mappingTypes = Collections.unmodifiableSet(mappingTypes);
+ this.isIncluded = isIncluded;
+ this.isRequired = isRequired;
+ this.featureType = featureType;
+ this.reason = reason;
+ }
+
+ public FieldSelection(StreamInput in) throws IOException {
+ this.name = in.readString();
+ this.mappingTypes = Collections.unmodifiableSet(in.readSet(StreamInput::readString));
+ this.isIncluded = in.readBoolean();
+ this.isRequired = in.readBoolean();
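+ // The feature type is optional on the wire: a leading boolean flag records whether
+ // an enum value follows, mirroring the writeTo method below.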
+ boolean hasFeatureType = in.readBoolean();
+
+ if (hasFeatureType) {
+ this.featureType = in.readEnum(FeatureType.class);
+ } else {
+ this.featureType = null;
+ }
+
+ this.reason = in.readOptionalString();
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeString(name);
+ out.writeCollection(mappingTypes, StreamOutput::writeString);
+ out.writeBoolean(isIncluded);
+ out.writeBoolean(isRequired);
+
+ if (featureType == null) {
+ out.writeBoolean(false);
+ } else {
+ out.writeBoolean(true);
+ out.writeEnum(featureType);
+ }
+ out.writeOptionalString(reason);
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ builder.field(NAME.getPreferredName(), name);
+ builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes);
+ builder.field(IS_INCLUDED.getPreferredName(), isIncluded);
+ builder.field(IS_REQUIRED.getPreferredName(), isRequired);
+ if (featureType != null) {
+ builder.field(FEATURE_TYPE.getPreferredName(), featureType);
+ }
+ if (reason != null) {
+ builder.field(REASON.getPreferredName(), reason);
+ }
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ FieldSelection that = (FieldSelection) o;
+ return Objects.equals(name, that.name)
+ && Objects.equals(mappingTypes, that.mappingTypes)
+ && isIncluded == that.isIncluded
+ && isRequired == that.isRequired
+ && Objects.equals(featureType, that.featureType)
+ && Objects.equals(reason, that.reason);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public Set<String> getMappingTypes() {
+ return mappingTypes;
+ }
+
+ public boolean isIncluded() {
+ return isIncluded;
+ }
+
+ public boolean isRequired() {
+ return isRequired;
+ }
+
+ @Nullable
+ public FeatureType getFeatureType() {
+ return featureType;
+ }
+
+ @Nullable
+ public String getReason() {
+ return reason;
+ }
+}
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java
new file mode 100644
index 00000000000..7972c6a9ee0
--- /dev/null
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.dataframe.explain;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.Objects;
+
+import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
+
+public class MemoryEstimation implements ToXContentObject, Writeable {
+
+ public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk");
+ public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk");
+
+ public static final ConstructingObjectParser<MemoryEstimation, Void> PARSER = new ConstructingObjectParser<>("memory_estimation",
+ a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1]));
+
+ static {
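+ // Both estimates are rendered by toXContent as human-readable strings (e.g. "128mb"),
+ // so they are parsed back through ByteSizeValue rather than as raw longs.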
+ PARSER.declareField(
+ optionalConstructorArg(),
+ (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()),
+ EXPECTED_MEMORY_WITHOUT_DISK,
+ ObjectParser.ValueType.VALUE);
+ PARSER.declareField(
+ optionalConstructorArg(),
+ (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()),
+ EXPECTED_MEMORY_WITH_DISK,
+ ObjectParser.ValueType.VALUE);
+ }
+
+ private final ByteSizeValue expectedMemoryWithoutDisk;
+ private final ByteSizeValue expectedMemoryWithDisk;
+
+ public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
+ this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk;
+ this.expectedMemoryWithDisk = expectedMemoryWithDisk;
+ }
+
+ public MemoryEstimation(StreamInput in) throws IOException {
+ this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new);
+ this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new);
+ }
+
+ public ByteSizeValue getExpectedMemoryWithoutDisk() {
+ return expectedMemoryWithoutDisk;
+ }
+
+ public ByteSizeValue getExpectedMemoryWithDisk() {
+ return expectedMemoryWithDisk;
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeOptionalWriteable(expectedMemoryWithoutDisk);
+ out.writeOptionalWriteable(expectedMemoryWithDisk);
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ if (expectedMemoryWithoutDisk != null) {
+ builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep());
+ }
+ if (expectedMemoryWithDisk != null) {
+ builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep());
+ }
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other) {
+ return true;
+ }
+ if (other == null || getClass() != other.getClass()) {
+ return false;
+ }
+
+ MemoryEstimation that = (MemoryEstimation) other;
+ return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk)
+ && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk);
+ }
+}
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java
deleted file mode 100644
index 1bc8d8970ea..00000000000
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.core.ml.action;
-
-import org.elasticsearch.common.io.stream.Writeable;
-import org.elasticsearch.common.unit.ByteSizeUnit;
-import org.elasticsearch.common.unit.ByteSizeValue;
-import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.test.AbstractSerializingTestCase;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction.Response;
-
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.nullValue;
-
-public class EstimateMemoryUsageActionResponseTests extends AbstractSerializingTestCase<Response> {
-
- @Override
- protected Response createTestInstance() {
- return new Response(
- randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null,
- randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null);
- }
-
- @Override
- protected Writeable.Reader<Response> instanceReader() {
- return Response::new;
- }
-
- @Override
- protected Response doParseInstance(XContentParser parser) {
- return Response.PARSER.apply(parser, null);
- }
-
- public void testConstructor_NullValues() {
- Response response = new Response(null, null);
- assertThat(response.getExpectedMemoryWithoutDisk(), nullValue());
- assertThat(response.getExpectedMemoryWithDisk(), nullValue());
- }
-
- public void testConstructor_SmallValues() {
- Response response = new Response(new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB));
- assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB)));
- assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB)));
- }
-
- public void testConstructor() {
- Response response = new Response(new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB));
- assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB)));
- assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB)));
- }
-}
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java
new file mode 100644
index 00000000000..ea1aca3916c
--- /dev/null
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.action;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction.Response;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimationTests;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsActionResponseTests extends AbstractSerializingTestCase<Response> {
+
+ @Override
+ protected Response createTestInstance() {
+ int fieldSelectionCount = randomIntBetween(1, 5);
+ List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
+ IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+ MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+ return new Response(fieldSelection, memoryEstimation);
+ }
+
+ @Override
+ protected Writeable.Reader<Response> instanceReader() {
+ return Response::new;
+ }
+
+ @Override
+ protected Response doParseInstance(XContentParser parser) {
+ return Response.PARSER.apply(parser, null);
+ }
+}
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
index 3266f488daf..d8c52c83902 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
@@ -279,32 +279,32 @@ public class DataFrameAnalyticsConfigTests extends AbstractSerializingTestCase<DataFrameAnalyticsConfig> {
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java
new file mode 100644
--- /dev/null
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java
+public class FieldSelectionTests extends AbstractSerializingTestCase<FieldSelection> {
+
+ public static FieldSelection createRandom() {
+ Set<String> mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip")
+ .stream().collect(Collectors.toSet());
+ FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values());
+ String reason = randomBoolean() ? null : randomAlphaOfLength(20);
+ return new FieldSelection(randomAlphaOfLength(10),
+ mappingTypes,
+ randomBoolean(),
+ randomBoolean(),
+ featureType,
+ reason);
+ }
+
+ @Override
+ protected FieldSelection createTestInstance() {
+ return createRandom();
+ }
+
+ @Override
+ protected FieldSelection doParseInstance(XContentParser parser) throws IOException {
+ return FieldSelection.PARSER.apply(parser, null);
+ }
+
+ @Override
+ protected Writeable.Reader<FieldSelection> instanceReader() {
+ return FieldSelection::new;
+ }
+}
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java
new file mode 100644
index 00000000000..dc9e20bd86a
--- /dev/null
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.dataframe.explain;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.unit.ByteSizeUnit;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.nullValue;
+
+public class MemoryEstimationTests extends AbstractSerializingTestCase<MemoryEstimation> {
+
+ public static MemoryEstimation createRandom() {
+ return new MemoryEstimation(
+ randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null,
+ randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null);
+ }
+
+ @Override
+ protected MemoryEstimation createTestInstance() {
+ return createRandom();
+ }
+
+ @Override
+ protected Writeable.Reader<MemoryEstimation> instanceReader() {
+ return MemoryEstimation::new;
+ }
+
+ @Override
+ protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException {
+ return MemoryEstimation.PARSER.apply(parser, null);
+ }
+
+ public void testConstructor_NullValues() {
+ MemoryEstimation memoryEstimation = new MemoryEstimation(null, null);
+ assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), nullValue());
+ assertThat(memoryEstimation.getExpectedMemoryWithDisk(), nullValue());
+ }
+
+ public void testConstructor_SmallValues() {
+ MemoryEstimation memoryEstimation = new MemoryEstimation(
+ new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB));
+ assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB)));
+ assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB)));
+ }
+
+ public void testConstructor() {
+ MemoryEstimation memoryEstimation = new MemoryEstimation(
+ new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB));
+ assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB)));
+ assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB)));
+ }
+}
diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle
index 961dc944ea7..38beb1d1908 100644
--- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle
+++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle
@@ -92,7 +92,6 @@ integTest.runner {
'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
- 'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame',
'ml/evaluate_data_frame/Test given missing index',
'ml/evaluate_data_frame/Test given index does not exist',
'ml/evaluate_data_frame/Test given missing evaluation',
@@ -113,6 +112,10 @@ integTest.runner {
'ml/evaluate_data_frame/Test regression given evaluation with empty metrics',
'ml/evaluate_data_frame/Test regression given missing actual_field',
'ml/evaluate_data_frame/Test regression given missing predicted_field',
+ 'ml/explain_data_frame_analytics/Test neither job id nor body',
+ 'ml/explain_data_frame_analytics/Test both job id and body',
+ 'ml/explain_data_frame_analytics/Test missing job',
+ 'ml/explain_data_frame_analytics/Test empty data frame given body',
'ml/delete_job_force/Test cannot force delete a non-existent job',
'ml/delete_model_snapshot/Test delete snapshot missing snapshotId',
'ml/delete_model_snapshot/Test delete snapshot missing job_id',
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
index 964bc719cbd..0293a367473 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.XPackSettings;
import org.elasticsearch.xpack.core.ml.MachineLearningField;
import org.elasticsearch.xpack.core.ml.MlMetaIndex;
import org.elasticsearch.xpack.core.ml.action.CloseJobAction;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction;
import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction;
@@ -75,7 +76,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction;
import org.elasticsearch.xpack.core.ml.action.DeleteJobAction;
import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction;
import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction;
import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction;
import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction;
@@ -98,8 +98,8 @@ import org.elasticsearch.xpack.core.ml.action.GetOverallBucketsAction;
import org.elasticsearch.xpack.core.ml.action.GetRecordsAction;
import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction;
import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction;
-import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction;
import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction;
+import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction;
import org.elasticsearch.xpack.core.ml.action.KillProcessAction;
import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
import org.elasticsearch.xpack.core.ml.action.OpenJobAction;
@@ -136,6 +136,7 @@ import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.notifications.AuditorField;
import org.elasticsearch.xpack.core.template.TemplateUtils;
import org.elasticsearch.xpack.ml.action.TransportCloseJobAction;
+import org.elasticsearch.xpack.ml.action.TransportExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarEventAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteDataFrameAnalyticsAction;
@@ -146,7 +147,6 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteTrainedModelAction;
-import org.elasticsearch.xpack.ml.action.TransportEstimateMemoryUsageAction;
import org.elasticsearch.xpack.ml.action.TransportEvaluateDataFrameAction;
import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction;
import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction;
@@ -167,9 +167,9 @@ import org.elasticsearch.xpack.ml.action.TransportGetJobsStatsAction;
import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction;
import org.elasticsearch.xpack.ml.action.TransportGetOverallBucketsAction;
import org.elasticsearch.xpack.ml.action.TransportGetRecordsAction;
+import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction;
import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsStatsAction;
import org.elasticsearch.xpack.ml.action.TransportInternalInferModelAction;
-import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction;
import org.elasticsearch.xpack.ml.action.TransportIsolateDatafeedAction;
import org.elasticsearch.xpack.ml.action.TransportKillProcessAction;
import org.elasticsearch.xpack.ml.action.TransportMlInfoAction;
@@ -258,8 +258,8 @@ import org.elasticsearch.xpack.ml.rest.datafeeds.RestPutDatafeedAction;
import org.elasticsearch.xpack.ml.rest.datafeeds.RestStartDatafeedAction;
import org.elasticsearch.xpack.ml.rest.datafeeds.RestStopDatafeedAction;
import org.elasticsearch.xpack.ml.rest.datafeeds.RestUpdateDatafeedAction;
+import org.elasticsearch.xpack.ml.rest.dataframe.RestExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestDeleteDataFrameAnalyticsAction;
-import org.elasticsearch.xpack.ml.rest.dataframe.RestEstimateMemoryUsageAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestEvaluateDataFrameAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsStatsAction;
@@ -759,7 +759,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
new RestStartDataFrameAnalyticsAction(restController),
new RestStopDataFrameAnalyticsAction(restController),
new RestEvaluateDataFrameAction(restController),
- new RestEstimateMemoryUsageAction(restController),
+ new RestExplainDataFrameAnalyticsAction(restController),
new RestGetTrainedModelsAction(restController),
new RestDeleteTrainedModelAction(restController),
new RestGetTrainedModelsStatsAction(restController)
@@ -829,7 +829,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
new ActionHandler<>(StartDataFrameAnalyticsAction.INSTANCE, TransportStartDataFrameAnalyticsAction.class),
new ActionHandler<>(StopDataFrameAnalyticsAction.INSTANCE, TransportStopDataFrameAnalyticsAction.class),
new ActionHandler<>(EvaluateDataFrameAction.INSTANCE, TransportEvaluateDataFrameAction.class),
- new ActionHandler<>(EstimateMemoryUsageAction.INSTANCE, TransportEstimateMemoryUsageAction.class),
+ new ActionHandler<>(ExplainDataFrameAnalyticsAction.INSTANCE, TransportExplainDataFrameAnalyticsAction.class),
new ActionHandler<>(InternalInferModelAction.INSTANCE, TransportInternalInferModelAction.class),
new ActionHandler<>(GetTrainedModelsAction.INSTANCE, TransportGetTrainedModelsAction.class),
new ActionHandler<>(DeleteTrainedModelAction.INSTANCE, TransportDeleteTrainedModelAction.class),
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java
deleted file mode 100644
index a82db7c4f97..00000000000
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.action;
-
-import org.elasticsearch.action.ActionListener;
-import org.elasticsearch.action.ActionListenerResponseHandler;
-import org.elasticsearch.action.support.ActionFilters;
-import org.elasticsearch.action.support.HandledTransportAction;
-import org.elasticsearch.client.node.NodeClient;
-import org.elasticsearch.cluster.ClusterState;
-import org.elasticsearch.cluster.node.DiscoveryNode;
-import org.elasticsearch.cluster.service.ClusterService;
-import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.tasks.Task;
-import org.elasticsearch.transport.TransportService;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
-import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
-import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
-import org.elasticsearch.xpack.ml.MachineLearning;
-import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
-import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager;
-
-import java.util.Objects;
-import java.util.Optional;
-
-/**
- * Estimates memory usage for the given data frame analytics spec.
- * Redirects to a different node if the current node is *not* an ML node.
- */
-public class TransportEstimateMemoryUsageAction
- extends HandledTransportAction<PutDataFrameAnalyticsAction.Request, EstimateMemoryUsageAction.Response> {
-
- private final TransportService transportService;
- private final ClusterService clusterService;
- private final NodeClient client;
- private final MemoryUsageEstimationProcessManager processManager;
-
- @Inject
- public TransportEstimateMemoryUsageAction(TransportService transportService,
- ActionFilters actionFilters,
- ClusterService clusterService,
- NodeClient client,
- MemoryUsageEstimationProcessManager processManager) {
- super(EstimateMemoryUsageAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new);
- this.transportService = transportService;
- this.clusterService = Objects.requireNonNull(clusterService);
- this.client = Objects.requireNonNull(client);
- this.processManager = Objects.requireNonNull(processManager);
- }
-
- @Override
- protected void doExecute(Task task,
- PutDataFrameAnalyticsAction.Request request,
- ActionListener<EstimateMemoryUsageAction.Response> listener) {
- DiscoveryNode localNode = clusterService.localNode();
- if (MachineLearning.isMlNode(localNode)) {
- doEstimateMemoryUsage(createTaskIdForMemoryEstimation(task), request, listener);
- } else {
- redirectToMlNode(request, listener);
- }
- }
-
- /**
- * Creates unique task id for the memory estimation process. This id is useful when logging.
- */
- private static String createTaskIdForMemoryEstimation(Task task) {
- return "memory_usage_estimation_" + task.getId();
- }
-
- /**
- * Performs memory usage estimation.
- * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on
- * the ML node.
- */
- private void doEstimateMemoryUsage(String taskId,
- PutDataFrameAnalyticsAction.Request request,
- ActionListener<EstimateMemoryUsageAction.Response> listener) {
- DataFrameDataExtractorFactory.createForSourceIndices(
- client,
- taskId,
- true, // We are not interested in first-time run validations here
- request.getConfig(),
- ActionListener.wrap(
- dataExtractorFactory -> {
- processManager.runJobAsync(
- taskId,
- request.getConfig(),
- dataExtractorFactory,
- ActionListener.wrap(
- result -> listener.onResponse(
- new EstimateMemoryUsageAction.Response(
- result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())),
- listener::onFailure
- )
- );
- },
- listener::onFailure
- )
- );
- }
-
- /**
- * Finds the first available ML node in the cluster and redirects the request to this node.
- */
- private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request,
- ActionListener<EstimateMemoryUsageAction.Response> listener) {
- Optional<DiscoveryNode> node = findMlNode(clusterService.state());
- if (node.isPresent()) {
- transportService.sendRequest(
- node.get(), actionName, request, new ActionListenerResponseHandler<>(listener, EstimateMemoryUsageAction.Response::new));
- } else {
- listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on"));
- }
- }
-
- /**
- * Finds the first available ML node in the cluster state.
- */
- private static Optional<DiscoveryNode> findMlNode(ClusterState clusterState) {
- for (DiscoveryNode node : clusterState.getNodes()) {
- if (MachineLearning.isMlNode(node)) {
- return Optional.of(node);
- }
- }
- return Optional.empty();
- }
-}
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java
new file mode 100644
index 00000000000..7f19deb8d5b
--- /dev/null
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.action;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.ActionListenerResponseHandler;
+import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.HandledTransportAction;
+import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.license.LicenseUtils;
+import org.elasticsearch.license.XPackLicenseState;
+import org.elasticsearch.tasks.Task;
+import org.elasticsearch.transport.TransportService;
+import org.elasticsearch.xpack.core.XPackField;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.ml.MachineLearning;
+import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
+import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetector;
+import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory;
+import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager;
+import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
+
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Provides explanations on aspects of the given data frame analytics spec like memory estimation, field selection, etc.
+ * Redirects to a different node if the current node is *not* an ML node.
+ */
+public class TransportExplainDataFrameAnalyticsAction
+ extends HandledTransportAction<PutDataFrameAnalyticsAction.Request, ExplainDataFrameAnalyticsAction.Response> {
+
+ private final XPackLicenseState licenseState;
+ private final TransportService transportService;
+ private final ClusterService clusterService;
+ private final NodeClient client;
+ private final MemoryUsageEstimationProcessManager processManager;
+
+ @Inject
+ public TransportExplainDataFrameAnalyticsAction(TransportService transportService,
+ ActionFilters actionFilters,
+ ClusterService clusterService,
+ NodeClient client,
+ XPackLicenseState licenseState,
+ MemoryUsageEstimationProcessManager processManager) {
+ super(ExplainDataFrameAnalyticsAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new);
+ this.transportService = transportService;
+ this.clusterService = Objects.requireNonNull(clusterService);
+ this.client = Objects.requireNonNull(client);
+ this.licenseState = licenseState;
+ this.processManager = Objects.requireNonNull(processManager);
+ }
+
+ @Override
+ protected void doExecute(Task task,
+ PutDataFrameAnalyticsAction.Request request,
+ ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
+ if (licenseState.isMachineLearningAllowed() == false) {
+ listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING));
+ return;
+ }
+
+ DiscoveryNode localNode = clusterService.localNode();
+ if (MachineLearning.isMlNode(localNode)) {
+ explain(task, request, listener);
+ } else {
+ redirectToMlNode(request, listener);
+ }
+ }
+
+ private void explain(Task task, PutDataFrameAnalyticsAction.Request request,
+ ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
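+ // Phase 1: detect the fields the analysis would extract. The detector also produces the
+ // per-field explanations that become the field_selection part of the response.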
+ ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
+ extractedFieldsDetectorFactory.createFromSource(request.getConfig(), true, ActionListener.wrap(
+ extractedFieldsDetector -> {
+ explain(task, request, extractedFieldsDetector, listener);
+ },
+ listener::onFailure
+ ));
+ }
+
+ private void explain(Task task, PutDataFrameAnalyticsAction.Request request, ExtractedFieldsDetector extractedFieldsDetector,
+ ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
+
+ ActionListener<MemoryEstimation> memoryEstimationListener = ActionListener.wrap(
+ memoryEstimation -> listener.onResponse(new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)),
+ listener::onFailure
+ );
+
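+ // Phase 2: estimate memory usage for the detected fields; the listener above combines
+ // both parts into the final response.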
+ estimateMemoryUsage(task, request, fieldExtraction.v1(), memoryEstimationListener);
+ }
+
+ /**
+ * Performs memory usage estimation.
+ * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on
+ * the ML node.
+ */
+ private void estimateMemoryUsage(Task task,
+ PutDataFrameAnalyticsAction.Request request,
+ ExtractedFields extractedFields,
+ ActionListener<MemoryEstimation> listener) {
+ final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId();
+ DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(
+ client, estimateMemoryTaskId, request.getConfig(), extractedFields);
+ processManager.runJobAsync(
+ estimateMemoryTaskId,
+ request.getConfig(),
+ extractorFactory,
+ ActionListener.wrap(
+ result -> listener.onResponse(
+ new MemoryEstimation(result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())),
+ listener::onFailure
+ )
+ );
+ }
+
+ /**
+ * Finds the first available ML node in the cluster and redirects the request to this node.
+ */
+ private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request,
+ ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
+ Optional<DiscoveryNode> node = findMlNode(clusterService.state());
+ if (node.isPresent()) {
+ transportService.sendRequest(node.get(), actionName, request,
+ new ActionListenerResponseHandler<>(listener, ExplainDataFrameAnalyticsAction.Response::new));
+ } else {
+ listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on"));
+ }
+ }
+
+ /**
+ * Finds the first available ML node in the cluster state.
+ */
+ private static Optional<DiscoveryNode> findMlNode(ClusterState clusterState) {
+ for (DiscoveryNode node : clusterState.getNodes()) {
+ if (MachineLearning.isMlNode(node)) {
+ return Optional.of(node);
+ }
+ }
+ return Optional.empty();
+ }
+}
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java
index 1740a7fb532..af67750ee6d 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java
@@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.license.LicenseUtils;
@@ -47,7 +48,7 @@ import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.XPackField;
import org.elasticsearch.xpack.core.ml.MlMetadata;
import org.elasticsearch.xpack.core.ml.MlTasks;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction;
import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction;
@@ -66,6 +67,7 @@ import org.elasticsearch.xpack.ml.dataframe.SourceDestValidator;
import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory;
import org.elasticsearch.xpack.ml.dataframe.persistence.DataFrameAnalyticsConfigProvider;
+import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.job.JobNodeSelector;
import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor;
import org.elasticsearch.xpack.ml.process.MlMemoryTracker;
@@ -190,20 +192,18 @@ public class TransportStartDataFrameAnalyticsAction
final String jobId = startContext.config.getId();
// Tell the job tracker to refresh the memory requirement for this job and all other jobs that have persistent tasks
- ActionListener<EstimateMemoryUsageAction.Response> estimateMemoryUsageListener = ActionListener.wrap(
- estimateMemoryUsageResponse -> {
- auditor.info(
- jobId,
- Messages.getMessage(
- Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE,
- estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()));
+ ActionListener<ExplainDataFrameAnalyticsAction.Response> explainListener = ActionListener.wrap(
+ explainResponse -> {
+ ByteSizeValue expectedMemoryWithoutDisk = explainResponse.getMemoryEstimation().getExpectedMemoryWithoutDisk();
+ auditor.info(jobId,
+ Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, expectedMemoryWithoutDisk));
// Validate that model memory limit is sufficient to run the analysis
if (startContext.config.getModelMemoryLimit()
- .compareTo(estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()) < 0) {
+ .compareTo(expectedMemoryWithoutDisk) < 0) {
ElasticsearchStatusException e =
ExceptionsHelper.badRequestException(
"Cannot start because the configured model memory limit [{}] is lower than the expected memory usage [{}]",
- startContext.config.getModelMemoryLimit(), estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk());
+ startContext.config.getModelMemoryLimit(), expectedMemoryWithoutDisk);
listener.onFailure(e);
return;
}
@@ -215,13 +215,13 @@ public class TransportStartDataFrameAnalyticsAction
listener::onFailure
);
- PutDataFrameAnalyticsAction.Request estimateMemoryUsageRequest = new PutDataFrameAnalyticsAction.Request(startContext.config);
+ PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(startContext.config);
ClientHelper.executeAsyncWithOrigin(
client,
ClientHelper.ML_ORIGIN,
- EstimateMemoryUsageAction.INSTANCE,
- estimateMemoryUsageRequest,
- estimateMemoryUsageListener);
+ ExplainDataFrameAnalyticsAction.INSTANCE,
+ explainRequest,
+ explainListener);
}
@@ -277,7 +277,11 @@ public class TransportStartDataFrameAnalyticsAction
// Validate extraction is possible
boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME;
new ExtractedFieldsDetectorFactory(client).createFromSource(startContext.config, isTaskRestarting, ActionListener.wrap(
- extractedFieldsDetector -> toValidateDestEmptyListener.onResponse(startContext), finalListener::onFailure));
+ extractedFieldsDetector -> {
+ startContext.extractedFields = extractedFieldsDetector.detect().v1();
+ toValidateDestEmptyListener.onResponse(startContext);
+ },
+ finalListener::onFailure));
},
finalListener::onFailure
);
@@ -294,33 +298,27 @@ public class TransportStartDataFrameAnalyticsAction
}
private void validateSourceIndexHasRows(StartContext startContext, ActionListener<StartContext> listener) {
- boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME;
- DataFrameDataExtractorFactory.createForSourceIndices(client,
+ DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client,
"validate_source_index_has_rows-" + startContext.config.getId(),
- isTaskRestarting,
startContext.config,
- ActionListener.wrap(
- dataFrameDataExtractorFactory ->
- dataFrameDataExtractorFactory
- .newExtractor(false)
- .collectDataSummaryAsync(ActionListener.wrap(
- dataSummary -> {
- if (dataSummary.rows == 0) {
- listener.onFailure(ExceptionsHelper.badRequestException(
- "Unable to start {} as no documents in the source indices [{}] contained all the fields "
- + "selected for analysis. If you are relying on automatic field selection then there are "
- + "currently mapped fields that do not exist in any indexed documents, and you will have "
- + "to switch to explicit field selection and include only fields that exist in indexed "
- + "documents.",
- startContext.config.getId(),
- Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
- ));
- } else {
- listener.onResponse(startContext);
- }
- },
- listener::onFailure
- )),
+ startContext.extractedFields);
+ extractorFactory.newExtractor(false)
+ .collectDataSummaryAsync(ActionListener.wrap(
+ dataSummary -> {
+ if (dataSummary.rows == 0) {
+ listener.onFailure(ExceptionsHelper.badRequestException(
+ "Unable to start {} as no documents in the source indices [{}] contained all the fields "
+ + "selected for analysis. If you are relying on automatic field selection then there are "
+ + "currently mapped fields that do not exist in any indexed documents, and you will have "
+ + "to switch to explicit field selection and include only fields that exist in indexed "
+ + "documents.",
+ startContext.config.getId(),
+ Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
+ ));
+ } else {
+ listener.onResponse(startContext);
+ }
+ },
listener::onFailure
));
}
@@ -402,6 +400,7 @@ public class TransportStartDataFrameAnalyticsAction
private final DataFrameAnalyticsConfig config;
private final List<PhaseProgress> progressOnStart;
private final DataFrameAnalyticsTask.StartingState startingState;
+ private volatile ExtractedFields extractedFields;
private StartContext(DataFrameAnalyticsConfig config, List<PhaseProgress> progressOnStart) {
this.config = config;
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java
index ce21973ca91..f8afd229098 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java
@@ -29,7 +29,7 @@ public class DataFrameDataExtractorFactory {
private final Map<String, String> headers;
private final boolean includeRowsWithMissingValues;
- private DataFrameDataExtractorFactory(Client client, String analyticsId, List<String> indices, ExtractedFields extractedFields,
+ public DataFrameDataExtractorFactory(Client client, String analyticsId, List<String> indices, ExtractedFields extractedFields,
Map<String, String> headers, boolean includeRowsWithMissingValues) {
this.client = Objects.requireNonNull(client);
this.analyticsId = Objects.requireNonNull(analyticsId);
@@ -66,32 +66,19 @@ public class DataFrameDataExtractorFactory {
}
/**
- * Validate and create a new extractor factory
+ * Create a new extractor factory
*
* The source index must exist and contain at least 1 compatible field or validations will fail.
*
* @param client ES Client used to make calls against the cluster
* @param taskId The task id
- * @param isTaskRestarting Whether the task is restarting or it is running for the first time
* @param config The config from which to create the extractor factory
- * @param listener The listener to notify on creation or failure
+ * @param extractedFields The fields to extract
*/
- public static void createForSourceIndices(Client client,
- String taskId,
- boolean isTaskRestarting,
- DataFrameAnalyticsConfig config,
- ActionListener<DataFrameDataExtractorFactory> listener) {
- ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
- extractedFieldsDetectorFactory.createFromSource(config, isTaskRestarting, ActionListener.wrap(
- extractedFieldsDetector -> {
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
- DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, taskId,
- Arrays.asList(config.getSource().getIndex()), extractedFields, config.getHeaders(),
- config.getAnalysis().supportsMissingValues());
- listener.onResponse(extractorFactory);
- },
- listener::onFailure
- ));
+ public static DataFrameDataExtractorFactory createForSourceIndices(Client client, String taskId, DataFrameAnalyticsConfig config,
+ ExtractedFields extractedFields) {
+ return new DataFrameDataExtractorFactory(client, taskId, Arrays.asList(config.getSource().getIndex()), extractedFields,
+ config.getHeaders(), config.getAnalysis().supportsMissingValues());
}
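With field detection hoisted out to the caller, createForSourceIndices is now synchronous and needs no listener. A minimal usage sketch, assuming client, config and extractedFields are already in scope (handleRows and handleFailure are illustrative names, not part of this change):

    DataFrameDataExtractorFactory factory = DataFrameDataExtractorFactory.createForSourceIndices(
        client, "my_task_id", config, extractedFields);
    factory.newExtractor(false).collectDataSummaryAsync(ActionListener.wrap(
        dataSummary -> handleRows(dataSummary.rows),  // e.g. fail the request if rows == 0
        e -> handleFailure(e)));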
/**
@@ -111,7 +98,7 @@ public class DataFrameDataExtractorFactory {
ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
extractedFieldsDetectorFactory.createFromDest(config, isTaskRestarting, ActionListener.wrap(
extractedFieldsDetector -> {
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ ExtractedFields extractedFields = extractedFieldsDetector.detect().v1();
DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, config.getId(),
Collections.singletonList(config.getDest().getIndex()), extractedFields, config.getHeaders(),
config.getAnalysis().supportsMissingValues());
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
index 5d94b57aca5..682cc94433c 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
@@ -11,6 +11,7 @@ import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.BooleanFieldMapper;
@@ -19,6 +20,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.core.ml.utils.NameResolver;
@@ -29,13 +31,12 @@ import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.HashSet;
+import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
-import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
@@ -57,9 +58,8 @@ public class ExtractedFieldsDetector {
private final FieldCapabilitiesResponse fieldCapabilitiesResponse;
private final Map<String, Long> fieldCardinalities;
- ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting,
- int docValueFieldsLimit, FieldCapabilitiesResponse fieldCapabilitiesResponse,
- Map<String, Long> fieldCardinalities) {
+ ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, int docValueFieldsLimit,
+ FieldCapabilitiesResponse fieldCapabilitiesResponse, Map<String, Long> fieldCardinalities) {
this.index = Objects.requireNonNull(index);
this.config = Objects.requireNonNull(config);
this.isTaskRestarting = isTaskRestarting;
@@ -68,8 +68,30 @@ public class ExtractedFieldsDetector {
this.fieldCardinalities = Objects.requireNonNull(fieldCardinalities);
}
- public ExtractedFields detect() {
- Set<String> fields = getIncludedFields();
+ public Tuple<ExtractedFields, List<FieldSelection>> detect() {
+ TreeSet<FieldSelection> fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName));
+ Set<String> fields = getIncludedFields(fieldSelection);
+ checkFieldsHaveCompatibleTypes(fields);
+ checkRequiredFields(fields);
+ checkFieldsWithCardinalityLimit();
+ ExtractedFields extractedFields = detectExtractedFields(fields, fieldSelection);
+ addIncludedFields(extractedFields, fieldSelection);
+
+ return Tuple.tuple(extractedFields, Collections.unmodifiableList(new ArrayList<>(fieldSelection)));
+ }
+
+ private Set<String> getIncludedFields(Set<FieldSelection> fieldSelection) {
+ Set<String> fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet());
+ fields.removeAll(IGNORE_FIELDS);
+ checkResultsFieldIsNotPresent();
+ removeFieldsUnderResultsField(fields);
+ FetchSourceContext analyzedFields = config.getAnalyzedFields();
+
+ // If the user has not explicitly included fields we'll include all compatible fields
+ if (analyzedFields == null || analyzedFields.includes().length == 0) {
+ removeFieldsWithIncompatibleTypes(fields, fieldSelection);
+ }
+ includeAndExcludeFields(fields, fieldSelection);
if (fields.isEmpty()) {
throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.",
@@ -77,26 +99,19 @@ public class ExtractedFieldsDetector {
getSupportedTypes());
}
- checkNoIgnoredFields(fields);
- checkFieldsHaveCompatibleTypes(fields);
- checkRequiredFields(fields);
- checkFieldsWithCardinalityLimit();
- return detectExtractedFields(fields);
+ return fields;
}
- private Set<String> getIncludedFields() {
- Set<String> fields = new HashSet<>(fieldCapabilitiesResponse.get().keySet());
- checkResultsFieldIsNotPresent();
- removeFieldsUnderResultsField(fields);
- FetchSourceContext analyzedFields = config.getAnalyzedFields();
-
- // If the user has not explicitly included fields we'll include all compatible fields
- if (analyzedFields == null || analyzedFields.includes().length == 0) {
- fields.removeAll(IGNORE_FIELDS);
- removeFieldsWithIncompatibleTypes(fields);
+ private void removeFieldsUnderResultsField(Set<String> fields) {
+ String resultsField = config.getDest().getResultsField();
+ Iterator<String> fieldsIterator = fields.iterator();
+ while (fieldsIterator.hasNext()) {
+ String field = fieldsIterator.next();
+ if (field.startsWith(resultsField + ".")) {
+ fieldsIterator.remove();
+ }
}
- includeAndExcludeFields(fields);
- return fields;
}
private void checkResultsFieldIsNotPresent() {
@@ -117,16 +132,21 @@ public class ExtractedFieldsDetector {
}
}
- private void removeFieldsUnderResultsField(Set<String> fields) {
- // Ignore fields under the results object
- fields.removeIf(field -> field.startsWith(config.getDest().getResultsField() + "."));
+ private void addExcludedField(String field, String reason, Set<FieldSelection> fieldSelection) {
+ fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason));
}
- private void removeFieldsWithIncompatibleTypes(Set<String> fields) {
+ private Set<String> getMappingTypes(String field) {
+ Map<String, FieldCapabilities> fieldCaps = fieldCapabilitiesResponse.getField(field);
+ return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet();
+ }
+
+ private void removeFieldsWithIncompatibleTypes(Set<String> fields, Set<FieldSelection> fieldSelection) {
Iterator<String> fieldsIterator = fields.iterator();
while (fieldsIterator.hasNext()) {
String field = fieldsIterator.next();
if (hasCompatibleType(field) == false) {
+ addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
fieldsIterator.remove();
}
}
@@ -163,7 +183,7 @@ public class ExtractedFieldsDetector {
return supportedTypes;
}
- private void includeAndExcludeFields(Set<String> fields) {
+ private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
FetchSourceContext analyzedFields = config.getAnalyzedFields();
if (analyzedFields == null) {
return;
@@ -188,18 +208,30 @@ public class ExtractedFieldsDetector {
Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex)))
.expand(excludes, true);
- fields.retainAll(includedSet);
- fields.removeAll(excludedSet);
+ applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection);
} catch (ResourceNotFoundException ex) {
// Re-wrap our exception so that we throw the same exception type when there are no fields.
throw ExceptionsHelper.badRequestException(ex.getMessage());
}
}
- private void checkNoIgnoredFields(Set<String> fields) {
- Optional<String> ignoreField = IGNORE_FIELDS.stream().filter(fields::contains).findFirst();
- if (ignoreField.isPresent()) {
- throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", ignoreField.get());
+ private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes,
+ Set<FieldSelection> fieldSelection) {
+ Iterator<String> fieldsIterator = fields.iterator();
+ while (fieldsIterator.hasNext()) {
+ String field = fieldsIterator.next();
+ if (includes.contains(field)) {
+ if (IGNORE_FIELDS.contains(field)) {
+ throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", field);
+ }
+ // The excludes check is nested here: a field that failed the includes check has
+ // already been removed, and removing it again would throw IllegalStateException.
+ if (excludes.contains(field)) {
+ fieldsIterator.remove();
+ addExcludedField(field, "field in excludes list", fieldSelection);
+ }
+ } else {
+ fieldsIterator.remove();
+ addExcludedField(field, "field not in includes list", fieldSelection);
+ }
}
}
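The net effect of applyIncludesExcludes is that a field survives only if it is in the includes set and not in the excludes set, with every drop recorded along with its reason. A worked example of the expected outcome, with illustrative values:

    Set<FieldSelection> fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName));
    Set<String> fields = new TreeSet<>(Arrays.asList("a", "b", "c"));
    applyIncludesExcludes(fields, new HashSet<>(Arrays.asList("a", "b")),
        new HashSet<>(Arrays.asList("b")), fieldSelection);
    // fields is now {"a"}; fieldSelection additionally records:
    //   "b" -> excluded, reason "field in excludes list"
    //   "c" -> excluded, reason "field not in includes list"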
@@ -247,13 +279,10 @@ public class ExtractedFieldsDetector {
}
}
- private ExtractedFields detectExtractedFields(Set<String> fields) {
- List<String> sortedFields = new ArrayList<>(fields);
- // We sort the fields to ensure the checksum for each document is deterministic
- Collections.sort(sortedFields);
- ExtractedFields extractedFields = ExtractedFields.build(sortedFields, Collections.emptySet(), fieldCapabilitiesResponse);
+ private ExtractedFields detectExtractedFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
+ ExtractedFields extractedFields = ExtractedFields.build(fields, Collections.emptySet(), fieldCapabilitiesResponse);
boolean preferSource = extractedFields.getDocValueFields().size() > docValueFieldsLimit;
- extractedFields = deduplicateMultiFields(extractedFields, preferSource);
+ extractedFields = deduplicateMultiFields(extractedFields, preferSource, fieldSelection);
if (preferSource) {
extractedFields = fetchFromSourceIfSupported(extractedFields);
if (extractedFields.getDocValueFields().size() > docValueFieldsLimit) {
@@ -266,7 +295,8 @@ public class ExtractedFieldsDetector {
return extractedFields;
}
- private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource) {
+ private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource,
+ Set<FieldSelection> fieldSelection) {
Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName)
.collect(Collectors.toSet());
Map<String, ExtractedField> nameOrParentToField = new LinkedHashMap<>();
@@ -276,43 +306,53 @@ public class ExtractedFieldsDetector {
if (existingField != null) {
ExtractedField parent = currentField.isMultiField() ? existingField : currentField;
ExtractedField multiField = currentField.isMultiField() ? currentField : existingField;
- nameOrParentToField.put(nameOrParent, chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField));
+ nameOrParentToField.put(nameOrParent,
+ chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField, fieldSelection));
}
}
return new ExtractedFields(new ArrayList<>(nameOrParentToField.values()));
}
- private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set<String> requiredFields,
- ExtractedField parent, ExtractedField multiField) {
+ private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set<String> requiredFields, ExtractedField parent,
+ ExtractedField multiField, Set<FieldSelection> fieldSelection) {
// Check requirements first
if (requiredFields.contains(parent.getName())) {
+ addExcludedField(multiField.getName(), "[" + parent.getName() + "] is required instead", fieldSelection);
return parent;
}
if (requiredFields.contains(multiField.getName())) {
+ addExcludedField(parent.getName(), "[" + multiField.getName() + "] is required instead", fieldSelection);
return multiField;
}
// If both are multi-fields it means there are several. In this case parent is the previous multi-field
// we selected. We'll just keep that.
if (parent.isMultiField() && multiField.isMultiField()) {
+ addExcludedField(multiField.getName(), "[" + parent.getName() + "] came first", fieldSelection);
return parent;
}
// If we prefer source only the parent may support it. If it does we pick it immediately.
if (preferSource && parent.supportsFromSource()) {
+ addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it supports fetching from source",
+ fieldSelection);
return parent;
}
// If any of the two is a doc_value field let's prefer it as it'd support aggregations.
// We check the parent first as it'd be a shorter field name.
if (parent.getMethod() == ExtractedField.Method.DOC_VALUE) {
+ addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it is aggregatable", fieldSelection);
return parent;
}
if (multiField.getMethod() == ExtractedField.Method.DOC_VALUE) {
+ addExcludedField(parent.getName(), "[" + multiField.getName() + "] is preferred because it is aggregatable", fieldSelection);
return multiField;
}
// None is aggregatable. Let's pick the parent for its shorter name.
+ addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because none of the multi-fields are aggregatable",
+ fieldSelection);
return parent;
}
@@ -343,6 +383,26 @@ public class ExtractedFieldsDetector {
return new ExtractedFields(adjusted);
}
+ private void addIncludedFields(ExtractedFields extractedFields, Set<FieldSelection> fieldSelection) {
+ Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName)
+ .collect(Collectors.toSet());
+ Set<String> categoricalFields = getCategoricalFields(extractedFields);
+ for (ExtractedField includedField : extractedFields.getAllFields()) {
+ FieldSelection.FeatureType featureType = categoricalFields.contains(includedField.getName()) ?
+ FieldSelection.FeatureType.CATEGORICAL : FieldSelection.FeatureType.NUMERICAL;
+ fieldSelection.add(FieldSelection.included(includedField.getName(), includedField.getTypes(),
+ requiredFields.contains(includedField.getName()), featureType));
+ }
+ }
+
+ private Set<String> getCategoricalFields(ExtractedFields extractedFields) {
+ return extractedFields.getAllFields().stream()
+ .filter(extractedField -> config.getAnalysis().getAllowedCategoricalTypes(extractedField.getName())
+ .containsAll(extractedField.getTypes()))
+ .map(ExtractedField::getName)
+ .collect(Collectors.toSet());
+ }
+
private static boolean isBoolean(Set<String> types) {
return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
}
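The FieldSelection list that detect() now returns alongside the extracted fields is what the explain response ultimately reports. A sketch of consuming it; isIncluded() and getReason() are assumed accessors on FieldSelection, since only getName() appears in this diff:

    Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
    for (FieldSelection selection : fieldExtraction.v2()) {
        // isIncluded()/getReason() are assumed accessors, not shown in this change
        System.out.println(selection.getName() + (selection.isIncluded()
            ? ": included" : ": excluded because " + selection.getReason()));
    }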
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java
index 2e5189eb249..6740f8d4d34 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java
@@ -100,9 +100,9 @@ public class MemoryUsageEstimationProcessManager {
} finally {
process.consumeAndCloseOutputStream();
try {
- LOGGER.info("[{}] Closing process", jobId);
+ LOGGER.debug("[{}] Closing process", jobId);
process.close();
- LOGGER.info("[{}] Closed process", jobId);
+ LOGGER.debug("[{}] Closed process", jobId);
} catch (Exception e) {
String errorMsg =
new ParameterizedMessage(
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java
deleted file mode 100644
index 25f2bcb4bb8..00000000000
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.rest.dataframe;
-
-import org.elasticsearch.client.node.NodeClient;
-import org.elasticsearch.rest.BaseRestHandler;
-import org.elasticsearch.rest.RestController;
-import org.elasticsearch.rest.RestRequest;
-import org.elasticsearch.rest.action.RestToXContentListener;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
-import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
-import org.elasticsearch.xpack.ml.MachineLearning;
-
-import java.io.IOException;
-
-public class RestEstimateMemoryUsageAction extends BaseRestHandler {
-
- public RestEstimateMemoryUsageAction(RestController controller) {
- controller.registerHandler(
- RestRequest.Method.POST,
- MachineLearning.BASE_PATH + "data_frame/analytics/_estimate_memory_usage", this);
- }
-
- @Override
- public String getName() {
- return "ml_estimate_memory_usage_action";
- }
-
- @Override
- protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
- PutDataFrameAnalyticsAction.Request request =
- PutDataFrameAnalyticsAction.Request.parseRequestForMemoryEstimation(restRequest.contentOrSourceParamParser());
- return channel -> client.execute(EstimateMemoryUsageAction.INSTANCE, request, new RestToXContentListener<>(channel));
- }
-}
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java
new file mode 100644
index 00000000000..b16bf7b3efb
--- /dev/null
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.rest.dataframe;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.rest.BaseRestHandler;
+import org.elasticsearch.rest.RestController;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.action.RestToXContentListener;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.ml.MachineLearning;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class RestExplainDataFrameAnalyticsAction extends BaseRestHandler {
+
+ public RestExplainDataFrameAnalyticsAction(RestController controller) {
+ controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this);
+ controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this);
+ controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/{"
+ + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this);
+ controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/{"
+ + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this);
+ }
+
+ @Override
+ public String getName() {
+ return "ml_explain_data_frame_analytics_action";
+ }
+
+ @Override
+ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
+ final String jobId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName());
+
+ if (Strings.isNullOrEmpty(jobId) && restRequest.hasContentOrSourceParam() == false) {
+ throw ExceptionsHelper.badRequestException("Please provide a job [{}] or the config object",
+ DataFrameAnalyticsConfig.ID.getPreferredName());
+ }
+
+ if (Strings.isNullOrEmpty(jobId) == false && restRequest.hasContentOrSourceParam()) {
+ throw ExceptionsHelper.badRequestException("Please provide either a job [{}] or the config object but not both",
+ DataFrameAnalyticsConfig.ID.getPreferredName());
+ }
+
+ // We need to consume the body before returning
+ PutDataFrameAnalyticsAction.Request explainRequestFromBody = Strings.isNullOrEmpty(jobId) ?
+ PutDataFrameAnalyticsAction.Request.parseRequestForExplain(restRequest.contentOrSourceParamParser()) : null;
+
+ return channel -> {
+ RestToXContentListener<ExplainDataFrameAnalyticsAction.Response> listener = new RestToXContentListener<>(channel);
+
+ if (explainRequestFromBody != null) {
+ client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequestFromBody, listener);
+ } else {
+ GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(jobId);
+ getRequest.setAllowNoResources(false);
+ client.execute(GetDataFrameAnalyticsAction.INSTANCE, getRequest, ActionListener.wrap(
+ getResponse -> {
+ List<DataFrameAnalyticsConfig> jobs = getResponse.getResources().results();
+ if (jobs.size() > 1) {
+ listener.onFailure(ExceptionsHelper.badRequestException("expected only one config but matched {}",
+ jobs.stream().map(DataFrameAnalyticsConfig::getId).collect(Collectors.toList())));
+ } else {
+ PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(jobs.get(0));
+ client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequest, listener);
+ }
+ },
+ listener::onFailure
+ ));
+ }
+ };
+ }
+}
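From a caller's point of view the endpoint accepts either an analytics id in the path or an inline config in the body, but not both. A hedged high-level REST client sketch; the constructor form, the explainDataFrameAnalytics method name and the response accessors are assumed from the client-side classes this change introduces and are not shown in this diff:

    // By id, equivalent to GET/POST _ml/data_frame/analytics/my-analytics/_explain
    ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("my-analytics");  // assumed constructor
    ExplainDataFrameAnalyticsResponse response =
        client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);  // assumed method
    List<FieldSelection> fieldSelection = response.getFieldSelection();    // assumed accessor
    MemoryEstimation memoryEstimation = response.getMemoryEstimation();    // assumed accessor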
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
index 8f33c9bfbbf..5f7bd650a1c 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
@@ -8,6 +8,7 @@ package org.elasticsearch.xpack.ml.dataframe.extractor;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
+import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.test.ESTestCase;
@@ -17,6 +18,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
@@ -25,6 +27,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -48,12 +51,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(1));
assertThat(allFields.get(0).getName(), equalTo("some_float"));
assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL));
}
public void testDetect_GivenNumericFieldWithMultipleTypes() {
@@ -63,12 +69,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(1));
assertThat(allFields.get(0).getName(), equalTo("some_number"));
assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+
+ assertFieldSelectionContains(fieldExtraction.v2(), FieldSelection.included("some_number",
+ new HashSet<>(Arrays.asList("long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")), false,
+ FieldSelection.FeatureType.NUMERICAL));
}
public void testDetect_GivenOutlierDetectionAndNonNumericField() {
@@ -105,14 +115,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(3));
assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()),
containsInAnyOrder("some_float", "some_long", "some_boolean"));
assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " +
+ "supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
+ FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenRegressionAndMultipleFields() {
@@ -126,14 +144,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("foo"), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(5));
assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()),
containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean"));
assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("foo", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.included("some_keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenRegressionAndRequiredFieldMissing() {
@@ -191,11 +217,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(1));
assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), contains("bar"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("bar", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.excluded("foo", Collections.singleton("float"), "field in excludes list")
+ );
}
public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() {
@@ -258,14 +289,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
public void testDetect_GivenIncludedIgnoredField() {
FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
- .addAggregatableField("_id", "float").build();
+ .addAggregatableField("_id", "float")
+ .build();
FetchSourceContext analyzedFields = new FetchSourceContext(true, new String[]{"_id"}, new String[0]);
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> extractedFieldsDetector.detect());
- assertThat(e.getMessage(), equalTo("field [_id] cannot be analyzed"));
+ assertThat(e.getMessage(), equalTo("No field [_id] could be detected"));
}
public void testDetect_ShouldSortFieldsAlphabetically() {
@@ -285,9 +317,9 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(sortedFields));
}
@@ -333,11 +365,17 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(desiredFields), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2")));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.excluded("my_field1_nope", Collections.singleton("float"), "field in excludes list"),
+ FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenIncludedFieldHasUnsupportedType() {
@@ -384,11 +422,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), true, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2")));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " +
+ "are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]")
+ );
}
public void testDetect_GivenIncludedResultsField() {
@@ -434,12 +479,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), true, 4, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
- assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+ assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
}
@@ -453,12 +498,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), true, 3, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
- assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+ assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.DOC_VALUE)));
}
@@ -472,12 +517,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), true, 2, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
- assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+ assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
contains(equalTo(ExtractedField.Method.SOURCE)));
}
@@ -488,14 +533,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(1));
ExtractedField booleanField = allFields.get(0);
assertThat(booleanField.getTypes(), contains("boolean"));
assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL)
+ );
+
SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
assertThat(booleanField.value(hit), arrayContaining(1));
@@ -514,14 +563,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildClassificationConfig("some_boolean"), false, 100, fieldCapabilities,
Collections.singletonMap("some_boolean", 2L));
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- List<ExtractedField> allFields = extractedFields.getAllFields();
+ List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
assertThat(allFields.size(), equalTo(1));
ExtractedField booleanField = allFields.get(0);
assertThat(booleanField.getTypes(), contains("boolean"));
assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("some_boolean", Collections.singleton("boolean"), true, FieldSelection.FeatureType.CATEGORICAL)
+ );
+
SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
assertThat(booleanField.value(hit), arrayContaining("true"));
@@ -546,12 +599,26 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("a_float"), true, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(5));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(5));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("a_float", "keyword_1", "text_1.keyword", "text_2.keyword", "text_without_keyword"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("a_float", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.included("keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("keyword_1.text", Collections.singleton("text"),
+ "[keyword_1] is preferred because it is aggregatable"),
+ FieldSelection.excluded("text_1", Collections.singleton("text"),
+ "[text_1.keyword] is preferred because it is aggregatable"),
+ FieldSelection.included("text_1.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("text_2", Collections.singleton("text"),
+ "[text_2.keyword] is preferred because it is aggregatable"),
+ FieldSelection.included("text_2.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.included("text_without_keyword", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL)
+ );
}
public void testDetect_GivenMultiFieldAndParentIsRequired() {
@@ -563,12 +630,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildClassificationConfig("field_1"), true, 100, fieldCapabilities, Collections.singletonMap("field_1", 2L));
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("field_1", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+ "[field_1] is required instead"),
+ FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
@@ -581,12 +655,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildClassificationConfig("field_1.keyword"), true, 100, fieldCapabilities,
Collections.singletonMap("field_1.keyword", 2L));
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1.keyword", "field_2"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.excluded("field_1", Collections.singleton("keyword"),
+ "[field_1.keyword] is required instead"),
+ FieldSelection.included("field_1.keyword", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
@@ -600,12 +681,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1.keyword_1", "field_2"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.excluded("field_1", Collections.singleton("text"),
+ "[field_1.keyword_1] is preferred because it is aggregatable"),
+ FieldSelection.included("field_1.keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("field_1.keyword_2", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+ FieldSelection.excluded("field_1.keyword_3", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+ FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenMultiFields_OverDocValueLimit() {
@@ -617,12 +707,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("field_2"), true, 0, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("field_1.keyword_1", Collections.singleton("keyword"),
+ "[field_1] is preferred because it supports fetching from source"),
+ FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
@@ -635,12 +732,20 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("field_2.double"), true, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1", "field_2.double"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("field_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+ "[field_1] is preferred because it is aggregatable"),
+ FieldSelection.included("field_2.double", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
+ FieldSelection.excluded("field_2.keyword", Collections.singleton("float"), "[field_2.double] is required instead")
+ );
}
public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
@@ -652,12 +757,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("field_1.text", Collections.singleton("text"),
+ "[field_1] is preferred because none of the multi-fields are aggregatable"),
+ FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+ );
}
public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
@@ -670,12 +782,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildRegressionConfig("field_2", analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
- ExtractedFields extractedFields = extractedFieldsDetector.detect();
+ Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
- assertThat(extractedFields.getAllFields().size(), equalTo(2));
- List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+ assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+ List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
.collect(Collectors.toList());
assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+ assertFieldSelectionContains(fieldExtraction.v2(),
+ FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+ FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), "field not in includes list"),
+ FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+ );
}
private static DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
@@ -715,6 +833,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
.build();
}
+ /**
+ * We assert each field individually to get useful error messages in case of failure
+ */
+ private static void assertFieldSelectionContains(List<FieldSelection> actual, FieldSelection... expected) {
+ assertThat(actual.size(), equalTo(expected.length));
+ for (int i = 0; i < expected.length; i++) {
+ assertThat("i = " + i, actual.get(i).getName(), equalTo(expected[i].getName()));
+ assertThat("i = " + i, actual.get(i).getMappingTypes(), equalTo(expected[i].getMappingTypes()));
+ assertThat("i = " + i, actual.get(i).isIncluded(), equalTo(expected[i].isIncluded()));
+ assertThat("i = " + i, actual.get(i).isRequired(), equalTo(expected[i].isRequired()));
+ assertThat("i = " + i, actual.get(i).getFeatureType(), equalTo(expected[i].getFeatureType()));
+ assertThat("i = " + i, actual.get(i).getReason(), equalTo(expected[i].getReason()));
+ }
+ }
+
private static class MockFieldCapsResponseBuilder {
private final Map<String, Map<String, FieldCapabilities>> fieldCaps = new HashMap<>();
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json
deleted file mode 100644
index 99bd6527de3..00000000000
--- a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
- "ml.estimate_memory_usage": {
- "documentation": {
- "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html"
- },
- "stability": "experimental",
- "url": {
- "paths" : [
- {
- "path" : "/_ml/data_frame/analytics/_estimate_memory_usage",
- "methods": [ "POST" ],
- "parts": {}
- }
- ]
- },
- "body": {
- "description" : "Memory usage estimation definition",
- "required" : true
- }
- }
-}
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json
new file mode 100644
index 00000000000..6969cf9a49f
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json
@@ -0,0 +1,31 @@
+{
+ "ml.explain_data_frame_analytics": {
+ "documentation": {
+ "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html"
+ },
+ "stability": "experimental",
+ "url": {
+ "paths" : [
+ {
+ "path" : "/_ml/data_frame/analytics/_explain",
+ "methods": [ "GET", "POST" ],
+ "parts": {}
+ },
+ {
+ "path" : "/_ml/data_frame/analytics/{id}/_explain",
+ "methods": [ "GET", "POST" ],
+ "parts":{
+ "id":{
+ "type":"string",
+ "description":"The ID of the data frame analytics to explain"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description" : "The data frame analytics config to explain",
+ "required" : false
+ }
+ }
+}
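
(Reviewer note: a minimal sketch of driving the new _explain endpoint from the
high-level REST client, matching the two spec paths above. The one-argument
String constructor and the synchronous method signature are assumptions based
on the request converter and MachineLearningClient changes earlier in this diff.)

    import java.io.IOException;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
    import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;

    // Explain a stored config by id (the "/{id}/_explain" path): the response
    // carries both the memory estimation and the per-field selection decisions.
    static ExplainDataFrameAnalyticsResponse explainById(RestHighLevelClient client, String id) throws IOException {
        ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(id);
        return client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);
    }
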
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml
deleted file mode 100644
index 39fe8005fa8..00000000000
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml
+++ /dev/null
@@ -1,84 +0,0 @@
----
-setup:
- - do:
- indices.create:
- index: index-source
- body:
- mappings:
- properties:
- x:
- type: float
- y:
- type: float
-
----
-"Test memory usage estimation for empty data frame":
- - do:
- catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
- ml.estimate_memory_usage:
- body:
- source: { index: "index-source" }
- analysis: { outlier_detection: {} }
-
- - do:
- index:
- index: index-source
- refresh: true
- body: { x: 1 }
- - match: { result: "created" }
-
- # Note that value for "y" is missing and outlier detection analysis does not support missing values.
- # Hence, the data frame is still considered empty.
- - do:
- catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
- ml.estimate_memory_usage:
- body:
- source: { index: "index-source" }
- analysis: { outlier_detection: {} }
-
----
-"Test memory usage estimation for non-empty data frame":
- - do:
- index:
- index: index-source
- refresh: true
- body: { x: 1, y: 10 }
- - match: { result: "created" }
-
- - do:
- ml.estimate_memory_usage:
- body:
- source: { index: "index-source" }
- analysis: { outlier_detection: {} }
- - match: { expected_memory_without_disk: "3kb" }
- - match: { expected_memory_with_disk: "3kb" }
-
- - do:
- index:
- index: index-source
- refresh: true
- body: { x: 2, y: 20 }
- - match: { result: "created" }
-
- - do:
- ml.estimate_memory_usage:
- body:
- source: { index: "index-source" }
- analysis: { outlier_detection: {} }
- - match: { expected_memory_without_disk: "4kb" }
- - match: { expected_memory_with_disk: "4kb" }
-
- - do:
- index:
- index: index-source
- refresh: true
- body: { x: 3, y: 30 }
- - match: { result: "created" }
-
- - do:
- ml.estimate_memory_usage:
- body:
- source: { index: "index-source" }
- analysis: { outlier_detection: {} }
- - match: { expected_memory_without_disk: "6kb" }
- - match: { expected_memory_with_disk: "5kb" }
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
new file mode 100644
index 00000000000..f4296427256
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
@@ -0,0 +1,308 @@
+---
+"Test neither job id nor body":
+ - do:
+ catch: /Please provide a job \[id\] or the config object/
+ ml.explain_data_frame_analytics:
+ id: ""
+
+---
+"Test both job id and body":
+ - do:
+ catch: /Please provide either a job \[id\] or the config object but not both/
+ ml.explain_data_frame_analytics:
+ id: "foo"
+ body:
+ source: { index: "index-source" }
+ analysis: { outlier_detection: {} }
+
+---
+"Test missing job":
+ - do:
+ catch: missing
+ ml.explain_data_frame_analytics:
+ id: "no_such_job"
+
+---
+"Test id that matches multiple jobs":
+
+ - do:
+ indices.create:
+ index: index-source
+
+ - do:
+ ml.put_data_frame_analytics:
+ id: "foo-1"
+ body: >
+ {
+ "source": {
+ "index": "index-source"
+ },
+ "dest": {
+ "index": "index-dest"
+ },
+ "analysis": {"outlier_detection":{}}
+ }
+
+ - do:
+ ml.put_data_frame_analytics:
+ id: "foo-2"
+ body: >
+ {
+ "source": {
+ "index": "index-source"
+ },
+ "dest": {
+ "index": "index-dest"
+ },
+ "analysis": {"outlier_detection":{}}
+ }
+
+ - do:
+ catch: /expected only one config but matched \[foo-1, foo-2\]/
+ ml.explain_data_frame_analytics:
+ id: "foo-*"
+
+---
+"Test empty data frame given body":
+
+ - do:
+ indices.create:
+ index: index-source
+ body:
+ mappings:
+ properties:
+ x:
+ type: float
+ y:
+ type: float
+
+ - do:
+ catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+ ml.explain_data_frame_analytics:
+ body:
+ source: { index: "index-source" }
+ analysis: { outlier_detection: {} }
+
+ - do:
+ index:
+ index: index-source
+ refresh: true
+ body: { x: 1 }
+ - match: { result: "created" }
+
+ # Note that value for "y" is missing and outlier detection analysis does not support missing values.
+ # Hence, the data frame is still considered empty.
+ - do:
+ catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+ ml.explain_data_frame_analytics:
+ body:
+ source: { index: "index-source" }
+ analysis: { outlier_detection: {} }
+
+---
+"Test non-empty data frame given body":
+
+ - do:
+ indices.create:
+ index: index-source
+ body:
+ mappings:
+ properties:
+ x:
+ type: float
+ y:
+ type: float
+
+ - do:
+ index:
+ index: index-source
+ refresh: true
+ body: { x: 1, y: 10 }
+ - match: { result: "created" }
+
+ - do:
+ ml.explain_data_frame_analytics:
+ body:
+ source: { index: "index-source" }
+ analysis: { outlier_detection: {} }
+ - match: { memory_estimation.expected_memory_without_disk: "3kb" }
+ - match: { memory_estimation.expected_memory_with_disk: "3kb" }
+ - length: { field_selection: 2 }
+ - match: { field_selection.0.name: "x" }
+ - match: { field_selection.0.mapping_types: ["float"] }
+ - match: { field_selection.0.is_included: true }
+ - match: { field_selection.0.is_required: false }
+ - match: { field_selection.0.feature_type: "numerical" }
+ - is_false: field_selection.0.reason
+ - match: { field_selection.1.name: "y" }
+ - match: { field_selection.1.mapping_types: ["float"] }
+ - match: { field_selection.1.is_included: true }
+ - match: { field_selection.1.is_required: false }
+ - match: { field_selection.1.feature_type: "numerical" }
+ - is_false: field_selection.1.reason
+
+ - do:
+ index:
+ index: index-source
+ refresh: true
+ body: { x: 2, y: 20 }
+ - match: { result: "created" }
+
+ - do:
+ ml.explain_data_frame_analytics:
+ body:
+ source: { index: "index-source" }
+ analysis: { outlier_detection: {} }
+ - match: { memory_estimation.expected_memory_without_disk: "4kb" }
+ - match: { memory_estimation.expected_memory_with_disk: "4kb" }
+
+ - do:
+ index:
+ index: index-source
+ refresh: true
+ body: { x: 3, y: 30 }
+ - match: { result: "created" }
+
+ - do:
+ ml.explain_data_frame_analytics:
+ body:
+ source: { index: "index-source" }
+ analysis: { outlier_detection: {} }
+ - match: { memory_estimation.expected_memory_without_disk: "6kb" }
+ - match: { memory_estimation.expected_memory_with_disk: "5kb" }
+
+---
+"Test field_selection given body":
+
+ - do:
+ indices.create:
+ index: index-source
+ body:
+ mappings:
+ properties:
+ field_1:
+ type: integer
+ field_2:
+ type: double
+ field_3:
+ type: date
+
+ - do:
+ index:
+ index: index-source
+ refresh: true
+ body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+ - match: { result: "created" }
+
+ - do:
+ ml.explain_data_frame_analytics:
+ body:
+ source: { index: "index-source" }
+ analysis: { regression: { dependent_variable: "field_1" } }
+ - is_true: memory_estimation.expected_memory_without_disk
+ - is_true: memory_estimation.expected_memory_with_disk
+ - length: { field_selection: 5 }
+ - match: { field_selection.0.name: "field_1" }
+ - match: { field_selection.0.mapping_types: ["integer"] }
+ - match: { field_selection.0.is_included: true }
+ - match: { field_selection.0.is_required: true }
+ - match: { field_selection.0.feature_type: "numerical" }
+ - is_false: field_selection.0.reason
+ - match: { field_selection.1.name: "field_2" }
+ - match: { field_selection.1.mapping_types: ["double"] }
+ - match: { field_selection.1.is_included: true }
+ - match: { field_selection.1.is_required: false }
+ - match: { field_selection.1.feature_type: "numerical" }
+ - is_false: field_selection.1.reason
+ - match: { field_selection.2.name: "field_3" }
+ - match: { field_selection.2.mapping_types: ["date"] }
+ - match: { field_selection.2.is_included: false }
+ - match: { field_selection.2.is_required: false }
+ - is_false: field_selection.2.feature_type
+ - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+ - match: { field_selection.3.name: "field_4" }
+ - match: { field_selection.3.mapping_types: ["text"] }
+ - match: { field_selection.3.is_included: false }
+ - match: { field_selection.3.is_required: false }
+ - is_false: field_selection.3.feature_type
+ - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+ - match: { field_selection.4.name: "field_4.keyword" }
+ - match: { field_selection.4.mapping_types: ["keyword"] }
+ - match: { field_selection.4.is_included: true }
+ - match: { field_selection.4.is_required: false }
+ - match: { field_selection.4.feature_type: "categorical" }
+ - is_false: field_selection.4.reason
+
+---
+"Test field_selection given job":
+
+ - do:
+ indices.create:
+ index: index-source
+ body:
+ mappings:
+ properties:
+ field_1:
+ type: integer
+ field_2:
+ type: double
+ field_3:
+ type: date
+
+ - do:
+ index:
+ index: index-source
+ refresh: true
+ body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+ - match: { result: "created" }
+
+ - do:
+ ml.put_data_frame_analytics:
+ id: "got-a-job-for-this-one"
+ body: >
+ {
+ "source": {
+ "index": "index-source"
+ },
+ "dest": {
+ "index": "index-dest"
+ },
+ "analysis": {"regression":{ "dependent_variable": "field_1" }}
+ }
+
+ - do:
+ ml.explain_data_frame_analytics:
+ id: "got-a-job-for-this-one"
+ - is_true: memory_estimation.expected_memory_without_disk
+ - is_true: memory_estimation.expected_memory_with_disk
+ - length: { field_selection: 5 }
+ - match: { field_selection.0.name: "field_1" }
+ - match: { field_selection.0.mapping_types: ["integer"] }
+ - match: { field_selection.0.is_included: true }
+ - match: { field_selection.0.is_required: true }
+ - match: { field_selection.0.feature_type: "numerical" }
+ - is_false: field_selection.0.reason
+ - match: { field_selection.1.name: "field_2" }
+ - match: { field_selection.1.mapping_types: ["double"] }
+ - match: { field_selection.1.is_included: true }
+ - match: { field_selection.1.is_required: false }
+ - match: { field_selection.1.feature_type: "numerical" }
+ - is_false: field_selection.1.reason
+ - match: { field_selection.2.name: "field_3" }
+ - match: { field_selection.2.mapping_types: ["date"] }
+ - match: { field_selection.2.is_included: false }
+ - match: { field_selection.2.is_required: false }
+ - is_false: field_selection.2.feature_type
+ - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+ - match: { field_selection.3.name: "field_4" }
+ - match: { field_selection.3.mapping_types: ["text"] }
+ - match: { field_selection.3.is_included: false }
+ - match: { field_selection.3.is_required: false }
+ - is_false: field_selection.3.feature_type
+ - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+ - match: { field_selection.4.name: "field_4.keyword" }
+ - match: { field_selection.4.mapping_types: ["keyword"] }
+ - match: { field_selection.4.is_included: true }
+ - match: { field_selection.4.is_required: false }
+ - match: { field_selection.4.feature_type: "categorical" }
+ - is_false: field_selection.4.reason
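
(Reviewer note: the "given body" YAML tests above also have a client-side
counterpart; a hedged sketch follows, assuming the HLRC dataframe builders
(DataFrameAnalyticsConfig, DataFrameAnalyticsSource, OutlierDetection) accept a
config with no id and no dest, since _explain never creates anything.)

    import java.io.IOException;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
    import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
    import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
    import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
    import org.elasticsearch.client.ml.dataframe.OutlierDetection;

    // Explain an ad-hoc config (the "_explain" path with a body): only a source
    // and an analysis are supplied, mirroring the YAML requests above.
    static ExplainDataFrameAnalyticsResponse explainConfig(RestHighLevelClient client) throws IOException {
        DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
            .setSource(DataFrameAnalyticsSource.builder().setIndex("index-source").build())
            .setAnalysis(OutlierDetection.createDefault())
            .build();
        ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
        return client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);
    }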