From 8eaee7cbdc6a38fc3e6460679d79ea28811287a4 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Fri, 22 Nov 2019 22:06:10 +0200 Subject: [PATCH] [7.x][ML] Explain data frame analytics API (#49455) (#49504) This commit replaces the _estimate_memory_usage API with a new API, the _explain API. The API consolidates information that is useful before creating a data frame analytics job. It includes: - memory estimation - field selection explanation Memory estimation is moved here from what was previously calculated in the _estimate_memory_usage API. Field selection is a new feature that explains to the user whether each available field was selected to be included or not in the analysis. In the case it was not included, it also explains the reason why. Backport of #49455 --- .../client/MLRequestConverters.java | 18 +- .../client/MachineLearningClient.java | 37 ++- .../ml/ExplainDataFrameAnalyticsRequest.java | 72 ++++ .../ml/ExplainDataFrameAnalyticsResponse.java | 94 ++++++ .../ml/dataframe/explain/FieldSelection.java | 163 +++++++++ .../explain/MemoryEstimation.java} | 23 +- .../client/MLRequestConvertersTests.java | 28 +- .../client/MachineLearningIT.java | 41 ++- .../MlClientDocumentationIT.java | 63 ++-- ...ExplainDataFrameAnalyticsRequestTests.java | 44 +++ ...xplainDataFrameAnalyticsResponseTests.java | 54 +++ .../explain/FieldSelectionTests.java | 57 ++++ .../explain/MemoryEstimationTests.java} | 18 +- .../ml/estimate-memory-usage.asciidoc | 36 -- .../ml/explain-data-frame-analytics.asciidoc | 48 +++ .../high-level/supported-apis.asciidoc | 4 +- ...estimate-memory-usage-dfanalytics.asciidoc | 80 ----- .../apis/explain-dfanalytics.asciidoc | 159 +++++++++ .../ml/df-analytics/apis/index.asciidoc | 8 +- .../xpack/core/XPackClientPlugin.java | 12 +- .../ml/action/EstimateMemoryUsageAction.java | 119 ------- .../ExplainDataFrameAnalyticsAction.java | 101 ++++++ .../action/PutDataFrameAnalyticsAction.java | 9 +- .../dataframe/DataFrameAnalyticsConfig.java | 4 +- 
.../ml/dataframe/explain/FieldSelection.java | 184 +++++++++++ .../dataframe/explain/MemoryEstimation.java | 103 ++++++ ...stimateMemoryUsageActionResponseTests.java | 54 --- ...DataFrameAnalyticsActionResponseTests.java | 42 +++ .../DataFrameAnalyticsConfigTests.java | 12 +- .../explain/FieldSelectionTests.java | 45 +++ .../explain/MemoryEstimationTests.java | 61 ++++ .../ml/qa/ml-with-security/build.gradle | 5 +- .../xpack/ml/MachineLearning.java | 14 +- .../TransportEstimateMemoryUsageAction.java | 130 -------- ...nsportExplainDataFrameAnalyticsAction.java | 156 +++++++++ ...ransportStartDataFrameAnalyticsAction.java | 79 +++-- .../DataFrameDataExtractorFactory.java | 29 +- .../extractor/ExtractedFieldsDetector.java | 150 ++++++--- .../MemoryUsageEstimationProcessManager.java | 4 +- .../RestEstimateMemoryUsageAction.java | 38 --- .../RestExplainDataFrameAnalyticsAction.java | 84 +++++ .../ExtractedFieldsDetectorTests.java | 243 ++++++++++---- .../api/ml.estimate_memory_usage.json | 21 -- .../api/ml.explain_data_frame_analytics.json | 31 ++ ...rame_analytics_memory_usage_estimation.yml | 84 ----- .../test/ml/explain_data_frame_analytics.yml | 308 ++++++++++++++++++ 46 files changed, 2315 insertions(+), 854 deletions(-) create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java create mode 100644 client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java rename client/rest-high-level/src/main/java/org/elasticsearch/client/ml/{EstimateMemoryUsageResponse.java => dataframe/explain/MemoryEstimation.java} (81%) create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java create mode 100644 
client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java create mode 100644 client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java rename client/rest-high-level/src/test/java/org/elasticsearch/client/ml/{EstimateMemoryUsageResponseTests.java => dataframe/explain/MemoryEstimationTests.java} (68%) delete mode 100644 docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc create mode 100644 docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc delete mode 100644 docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc create mode 100644 docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc delete mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelection.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java delete mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/FieldSelectionTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java delete mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java create mode 100644 
x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java delete mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java delete mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json delete mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java index 2fc23acd134..0a1a18eeb44 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java @@ -29,6 +29,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.client.RequestConverters.EndpointBuilder; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -701,12 +702,17 @@ final class MLRequestConverters { return request; } - static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException { - String endpoint = new EndpointBuilder() - .addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage") - 
.build(); - Request request = new Request(HttpPost.METHOD_NAME, endpoint); - request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE)); + static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException { + EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics"); + if (explainRequest.getId() != null) { + endpoint.addPathPart(explainRequest.getId()); + } + endpoint.addPathPartAsIs("_explain"); + + Request request = new Request(HttpPost.METHOD_NAME, endpoint.build()); + if (explainRequest.getConfig() != null) { + request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE)); + } return request; } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java index 2ddc8839f96..468cd535c01 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java @@ -22,6 +22,8 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -34,7 +36,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import 
org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -2249,46 +2250,46 @@ public final class MachineLearningClient { } /** - * Estimates memory usage for the given Data Frame Analytics + * Explains the given Data Frame Analytics *
* <p>
* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized - * @return {@link EstimateMemoryUsageResponse} response object + * @return {@link ExplainDataFrameAnalyticsResponse} response object * @throws IOException when there is a serialization issue sending the request or receiving the response */ - public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request, - RequestOptions options) throws IOException { + public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request, + RequestOptions options) throws IOException { return restHighLevelClient.performRequestAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, Collections.emptySet()); } /** - * Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion + * Explains the given Data Frame Analytics asynchronously and notifies listener upon completion *
* <p>
* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized * @param listener Listener to be notified upon request completion * @return cancellable that may be used to cancel the request */ - public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options, - ActionListener listener) { + public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options, + ActionListener listener) { return restHighLevelClient.performRequestAsyncAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, listener, Collections.emptySet()); } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java new file mode 100644 index 00000000000..880e87b2eea --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java @@ -0,0 +1,72 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.Validatable; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.common.Nullable; + +import java.util.Objects; + +/** + * Request to explain the following about a data frame analytics job: + *
+ * <ul>
+ *     <li>field selection</li>
+ *     <li>memory estimation</li>
+ * </ul>
+ */ +public class ExplainDataFrameAnalyticsRequest implements Validatable { + + private final String id; + private final DataFrameAnalyticsConfig config; + + public ExplainDataFrameAnalyticsRequest(String id) { + this.id = Objects.requireNonNull(id); + this.config = null; + } + + public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) { + this.id = null; + this.config = Objects.requireNonNull(config); + } + + @Nullable + public String getId() { + return id; + } + + @Nullable + public DataFrameAnalyticsConfig getConfig() { + return config; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o; + return Objects.equals(id, other.id) && Objects.equals(config, other.config); + } + + @Override + public int hashCode() { + return Objects.hash(id, config); + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java new file mode 100644 index 00000000000..5879ffc7154 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class ExplainDataFrameAnalyticsResponse implements ToXContentObject { + + public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response"); + + public static final ParseField FIELD_SELECTION = new ParseField("field_selection"); + public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation"); + + public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>( + TYPE.getPreferredName(), true, + args -> new ExplainDataFrameAnalyticsResponse((List) args[0], (MemoryEstimation) args[1])); + + static { + PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION); + } + + private final 
List fieldSelection; + private final MemoryEstimation memoryEstimation; + + public ExplainDataFrameAnalyticsResponse(List fieldSelection, MemoryEstimation memoryEstimation) { + this.fieldSelection = Objects.requireNonNull(fieldSelection); + this.memoryEstimation = Objects.requireNonNull(memoryEstimation); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection); + builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other; + return Objects.equals(fieldSelection, that.fieldSelection) + && Objects.equals(memoryEstimation, that.memoryEstimation); + } + + @Override + public int hashCode() { + return Objects.hash(fieldSelection, memoryEstimation); + } + + public MemoryEstimation getMemoryEstimation() { + return memoryEstimation; + } + + public List getFieldSelection() { + return fieldSelection; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java new file mode 100644 index 00000000000..4483b6fa5e0 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java @@ -0,0 +1,163 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Set; + +public class FieldSelection implements ToXContentObject { + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAPPING_TYPES = new ParseField("mapping_types"); + private static final ParseField IS_INCLUDED = new ParseField("is_included"); + private static final ParseField IS_REQUIRED = new ParseField("is_required"); + private static final ParseField FEATURE_TYPE = new ParseField("feature_type"); + private static final ParseField REASON = new ParseField("reason"); + + public enum FeatureType { + CATEGORICAL, NUMERICAL; + + public static FeatureType fromString(String value) { + return FeatureType.valueOf(value.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return 
name().toLowerCase(Locale.ROOT); + } + } + + @SuppressWarnings("unchecked") + public static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("field_selection", true, + a -> new FieldSelection((String) a[0], new HashSet<>((List) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4], + (String) a[5])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED); + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return FeatureType.fromString(p.text()); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, FEATURE_TYPE, ObjectParser.ValueType.STRING); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON); + } + + private final String name; + private final Set mappingTypes; + private final boolean isIncluded; + private final boolean isRequired; + private final FeatureType featureType; + private final String reason; + + public static FieldSelection included(String name, Set mappingTypes, boolean isRequired, FeatureType featureType) { + return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null); + } + + public static FieldSelection excluded(String name, Set mappingTypes, String reason) { + return new FieldSelection(name, mappingTypes, false, false, null, reason); + } + + FieldSelection(String name, Set mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType, + @Nullable String reason) { + this.name = Objects.requireNonNull(name); + this.mappingTypes = Collections.unmodifiableSet(mappingTypes); + this.isIncluded = isIncluded; + this.isRequired = isRequired; + 
this.featureType = featureType; + this.reason = reason; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(NAME.getPreferredName(), name); + builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes); + builder.field(IS_INCLUDED.getPreferredName(), isIncluded); + builder.field(IS_REQUIRED.getPreferredName(), isRequired); + if (featureType != null) { + builder.field(FEATURE_TYPE.getPreferredName(), featureType); + } + if (reason != null) { + builder.field(REASON.getPreferredName(), reason); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldSelection that = (FieldSelection) o; + return Objects.equals(name, that.name) + && Objects.equals(mappingTypes, that.mappingTypes) + && isIncluded == that.isIncluded + && isRequired == that.isRequired + && Objects.equals(featureType, that.featureType) + && Objects.equals(reason, that.reason); + } + + @Override + public int hashCode() { + return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason); + } + + public String getName() { + return name; + } + + public Set getMappingTypes() { + return mappingTypes; + } + + public boolean isIncluded() { + return isIncluded; + } + + public boolean isRequired() { + return isRequired; + } + + @Nullable + public FeatureType getFeatureType() { + return featureType; + } + + @Nullable + public String getReason() { + return reason; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java similarity index 81% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java rename to 
client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java index c97cc545cdb..9151b8ce5dd 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java @@ -16,8 +16,7 @@ * specific language governing permissions and limitations * under the License. */ - -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; @@ -26,23 +25,19 @@ import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; -public class EstimateMemoryUsageResponse implements ToXContentObject { - +public class MemoryEstimation implements ToXContentObject { + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - "estimate_memory_usage_response", - true, - args -> new EstimateMemoryUsageResponse((ByteSizeValue) args[0], (ByteSizeValue) args[1])); + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", true, + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); static { PARSER.declareField( @@ -57,14 +52,10 @@ public class EstimateMemoryUsageResponse implements ToXContentObject { 
ObjectParser.ValueType.VALUE); } - public static EstimateMemoryUsageResponse fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - private final ByteSizeValue expectedMemoryWithoutDisk; private final ByteSizeValue expectedMemoryWithDisk; - public EstimateMemoryUsageResponse(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; this.expectedMemoryWithDisk = expectedMemoryWithDisk; } @@ -99,7 +90,7 @@ public class EstimateMemoryUsageResponse implements ToXContentObject { return false; } - EstimateMemoryUsageResponse that = (EstimateMemoryUsageResponse) other; + MemoryEstimation that = (MemoryEstimation) other; return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java index db59054cdb8..633e5363ff1 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java @@ -25,6 +25,7 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPut; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -788,14 +789,25 @@ public class MLRequestConvertersTests extends 
ESTestCase { } } - public void testEstimateMemoryUsage() throws IOException { - PutDataFrameAnalyticsRequest estimateRequest = new PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); - Request request = MLRequestConverters.estimateMemoryUsage(estimateRequest); - assertEquals(HttpPost.METHOD_NAME, request.getMethod()); - assertEquals("/_ml/data_frame/analytics/_estimate_memory_usage", request.getEndpoint()); - try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { - DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); - assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + public void testExplainDataFrameAnalytics() throws IOException { + // Request with config + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/_explain", request.getEndpoint()); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { + DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); + assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + } + } + // Request with id + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest("foo"); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/foo/_explain", request.getEndpoint()); + assertNull(request.getEntity()); } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 361b3674550..efb62b3f526 
100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -32,6 +32,8 @@ import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.indices.GetIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -44,7 +46,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -140,6 +141,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.Binar import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -1996,8 +1999,8 @@ 
public class MachineLearningIT extends ESRestHighLevelClientTestCase { highLevelClient().indices().create(new CreateIndexRequest(indexName).mapping(mapping), RequestOptions.DEFAULT); } - public void testEstimateMemoryUsage() throws IOException { - String indexName = "estimate-test-index"; + public void testExplainDataFrameAnalytics() throws IOException { + String indexName = "explain-df-test-index"; createIndex(indexName, mappingForSoftClassification()); BulkRequest bulk1 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2007,8 +2010,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { highLevelClient().bulk(bulk1, RequestOptions.DEFAULT); MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); - PutDataFrameAnalyticsRequest estimateMemoryUsageRequest = - new PutDataFrameAnalyticsRequest( + ExplainDataFrameAnalyticsRequest explainRequest = + new ExplainDataFrameAnalyticsRequest( DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build()) .setAnalysis(OutlierDetection.createDefault()) @@ -2019,11 +2022,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB); // Data Frame has 10 rows, expect that the returned estimates fall within (1kB, 1GB) range. 
- EstimateMemoryUsageResponse response1 = - execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); - assertThat(response1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); - assertThat(response1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + ExplainDataFrameAnalyticsResponse response1 = execute(explainRequest, machineLearningClient::explainDataFrameAnalytics, + machineLearningClient::explainDataFrameAnalyticsAsync); + + MemoryEstimation memoryEstimation1 = response1.getMemoryEstimation(); + assertThat(memoryEstimation1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + assertThat(memoryEstimation1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + + List fieldSelection = response1.getFieldSelection(); + assertThat(fieldSelection.size(), equalTo(3)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("dataset", "label", "p")); BulkRequest bulk2 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2033,15 +2041,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { highLevelClient().bulk(bulk2, RequestOptions.DEFAULT); // Data Frame now has 100 rows, expect that the returned estimates will be greater than or equal to the previous ones. 
- EstimateMemoryUsageResponse response2 = + ExplainDataFrameAnalyticsResponse response2 = execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); + explainRequest, machineLearningClient::explainDataFrameAnalytics, machineLearningClient::explainDataFrameAnalyticsAsync); + MemoryEstimation memoryEstimation2 = response2.getMemoryEstimation(); assertThat( - response2.getExpectedMemoryWithoutDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithoutDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); assertThat( - response2.getExpectedMemoryWithDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithDisk()), lessThan(upperBound))); } public void testGetTrainedModels() throws Exception { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index da12420535f..8a118672d95 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -36,6 +36,8 @@ import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; 
import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -48,7 +50,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -155,6 +156,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.Confu import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -213,6 +216,7 @@ import java.util.zip.GZIPOutputStream; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -3460,10 +3464,10 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { } } - public void testEstimateMemoryUsage() throws Exception { - createIndex("estimate-test-source-index"); + public void testExplainDataFrameAnalytics() throws Exception { + 
createIndex("explain-df-test-source-index"); BulkRequest bulkRequest = - new BulkRequest("estimate-test-source-index") + new BulkRequest("explain-df-test-source-index") .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i = 0; i < 10; ++i) { bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L)); @@ -3471,22 +3475,33 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { RestHighLevelClient client = highLevelClient(); client.bulk(bulkRequest, RequestOptions.DEFAULT); { - // tag::estimate-memory-usage-request + // tag::explain-data-frame-analytics-id-request + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("existing_job_id"); // <1> + // end::explain-data-frame-analytics-id-request + + // tag::explain-data-frame-analytics-config-request DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1> - // end::estimate-memory-usage-request + request = new ExplainDataFrameAnalyticsRequest(config); // <1> + // end::explain-data-frame-analytics-config-request - // tag::estimate-memory-usage-execute - EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT); - // end::estimate-memory-usage-execute + // tag::explain-data-frame-analytics-execute + ExplainDataFrameAnalyticsResponse response = client.machineLearning().explainDataFrameAnalytics(request, + RequestOptions.DEFAULT); + // end::explain-data-frame-analytics-execute - // tag::estimate-memory-usage-response - ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1> - 
ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2> - // end::estimate-memory-usage-response + // tag::explain-data-frame-analytics-response + List fieldSelection = response.getFieldSelection(); // <1> + MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // <2> + // end::explain-data-frame-analytics-response + + assertThat(fieldSelection.size(), equalTo(2)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("timestamp", "total")); + + ByteSizeValue expectedMemoryWithoutDisk = memoryEstimation.getExpectedMemoryWithoutDisk(); // <1> + ByteSizeValue expectedMemoryWithDisk = memoryEstimation.getExpectedMemoryWithDisk(); // <2> // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow. ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB); @@ -3496,14 +3511,14 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { } { DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); - // tag::estimate-memory-usage-execute-listener - ActionListener listener = new ActionListener() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + // tag::explain-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener() { @Override - public void onResponse(EstimateMemoryUsageResponse response) { + public void onResponse(ExplainDataFrameAnalyticsResponse response) { // <1> } @@ -3512,15 +3527,15 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { // <2> } }; - 
// end::estimate-memory-usage-execute-listener + // end::explain-data-frame-analytics-execute-listener // Replace the empty listener by a blocking listener in test final CountDownLatch latch = new CountDownLatch(1); listener = new LatchedActionListener<>(listener, latch); - // tag::estimate-memory-usage-execute-async - client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1> - // end::estimate-memory-usage-execute-async + // tag::explain-data-frame-analytics-execute-async + client.machineLearning().explainDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::explain-data-frame-analytics-execute-async assertTrue(latch.await(30L, TimeUnit.SECONDS)); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java new file mode 100644 index 00000000000..7273a40e298 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfigTests; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class ExplainDataFrameAnalyticsRequestTests extends ESTestCase { + + public void testIdConstructor() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("foo"); + assertThat(request.getId(), equalTo("foo")); + assertThat(request.getConfig(), is(nullValue())); + } + + public void testConfigConstructor() { + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfigTests.randomDataFrameAnalyticsConfig(); + + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + assertThat(request.getId(), is(nullValue())); + assertThat(request.getConfig(), equalTo(config)); + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java new file mode 100644 index 00000000000..f4adbd09ba7 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelectionTests; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimationTests; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +public class ExplainDataFrameAnalyticsResponseTests extends AbstractXContentTestCase<ExplainDataFrameAnalyticsResponse> { + + @Override + protected ExplainDataFrameAnalyticsResponse createTestInstance() { + int fieldSelectionCount = randomIntBetween(1, 5); + List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount); + IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom())); + MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom(); + + return new ExplainDataFrameAnalyticsResponse(fieldSelection, memoryEstimation); + } + + @Override + protected ExplainDataFrameAnalyticsResponse doParseInstance(XContentParser parser) throws IOException { + return ExplainDataFrameAnalyticsResponse.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java
b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java new file mode 100644 index 00000000000..e76f39b5b85 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java @@ -0,0 +1,57 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml.dataframe.explain; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; +import java.util.Set; +import java.util.stream.Collectors; + +public class FieldSelectionTests extends AbstractXContentTestCase { + + public static FieldSelection createRandom() { + Set mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip") + .stream().collect(Collectors.toSet()); + FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values()); + String reason = randomBoolean() ? 
null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java similarity index 68% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java index f8f2746204d..884736e573e 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentParser; @@ -24,22 +24,22 @@ import org.elasticsearch.test.AbstractXContentTestCase; import java.io.IOException; -public class EstimateMemoryUsageResponseTests extends AbstractXContentTestCase { +public class MemoryEstimationTests extends AbstractXContentTestCase { - public static EstimateMemoryUsageResponse randomResponse() { - return new EstimateMemoryUsageResponse( + public static MemoryEstimation createRandom() { + return new MemoryEstimation( randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); } @Override - protected EstimateMemoryUsageResponse createTestInstance() { - return randomResponse(); + protected MemoryEstimation createTestInstance() { + return createRandom(); } @Override - protected EstimateMemoryUsageResponse doParseInstance(XContentParser parser) throws IOException { - return EstimateMemoryUsageResponse.fromXContent(parser); + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); } @Override diff --git a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc deleted file mode 100644 index 8b7ae0f55c8..00000000000 --- a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc +++ /dev/null @@ -1,36 +0,0 @@ --- -:api: estimate-memory-usage -:request: PutDataFrameAnalyticsRequest -:response: EstimateMemoryUsageResponse --- -[role="xpack"] -[id="{upid}-{api}"] -=== Estimate memory usage API - -Estimates memory usage of {dfanalytics}. -Estimation results can be used when deciding the appropriate value for `model_memory_limit` setting later on. - -The API accepts an +{request}+ object and returns an +{response}+. 
- -[id="{upid}-{api}-request"] -==== Estimate memory usage request - -["source","java",subs="attributes,callouts,macros"] --------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-request] --------------------------------------------------- -<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed - -include::../execution.asciidoc[] - -[id="{upid}-{api}-response"] -==== Response - -The returned +{response}+ contains the memory usage estimates. - -["source","java",subs="attributes,callouts,macros"] --------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-response] --------------------------------------------------- -<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk). -<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. \ No newline at end of file diff --git a/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc new file mode 100644 index 00000000000..3c41531d222 --- /dev/null +++ b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc @@ -0,0 +1,48 @@ +-- +:api: explain-data-frame-analytics +:request: ExplainDataFrameAnalyticsRequest +:response: ExplainDataFrameAnalyticsResponse +-- +[role="xpack"] +[id="{upid}-{api}"] +=== Explain {dfanalytics} API + +Explains the following about a {dataframe-analytics-config}: + +* field selection: which fields are included or not in the analysis +* memory estimation: how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for `model_memory_limit` setting later on. + +The API accepts an +{request}+ object and returns an +{response}+.
+ +[id="{upid}-{api}-request"] +==== Explain {dfanalytics} request + +The request can be constructed with the id of an existing {dfanalytics-job}. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-id-request] +-------------------------------------------------- +<1> Constructing a new request with the id of an existing {dfanalytics-job} + +It can also be constructed with a {dataframe-analytics-config} to explain it before creating it. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-config-request] +-------------------------------------------------- +<1> Constructing a new request containing a {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the field selection and the memory usage estimation. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> A list where each item explains whether a field was selected for analysis or not +<2> The memory estimation for the {dfanalytics-job} diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 770866a0755..d691a3ac34b 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -300,7 +300,7 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-start-data-frame-analytics>> * <<{upid}-stop-data-frame-analytics>> * <<{upid}-evaluate-data-frame>> -* <<{upid}-estimate-memory-usage>> +* <<{upid}-explain-data-frame-analytics>> * <<{upid}-get-trained-models>> * <<{upid}-put-filter>> * <<{upid}-get-filters>> @@ -353,7 +353,7 
@@ include::ml/delete-data-frame-analytics.asciidoc[] include::ml/start-data-frame-analytics.asciidoc[] include::ml/stop-data-frame-analytics.asciidoc[] include::ml/evaluate-data-frame.asciidoc[] -include::ml/estimate-memory-usage.asciidoc[] +include::ml/explain-data-frame-analytics.asciidoc[] include::ml/get-trained-models.asciidoc[] include::ml/put-filter.asciidoc[] include::ml/get-filters.asciidoc[] diff --git a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc deleted file mode 100644 index 64db472dfd1..00000000000 --- a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[estimate-memory-usage-dfanalytics]] -=== Estimate memory usage API - -[subs="attributes"] -++++ -Estimate memory usage for {dfanalytics-jobs} -++++ - -Estimates memory usage for the given {dataframe-analytics-config}. - -experimental[] - -[[ml-estimate-memory-usage-dfanalytics-request]] -==== {api-request-title} - -`POST _ml/data_frame/analytics/_estimate_memory_usage` - -[[ml-estimate-memory-usage-dfanalytics-prereq]] -==== {api-prereq-title} - -* You must have `monitor_ml` privilege to use this API. For more -information, see <> and <>. - -[[ml-estimate-memory-usage-dfanalytics-desc]] -==== {api-description-title} - -This API estimates memory usage for the given {dataframe-analytics-config} before the {dfanalytics-job} is even created. - -Serves as an advice on how to set `model_memory_limit` when creating {dfanalytics-job}. - -[[ml-estimate-memory-usage-dfanalytics-request-body]] -==== {api-request-body-title} - -`data_frame_analytics_config`:: - (Required, object) Intended configuration of {dfanalytics-job}. For more information, see - <>. - Note that `id` and `dest` don't need to be provided in the context of this API. 
- -[[ml-estimate-memory-usage-dfanalytics-results]] -==== {api-response-body-title} - -`expected_memory_without_disk`:: - (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory - (i.e. without overflowing to disk). - -`expected_memory_with_disk`:: - (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. - `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to - limit the main memory needed to perform {dfanalytics}. - -[[ml-estimate-memory-usage-dfanalytics-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -POST _ml/data_frame/analytics/_estimate_memory_usage -{ - "data_frame_analytics_config": { - "source": { - "index": "logdata" - }, - "analysis": { - "outlier_detection": {} - } - } -} --------------------------------------------------- -// TEST[skip:TBD] - -The API returns the following results: - -[source,console-result] ----- -{ - "expected_memory_without_disk": "128MB", - "expected_memory_with_disk": "32MB" -} ----- diff --git a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc new file mode 100644 index 00000000000..c9ee565e9b2 --- /dev/null +++ b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc @@ -0,0 +1,159 @@ +[role="xpack"] +[testenv="platinum"] +[[explain-dfanalytics]] +=== Explain {dfanalytics} API + +[subs="attributes"] +++++ +Explain {dfanalytics} API +++++ + +Explains a {dataframe-analytics-config}. 
+ +experimental[] + +[[ml-explain-dfanalytics-request]] +==== {api-request-title} + +`GET _ml/data_frame/analytics/_explain` + + +`POST _ml/data_frame/analytics/_explain` + + +`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_explain` + + +`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_explain` + +[[ml-explain-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `monitor_ml` privilege to use this API. For more +information, see <<security-privileges>> and <<built-in-roles>>. + +[[ml-explain-dfanalytics-desc]] +==== {api-description-title} + +This API provides explanations for a {dataframe-analytics-config} that either exists already or one that has not been created yet. +The following explanations are provided: + +* which fields are included or not in the analysis and why +* how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for `model_memory_limit` setting later on. +These explanations apply to either an existing {dfanalytics-job} or one that has not been created yet. + +[[ml-explain-dfanalytics-path-params]] +==== {api-path-parms-title} + +`<data_frame_analytics_id>`:: + (Optional, string) An alphanumeric character string that uniquely identifies the existing + {dfanalytics-job} to explain. This identifier can contain lowercase alphanumeric + characters (a-z and 0-9), hyphens, and underscores. It must start and end with + alphanumeric characters. + +[[ml-explain-dfanalytics-request-body]] +==== {api-request-body-title} + +`data_frame_analytics_config`:: + (Optional, object) Intended configuration of {dfanalytics-job}. For more information, see + <<ml-dfanalytics-resources>>. + Note that `id` and `dest` don't need to be provided in the context of this API. + +[[ml-explain-dfanalytics-results]] +==== {api-response-body-title} + +The API returns a response that contains the following: + +`field_selection`:: + (array) An array of objects that explain selection for each field, sorted by the field names. + Each object in the array has the following properties: + + `name`::: + (string) The field name.
+ + `mapping_types`::: + (array) The mapping types of the field. + + `is_included`::: + (boolean) Whether the field is selected to be included in the analysis. + + `is_required`::: + (boolean) Whether the field is required. + + `feature_type`::: + (string) The feature type of this field for the analysis. May be `categorical` or `numerical`. + + `reason`::: + (string) The reason a field is not selected to be included in the analysis. + +`memory_estimation`:: + (object) An object containing the memory estimates. The object has the following properties: + + `expected_memory_without_disk`::: + (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory + (i.e. without overflowing to disk). + + `expected_memory_with_disk`::: + (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. + `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to + limit the main memory needed to perform {dfanalytics}.
+ +[[ml-explain-dfanalytics-example]] +==== {api-examples-title} + +[source,console] +-------------------------------------------------- +POST _ml/data_frame/analytics/_explain +{ + "data_frame_analytics_config": { + "source": { + "index": "houses_sold_last_10_yrs" + }, + "analysis": { + "regression": { + "dependent_variable": "price" + } + } + } +} +-------------------------------------------------- +// TEST[skip:TBD] + +The API returns the following results: + +[source,console-result] +---- +{ + "field_selection": [ + { + "name": "number_of_bedrooms", + "mapping_types": ["integer"], + "is_included": true, + "is_required": false, + "feature_type": "numerical" + }, + { + "name": "postcode", + "mapping_types": ["text"], + "is_included": false, + "is_required": false, + "reason": "[postcode.keyword] is preferred because it is aggregatable" + }, + { + "name": "postcode.keyword", + "mapping_types": ["keyword"], + "is_included": true, + "is_required": false, + "feature_type": "categorical" + }, + { + "name": "price", + "mapping_types": ["float"], + "is_included": true, + "is_required": true, + "feature_type": "numerical" + } + ], + "memory_estimation": { + "expected_memory_without_disk": "128MB", + "expected_memory_with_disk": "32MB" + } +} +---- diff --git a/docs/reference/ml/df-analytics/apis/index.asciidoc b/docs/reference/ml/df-analytics/apis/index.asciidoc index 30e909f3ffa..6bf63e7ddb8 100644 --- a/docs/reference/ml/df-analytics/apis/index.asciidoc +++ b/docs/reference/ml/df-analytics/apis/index.asciidoc @@ -5,16 +5,16 @@ You can use the following APIs to perform {ml} {dfanalytics} activities. -* <> +* <> * <> * <> * <> * <> * <> * <> -* <> +* <> -See also <>. +See also <>.
//CREATE include::put-dfanalytics.asciidoc[] @@ -23,7 +23,7 @@ include::delete-dfanalytics.asciidoc[] //EVALUATE include::evaluate-dfanalytics.asciidoc[] -//ESTIMATE_MEMORY_USAGE +//EXPLAIN -include::estimate-memory-usage-dfanalytics.asciidoc[] +include::explain-dfanalytics.asciidoc[] //GET include::get-dfanalytics.asciidoc[] include::get-dfanalytics-stats.asciidoc[] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 8caac9d6e20..d99dd1ec233 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -79,6 +79,7 @@ import org.elasticsearch.xpack.core.ml.MachineLearningFeatureSetUsage; import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -89,7 +90,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -158,6 +158,10 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.P 
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.SoftClassificationMetric; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.inference.results.ClassificationInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.RegressionInferenceResults; @@ -171,10 +175,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.OutputAgg import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedMode; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedSum; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.tree.Tree; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -381,7 +381,7 @@ public class XPackClientPlugin extends Plugin implements ActionPlugin, NetworkPl StartDataFrameAnalyticsAction.INSTANCE, StopDataFrameAnalyticsAction.INSTANCE, EvaluateDataFrameAction.INSTANCE, - EstimateMemoryUsageAction.INSTANCE, + 
ExplainDataFrameAnalyticsAction.INSTANCE, InternalInferModelAction.INSTANCE, GetTrainedModelsAction.INSTANCE, DeleteTrainedModelAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java deleted file mode 100644 index 529db21cced..00000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.action.ActionResponse; -import org.elasticsearch.action.ActionType; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.ConstructingObjectParser; -import org.elasticsearch.common.xcontent.ObjectParser; -import org.elasticsearch.common.xcontent.ToXContentObject; -import org.elasticsearch.common.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Objects; - -import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; - -public class EstimateMemoryUsageAction extends ActionType { - - public static final EstimateMemoryUsageAction INSTANCE = new EstimateMemoryUsageAction(); - public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/estimate_memory_usage"; - - private EstimateMemoryUsageAction() { - super(NAME, EstimateMemoryUsageAction.Response::new); - } - - public static class 
Response extends ActionResponse implements ToXContentObject { - - public static final ParseField TYPE = new ParseField("memory_usage_estimation_result"); - - public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); - public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - TYPE.getPreferredName(), - args -> new Response((ByteSizeValue) args[0], (ByteSizeValue) args[1])); - - static { - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), - EXPECTED_MEMORY_WITHOUT_DISK, - ObjectParser.ValueType.VALUE); - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), - EXPECTED_MEMORY_WITH_DISK, - ObjectParser.ValueType.VALUE); - } - - private final ByteSizeValue expectedMemoryWithoutDisk; - private final ByteSizeValue expectedMemoryWithDisk; - - public Response(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { - this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; - this.expectedMemoryWithDisk = expectedMemoryWithDisk; - } - - public Response(StreamInput in) throws IOException { - super(in); - this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); - this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); - } - - public ByteSizeValue getExpectedMemoryWithoutDisk() { - return expectedMemoryWithoutDisk; - } - - public ByteSizeValue getExpectedMemoryWithDisk() { - return expectedMemoryWithDisk; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalWriteable(expectedMemoryWithoutDisk); - out.writeOptionalWriteable(expectedMemoryWithDisk); - } - - @Override 
- public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - if (expectedMemoryWithoutDisk != null) { - builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); - } - if (expectedMemoryWithDisk != null) { - builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); - } - builder.endObject(); - return builder; - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - - Response that = (Response) other; - return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) - && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); - } - - @Override - public int hashCode() { - return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); - } - } -} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java new file mode 100644 index 00000000000..46888ea27a7 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class ExplainDataFrameAnalyticsAction extends ActionType { + + public static final ExplainDataFrameAnalyticsAction INSTANCE = new ExplainDataFrameAnalyticsAction(); + public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/explain"; + + private ExplainDataFrameAnalyticsAction() { + super(NAME, ExplainDataFrameAnalyticsAction.Response::new); + } + + public static class Response extends ActionResponse implements ToXContentObject { + + public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response"); + + public static final ParseField FIELD_SELECTION = new ParseField("field_selection"); + public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation"); + + static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>( + TYPE.getPreferredName(), + args -> new Response((List) args[0], (MemoryEstimation) args[1])); + + static { + PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION); + } + + private final List fieldSelection; + private final MemoryEstimation memoryEstimation; + + public 
Response(List fieldSelection, MemoryEstimation memoryEstimation) { + this.fieldSelection = Objects.requireNonNull(fieldSelection); + this.memoryEstimation = Objects.requireNonNull(memoryEstimation); + } + + public Response(StreamInput in) throws IOException { + super(in); + this.fieldSelection = in.readList(FieldSelection::new); + this.memoryEstimation = new MemoryEstimation(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeList(fieldSelection); + memoryEstimation.writeTo(out); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection); + builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + Response that = (Response) other; + return Objects.equals(fieldSelection, that.fieldSelection) + && Objects.equals(memoryEstimation, that.memoryEstimation); + } + + @Override + public int hashCode() { + return Objects.hash(fieldSelection, memoryEstimation); + } + + public MemoryEstimation getMemoryEstimation() { + return memoryEstimation; + } + + public List getFieldSelection() { + return fieldSelection; + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java index 6860162d793..5bce41d8a4a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java @@ -51,13 +51,14 @@ public class PutDataFrameAnalyticsAction extends ActionType 
PARSER = new ConstructingObjectParser<>("field_selection", + a -> new FieldSelection((String) a[0], new HashSet<>((List) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4], + (String) a[5])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED); + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return FeatureType.fromString(p.text()); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, FEATURE_TYPE, ObjectParser.ValueType.STRING); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON); + } + + private final String name; + private final Set mappingTypes; + private final boolean isIncluded; + private final boolean isRequired; + private final FeatureType featureType; + private final String reason; + + public static FieldSelection included(String name, Set mappingTypes, boolean isRequired, FeatureType featureType) { + return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null); + } + + public static FieldSelection excluded(String name, Set mappingTypes, String reason) { + return new FieldSelection(name, mappingTypes, false, false, null, reason); + } + + FieldSelection(String name, Set mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType, + @Nullable String reason) { + this.name = Objects.requireNonNull(name); + this.mappingTypes = Collections.unmodifiableSet(mappingTypes); + this.isIncluded = isIncluded; + this.isRequired = isRequired; + this.featureType = featureType; + this.reason = reason; + } + + public FieldSelection(StreamInput in) throws IOException { + 
this.name = in.readString(); + this.mappingTypes = Collections.unmodifiableSet(in.readSet(StreamInput::readString)); + this.isIncluded = in.readBoolean(); + this.isRequired = in.readBoolean(); + boolean hasFeatureType = in.readBoolean(); + + if (hasFeatureType) { + this.featureType = in.readEnum(FeatureType.class); + } else { + this.featureType = null; + } + + this.reason = in.readOptionalString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + out.writeCollection(mappingTypes, StreamOutput::writeString); + out.writeBoolean(isIncluded); + out.writeBoolean(isRequired); + + if (featureType == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeEnum(featureType); + } + out.writeOptionalString(reason); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(NAME.getPreferredName(), name); + builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes); + builder.field(IS_INCLUDED.getPreferredName(), isIncluded); + builder.field(IS_REQUIRED.getPreferredName(), isRequired); + if (featureType != null) { + builder.field(FEATURE_TYPE.getPreferredName(), featureType); + } + if (reason != null) { + builder.field(REASON.getPreferredName(), reason); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldSelection that = (FieldSelection) o; + return Objects.equals(name, that.name) + && Objects.equals(mappingTypes, that.mappingTypes) + && isIncluded == that.isIncluded + && isRequired == that.isRequired + && Objects.equals(featureType, that.featureType) + && Objects.equals(reason, that.reason); + } + + @Override + public int hashCode() { + return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason); + } + + public String getName() 
{ + return name; + } + + public Set getMappingTypes() { + return mappingTypes; + } + + public boolean isIncluded() { + return isIncluded; + } + + public boolean isRequired() { + return isRequired; + } + + @Nullable + public FeatureType getFeatureType() { + return featureType; + } + + @Nullable + public String getReason() { + return reason; + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java new file mode 100644 index 00000000000..7972c6a9ee0 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java @@ -0,0 +1,103 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public class MemoryEstimation implements ToXContentObject, Writeable { + + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); + public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); + + static { + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), + EXPECTED_MEMORY_WITHOUT_DISK, + ObjectParser.ValueType.VALUE); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), + EXPECTED_MEMORY_WITH_DISK, + ObjectParser.ValueType.VALUE); + } + + private final ByteSizeValue expectedMemoryWithoutDisk; + private final ByteSizeValue expectedMemoryWithDisk; + + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + this.expectedMemoryWithoutDisk = 
expectedMemoryWithoutDisk; + this.expectedMemoryWithDisk = expectedMemoryWithDisk; + } + + public MemoryEstimation(StreamInput in) throws IOException { + this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); + this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); + } + + public ByteSizeValue getExpectedMemoryWithoutDisk() { + return expectedMemoryWithoutDisk; + } + + public ByteSizeValue getExpectedMemoryWithDisk() { + return expectedMemoryWithDisk; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalWriteable(expectedMemoryWithoutDisk); + out.writeOptionalWriteable(expectedMemoryWithDisk); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (expectedMemoryWithoutDisk != null) { + builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); + } + if (expectedMemoryWithDisk != null) { + builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + MemoryEstimation that = (MemoryEstimation) other; + return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) + && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); + } + + @Override + public int hashCode() { + return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java deleted file mode 100644 index 1bc8d8970ea..00000000000 --- 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractSerializingTestCase; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction.Response; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; - -public class EstimateMemoryUsageActionResponseTests extends AbstractSerializingTestCase { - - @Override - protected Response createTestInstance() { - return new Response( - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, - randomBoolean() ? 
new ByteSizeValue(randomNonNegativeLong()) : null); - } - - @Override - protected Writeable.Reader instanceReader() { - return Response::new; - } - - @Override - protected Response doParseInstance(XContentParser parser) { - return Response.PARSER.apply(parser, null); - } - - public void testConstructor_NullValues() { - Response response = new Response(null, null); - assertThat(response.getExpectedMemoryWithoutDisk(), nullValue()); - assertThat(response.getExpectedMemoryWithDisk(), nullValue()); - } - - public void testConstructor_SmallValues() { - Response response = new Response(new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); - } - - public void testConstructor() { - Response response = new Response(new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); - } -} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java new file mode 100644 index 00000000000..ea1aca3916c --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction.Response; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelectionTests; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimationTests; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +public class ExplainDataFrameAnalyticsActionResponseTests extends AbstractSerializingTestCase { + + @Override + protected Response createTestInstance() { + int fieldSelectionCount = randomIntBetween(1, 5); + List fieldSelection = new ArrayList<>(fieldSelectionCount); + IntStream.of(fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom())); + MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom(); + + return new Response(fieldSelection, memoryEstimation); + } + + @Override + protected Writeable.Reader instanceReader() { + return Response::new; + } + + @Override + protected Response doParseInstance(XContentParser parser) { + return Response.PARSER.apply(parser, null); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java index 3266f488daf..d8c52c83902 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java @@ -279,32 +279,32 @@ public class 
DataFrameAnalyticsConfigTests extends AbstractSerializingTestCase { + + public static FieldSelection createRandom() { + Set mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip") + .stream().collect(Collectors.toSet()); + FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values()); + String reason = randomBoolean() ? null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected Writeable.Reader instanceReader() { + return FieldSelection::new; + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java new file mode 100644 index 00000000000..dc9e20bd86a --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +public class MemoryEstimationTests extends AbstractSerializingTestCase { + + public static MemoryEstimation createRandom() { + return new MemoryEstimation( + randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, + randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); + } + + @Override + protected MemoryEstimation createTestInstance() { + return createRandom(); + } + + @Override + protected Writeable.Reader instanceReader() { + return MemoryEstimation::new; + } + + @Override + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); + } + + public void testConstructor_NullValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation(null, null); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), nullValue()); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), nullValue()); + } + + public void testConstructor_SmallValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); + } + + public void testConstructor() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); 
+ assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); + } +} diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 961dc944ea7..38beb1d1908 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -92,7 +92,6 @@ integTest.runner { 'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k', 'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one', 'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred', - 'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame', 'ml/evaluate_data_frame/Test given missing index', 'ml/evaluate_data_frame/Test given index does not exist', 'ml/evaluate_data_frame/Test given missing evaluation', @@ -113,6 +112,10 @@ integTest.runner { 'ml/evaluate_data_frame/Test regression given evaluation with empty metrics', 'ml/evaluate_data_frame/Test regression given missing actual_field', 'ml/evaluate_data_frame/Test regression given missing predicted_field', + 'ml/explain_data_frame_analytics/Test neither job id nor body', + 'ml/explain_data_frame_analytics/Test both job id and body', + 'ml/explain_data_frame_analytics/Test missing job', + 'ml/explain_data_frame_analytics/Test empty data frame given body', 'ml/delete_job_force/Test cannot force delete a non-existent job', 'ml/delete_model_snapshot/Test delete snapshot missing snapshotId', 'ml/delete_model_snapshot/Test delete snapshot missing job_id', diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 
964bc719cbd..0293a367473 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.XPackSettings; import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.MlMetaIndex; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -75,7 +76,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -98,8 +98,8 @@ import org.elasticsearch.xpack.core.ml.action.GetOverallBucketsAction; import org.elasticsearch.xpack.core.ml.action.GetRecordsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; -import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction; +import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.KillProcessAction; import org.elasticsearch.xpack.core.ml.action.MlInfoAction; import 
org.elasticsearch.xpack.core.ml.action.OpenJobAction; @@ -136,6 +136,7 @@ import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings; import org.elasticsearch.xpack.core.ml.notifications.AuditorField; import org.elasticsearch.xpack.core.template.TemplateUtils; import org.elasticsearch.xpack.ml.action.TransportCloseJobAction; +import org.elasticsearch.xpack.ml.action.TransportExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarEventAction; import org.elasticsearch.xpack.ml.action.TransportDeleteDataFrameAnalyticsAction; @@ -146,7 +147,6 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction; import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; import org.elasticsearch.xpack.ml.action.TransportDeleteTrainedModelAction; -import org.elasticsearch.xpack.ml.action.TransportEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.action.TransportEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; @@ -167,9 +167,9 @@ import org.elasticsearch.xpack.ml.action.TransportGetJobsStatsAction; import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction; import org.elasticsearch.xpack.ml.action.TransportGetOverallBucketsAction; import org.elasticsearch.xpack.ml.action.TransportGetRecordsAction; +import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsStatsAction; import org.elasticsearch.xpack.ml.action.TransportInternalInferModelAction; -import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportIsolateDatafeedAction; import 
org.elasticsearch.xpack.ml.action.TransportKillProcessAction; import org.elasticsearch.xpack.ml.action.TransportMlInfoAction; @@ -258,8 +258,8 @@ import org.elasticsearch.xpack.ml.rest.datafeeds.RestPutDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestStartDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestStopDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestUpdateDatafeedAction; +import org.elasticsearch.xpack.ml.rest.dataframe.RestExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestDeleteDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.rest.dataframe.RestEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsStatsAction; @@ -759,7 +759,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu new RestStartDataFrameAnalyticsAction(restController), new RestStopDataFrameAnalyticsAction(restController), new RestEvaluateDataFrameAction(restController), - new RestEstimateMemoryUsageAction(restController), + new RestExplainDataFrameAnalyticsAction(restController), new RestGetTrainedModelsAction(restController), new RestDeleteTrainedModelAction(restController), new RestGetTrainedModelsStatsAction(restController) @@ -829,7 +829,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu new ActionHandler<>(StartDataFrameAnalyticsAction.INSTANCE, TransportStartDataFrameAnalyticsAction.class), new ActionHandler<>(StopDataFrameAnalyticsAction.INSTANCE, TransportStopDataFrameAnalyticsAction.class), new ActionHandler<>(EvaluateDataFrameAction.INSTANCE, TransportEvaluateDataFrameAction.class), - new ActionHandler<>(EstimateMemoryUsageAction.INSTANCE, TransportEstimateMemoryUsageAction.class), + new 
ActionHandler<>(ExplainDataFrameAnalyticsAction.INSTANCE, TransportExplainDataFrameAnalyticsAction.class), new ActionHandler<>(InternalInferModelAction.INSTANCE, TransportInternalInferModelAction.class), new ActionHandler<>(GetTrainedModelsAction.INSTANCE, TransportGetTrainedModelsAction.class), new ActionHandler<>(DeleteTrainedModelAction.INSTANCE, TransportDeleteTrainedModelAction.class), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java deleted file mode 100644 index a82db7c4f97..00000000000 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ -package org.elasticsearch.xpack.ml.action; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionListenerResponseHandler; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.HandledTransportAction; -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.transport.TransportService; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import org.elasticsearch.xpack.ml.MachineLearning; -import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; -import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; - -import java.util.Objects; -import java.util.Optional; - -/** - * Estimates memory usage for the given data frame analytics spec. - * Redirects to a different node if the current node is *not* an ML node. 
- */ -public class TransportEstimateMemoryUsageAction - extends HandledTransportAction { - - private final TransportService transportService; - private final ClusterService clusterService; - private final NodeClient client; - private final MemoryUsageEstimationProcessManager processManager; - - @Inject - public TransportEstimateMemoryUsageAction(TransportService transportService, - ActionFilters actionFilters, - ClusterService clusterService, - NodeClient client, - MemoryUsageEstimationProcessManager processManager) { - super(EstimateMemoryUsageAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); - this.transportService = transportService; - this.clusterService = Objects.requireNonNull(clusterService); - this.client = Objects.requireNonNull(client); - this.processManager = Objects.requireNonNull(processManager); - } - - @Override - protected void doExecute(Task task, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DiscoveryNode localNode = clusterService.localNode(); - if (MachineLearning.isMlNode(localNode)) { - doEstimateMemoryUsage(createTaskIdForMemoryEstimation(task), request, listener); - } else { - redirectToMlNode(request, listener); - } - } - - /** - * Creates unique task id for the memory estimation process. This id is useful when logging. - */ - private static String createTaskIdForMemoryEstimation(Task task) { - return "memory_usage_estimation_" + task.getId(); - } - - /** - * Performs memory usage estimation. - * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on - * the ML node. 
- */ - private void doEstimateMemoryUsage(String taskId, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DataFrameDataExtractorFactory.createForSourceIndices( - client, - taskId, - true, // We are not interested in first-time run validations here - request.getConfig(), - ActionListener.wrap( - dataExtractorFactory -> { - processManager.runJobAsync( - taskId, - request.getConfig(), - dataExtractorFactory, - ActionListener.wrap( - result -> listener.onResponse( - new EstimateMemoryUsageAction.Response( - result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), - listener::onFailure - ) - ); - }, - listener::onFailure - ) - ); - } - - /** - * Finds the first available ML node in the cluster and redirects the request to this node. - */ - private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - Optional node = findMlNode(clusterService.state()); - if (node.isPresent()) { - transportService.sendRequest( - node.get(), actionName, request, new ActionListenerResponseHandler<>(listener, EstimateMemoryUsageAction.Response::new)); - } else { - listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); - } - } - - /** - * Finds the first available ML node in the cluster state. 
- */ - private static Optional findMlNode(ClusterState clusterState) { - for (DiscoveryNode node : clusterState.getNodes()) { - if (MachineLearning.isMlNode(node)) { - return Optional.of(node); - } - } - return Optional.empty(); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java new file mode 100644 index 00000000000..7f19deb8d5b --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionListenerResponseHandler; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.license.LicenseUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.XPackField; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import 
org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetector; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; +import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +/** + * Provides explanations on aspects of the given data frame analytics spec like memory estimation, field selection, etc. + * Redirects to a different node if the current node is *not* an ML node. + */ +public class TransportExplainDataFrameAnalyticsAction + extends HandledTransportAction { + + private final XPackLicenseState licenseState; + private final TransportService transportService; + private final ClusterService clusterService; + private final NodeClient client; + private final MemoryUsageEstimationProcessManager processManager; + + @Inject + public TransportExplainDataFrameAnalyticsAction(TransportService transportService, + ActionFilters actionFilters, + ClusterService clusterService, + NodeClient client, + XPackLicenseState licenseState, + MemoryUsageEstimationProcessManager processManager) { + super(ExplainDataFrameAnalyticsAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); + this.transportService = transportService; + this.clusterService = Objects.requireNonNull(clusterService); + this.client = Objects.requireNonNull(client); + this.licenseState = licenseState; + this.processManager = Objects.requireNonNull(processManager); + } + + @Override + protected void doExecute(Task task, + PutDataFrameAnalyticsAction.Request request, + ActionListener listener) 
{ + if (licenseState.isMachineLearningAllowed() == false) { + listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING)); + return; + } + + DiscoveryNode localNode = clusterService.localNode(); + if (MachineLearning.isMlNode(localNode)) { + explain(task, request, listener); + } else { + redirectToMlNode(request, listener); + } + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); + extractedFieldsDetectorFactory.createFromSource(request.getConfig(), true, ActionListener.wrap( + extractedFieldsDetector -> { + explain(task, request, extractedFieldsDetector, listener); + }, + listener::onFailure + )); + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, ExtractedFieldsDetector extractedFieldsDetector, + ActionListener listener) { + Tuple> fieldExtraction = extractedFieldsDetector.detect(); + + ActionListener memoryEstimationListener = ActionListener.wrap( + memoryEstimation -> listener.onResponse(new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)), + listener::onFailure + ); + + estimateMemoryUsage(task, request, fieldExtraction.v1(), memoryEstimationListener); + } + + /** + * Performs memory usage estimation. + * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on + * the ML node. 
+ */ + private void estimateMemoryUsage(Task task, + PutDataFrameAnalyticsAction.Request request, + ExtractedFields extractedFields, + ActionListener listener) { + final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId(); + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices( + client, estimateMemoryTaskId, request.getConfig(), extractedFields); + processManager.runJobAsync( + estimateMemoryTaskId, + request.getConfig(), + extractorFactory, + ActionListener.wrap( + result -> listener.onResponse( + new MemoryEstimation(result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), + listener::onFailure + ) + ); + } + + /** + * Finds the first available ML node in the cluster and redirects the request to this node. + */ + private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + Optional node = findMlNode(clusterService.state()); + if (node.isPresent()) { + transportService.sendRequest(node.get(), actionName, request, + new ActionListenerResponseHandler<>(listener, ExplainDataFrameAnalyticsAction.Response::new)); + } else { + listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); + } + } + + /** + * Finds the first available ML node in the cluster state. 
+ */ + private static Optional findMlNode(ClusterState clusterState) { + for (DiscoveryNode node : clusterState.getNodes()) { + if (MachineLearning.isMlNode(node)) { + return Optional.of(node); + } + } + return Optional.empty(); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java index 1740a7fb532..af67750ee6d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.license.LicenseUtils; @@ -47,7 +48,7 @@ import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.XPackField; import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction; import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction; @@ -66,6 +67,7 @@ import org.elasticsearch.xpack.ml.dataframe.SourceDestValidator; import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; import 
org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; import org.elasticsearch.xpack.ml.dataframe.persistence.DataFrameAnalyticsConfigProvider; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import org.elasticsearch.xpack.ml.job.JobNodeSelector; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; import org.elasticsearch.xpack.ml.process.MlMemoryTracker; @@ -190,20 +192,18 @@ public class TransportStartDataFrameAnalyticsAction final String jobId = startContext.config.getId(); // Tell the job tracker to refresh the memory requirement for this job and all other jobs that have persistent tasks - ActionListener estimateMemoryUsageListener = ActionListener.wrap( - estimateMemoryUsageResponse -> { - auditor.info( - jobId, - Messages.getMessage( - Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, - estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk())); + ActionListener explainListener = ActionListener.wrap( + explainResponse -> { + ByteSizeValue expectedMemoryWithoutDisk = explainResponse.getMemoryEstimation().getExpectedMemoryWithoutDisk(); + auditor.info(jobId, + Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, expectedMemoryWithoutDisk)); // Validate that model memory limit is sufficient to run the analysis if (startContext.config.getModelMemoryLimit() - .compareTo(estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()) < 0) { + .compareTo(expectedMemoryWithoutDisk) < 0) { ElasticsearchStatusException e = ExceptionsHelper.badRequestException( "Cannot start because the configured model memory limit [{}] is lower than the expected memory usage [{}]", - startContext.config.getModelMemoryLimit(), estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()); + startContext.config.getModelMemoryLimit(), expectedMemoryWithoutDisk); listener.onFailure(e); return; } @@ -215,13 +215,13 @@ public class TransportStartDataFrameAnalyticsAction listener::onFailure ); - 
PutDataFrameAnalyticsAction.Request estimateMemoryUsageRequest = new PutDataFrameAnalyticsAction.Request(startContext.config); + PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(startContext.config); ClientHelper.executeAsyncWithOrigin( client, ClientHelper.ML_ORIGIN, - EstimateMemoryUsageAction.INSTANCE, - estimateMemoryUsageRequest, - estimateMemoryUsageListener); + ExplainDataFrameAnalyticsAction.INSTANCE, + explainRequest, + explainListener); } @@ -277,7 +277,11 @@ public class TransportStartDataFrameAnalyticsAction // Validate extraction is possible boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME; new ExtractedFieldsDetectorFactory(client).createFromSource(startContext.config, isTaskRestarting, ActionListener.wrap( - extractedFieldsDetector -> toValidateDestEmptyListener.onResponse(startContext), finalListener::onFailure)); + extractedFieldsDetector -> { + startContext.extractedFields = extractedFieldsDetector.detect().v1(); + toValidateDestEmptyListener.onResponse(startContext); + }, + finalListener::onFailure)); }, finalListener::onFailure ); @@ -294,33 +298,27 @@ public class TransportStartDataFrameAnalyticsAction } private void validateSourceIndexHasRows(StartContext startContext, ActionListener listener) { - boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME; - DataFrameDataExtractorFactory.createForSourceIndices(client, + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client, "validate_source_index_has_rows-" + startContext.config.getId(), - isTaskRestarting, startContext.config, - ActionListener.wrap( - dataFrameDataExtractorFactory -> - dataFrameDataExtractorFactory - .newExtractor(false) - .collectDataSummaryAsync(ActionListener.wrap( - dataSummary -> { - if (dataSummary.rows == 0) { - listener.onFailure(ExceptionsHelper.badRequestException( - 
"Unable to start {} as no documents in the source indices [{}] contained all the fields " - + "selected for analysis. If you are relying on automatic field selection then there are " - + "currently mapped fields that do not exist in any indexed documents, and you will have " - + "to switch to explicit field selection and include only fields that exist in indexed " - + "documents.", - startContext.config.getId(), - Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) - )); - } else { - listener.onResponse(startContext); - } - }, - listener::onFailure - )), + startContext.extractedFields); + extractorFactory.newExtractor(false) + .collectDataSummaryAsync(ActionListener.wrap( + dataSummary -> { + if (dataSummary.rows == 0) { + listener.onFailure(ExceptionsHelper.badRequestException( + "Unable to start {} as no documents in the source indices [{}] contained all the fields " + + "selected for analysis. If you are relying on automatic field selection then there are " + + "currently mapped fields that do not exist in any indexed documents, and you will have " + + "to switch to explicit field selection and include only fields that exist in indexed " + + "documents.", + startContext.config.getId(), + Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex()) + )); + } else { + listener.onResponse(startContext); + } + }, listener::onFailure )); } @@ -402,6 +400,7 @@ public class TransportStartDataFrameAnalyticsAction private final DataFrameAnalyticsConfig config; private final List progressOnStart; private final DataFrameAnalyticsTask.StartingState startingState; + private volatile ExtractedFields extractedFields; private StartContext(DataFrameAnalyticsConfig config, List progressOnStart) { this.config = config; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java index ce21973ca91..f8afd229098 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java @@ -29,7 +29,7 @@ public class DataFrameDataExtractorFactory { private final Map headers; private final boolean includeRowsWithMissingValues; - private DataFrameDataExtractorFactory(Client client, String analyticsId, List indices, ExtractedFields extractedFields, + public DataFrameDataExtractorFactory(Client client, String analyticsId, List indices, ExtractedFields extractedFields, Map headers, boolean includeRowsWithMissingValues) { this.client = Objects.requireNonNull(client); this.analyticsId = Objects.requireNonNull(analyticsId); @@ -66,32 +66,19 @@ public class DataFrameDataExtractorFactory { } /** - * Validate and create a new extractor factory + * Create a new extractor factory * * The source index must exist and contain at least 1 compatible field or validations will fail. 
* * @param client ES Client used to make calls against the cluster * @param taskId The task id - * @param isTaskRestarting Whether the task is restarting or it is running for the first time * @param config The config from which to create the extractor factory - * @param listener The listener to notify on creation or failure + * @param extractedFields The fields to extract */ - public static void createForSourceIndices(Client client, - String taskId, - boolean isTaskRestarting, - DataFrameAnalyticsConfig config, - ActionListener listener) { - ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); - extractedFieldsDetectorFactory.createFromSource(config, isTaskRestarting, ActionListener.wrap( - extractedFieldsDetector -> { - ExtractedFields extractedFields = extractedFieldsDetector.detect(); - DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, taskId, - Arrays.asList(config.getSource().getIndex()), extractedFields, config.getHeaders(), - config.getAnalysis().supportsMissingValues()); - listener.onResponse(extractorFactory); - }, - listener::onFailure - )); + public static DataFrameDataExtractorFactory createForSourceIndices(Client client, String taskId, DataFrameAnalyticsConfig config, + ExtractedFields extractedFields) { + return new DataFrameDataExtractorFactory(client, taskId, Arrays.asList(config.getSource().getIndex()), extractedFields, + config.getHeaders(), config.getAnalysis().supportsMissingValues()); } /** @@ -111,7 +98,7 @@ public class DataFrameDataExtractorFactory { ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); extractedFieldsDetectorFactory.createFromDest(config, isTaskRestarting, ActionListener.wrap( extractedFieldsDetector -> { - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + ExtractedFields extractedFields = extractedFieldsDetector.detect().v1(); DataFrameDataExtractorFactory 
extractorFactory = new DataFrameDataExtractorFactory(client, config.getId(), Collections.singletonList(config.getDest().getIndex()), extractedFields, config.getHeaders(), config.getAnalysis().supportsMissingValues()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java index 5d94b57aca5..682cc94433c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java @@ -11,6 +11,7 @@ import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.BooleanFieldMapper; @@ -19,6 +20,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest; import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import org.elasticsearch.xpack.core.ml.utils.NameResolver; @@ -29,13 +31,12 @@ import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; +import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashMap; import 
java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; @@ -57,9 +58,8 @@ public class ExtractedFieldsDetector { private final FieldCapabilitiesResponse fieldCapabilitiesResponse; private final Map fieldCardinalities; - ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, - int docValueFieldsLimit, FieldCapabilitiesResponse fieldCapabilitiesResponse, - Map fieldCardinalities) { + ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, int docValueFieldsLimit, + FieldCapabilitiesResponse fieldCapabilitiesResponse, Map fieldCardinalities) { this.index = Objects.requireNonNull(index); this.config = Objects.requireNonNull(config); this.isTaskRestarting = isTaskRestarting; @@ -68,8 +68,30 @@ public class ExtractedFieldsDetector { this.fieldCardinalities = Objects.requireNonNull(fieldCardinalities); } - public ExtractedFields detect() { - Set fields = getIncludedFields(); + public Tuple> detect() { + TreeSet fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName)); + Set fields = getIncludedFields(fieldSelection); + checkFieldsHaveCompatibleTypes(fields); + checkRequiredFields(fields); + checkFieldsWithCardinalityLimit(); + ExtractedFields extractedFields = detectExtractedFields(fields, fieldSelection); + addIncludedFields(extractedFields, fieldSelection); + + return Tuple.tuple(extractedFields, Collections.unmodifiableList(new ArrayList<>(fieldSelection))); + } + + private Set getIncludedFields(Set fieldSelection) { + Set fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet()); + fields.removeAll(IGNORE_FIELDS); + checkResultsFieldIsNotPresent(); + removeFieldsUnderResultsField(fields); + FetchSourceContext analyzedFields = config.getAnalyzedFields(); + + // If the user has not explicitly included fields we'll include all compatible 
fields + if (analyzedFields == null || analyzedFields.includes().length == 0) { + removeFieldsWithIncompatibleTypes(fields, fieldSelection); + } + includeAndExcludeFields(fields, fieldSelection); if (fields.isEmpty()) { throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.", @@ -77,26 +99,19 @@ public class ExtractedFieldsDetector { getSupportedTypes()); } - checkNoIgnoredFields(fields); - checkFieldsHaveCompatibleTypes(fields); - checkRequiredFields(fields); - checkFieldsWithCardinalityLimit(); - return detectExtractedFields(fields); + return fields; } - private Set getIncludedFields() { - Set fields = new HashSet<>(fieldCapabilitiesResponse.get().keySet()); - checkResultsFieldIsNotPresent(); - removeFieldsUnderResultsField(fields); - FetchSourceContext analyzedFields = config.getAnalyzedFields(); - - // If the user has not explicitly included fields we'll include all compatible fields - if (analyzedFields == null || analyzedFields.includes().length == 0) { - fields.removeAll(IGNORE_FIELDS); - removeFieldsWithIncompatibleTypes(fields); + private void removeFieldsUnderResultsField(Set fields) { + String resultsField = config.getDest().getResultsField(); + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + if (field.startsWith(resultsField + ".")) { + fieldsIterator.remove(); + } } - includeAndExcludeFields(fields); - return fields; + fields.removeIf(field -> field.startsWith(resultsField + ".")); } private void checkResultsFieldIsNotPresent() { @@ -117,16 +132,21 @@ public class ExtractedFieldsDetector { } } - private void removeFieldsUnderResultsField(Set fields) { - // Ignore fields under the results object - fields.removeIf(field -> field.startsWith(config.getDest().getResultsField() + ".")); + private void addExcludedField(String field, String reason, Set fieldSelection) { + fieldSelection.add(FieldSelection.excluded(field, 
getMappingTypes(field), reason)); } - private void removeFieldsWithIncompatibleTypes(Set fields) { + private Set getMappingTypes(String field) { + Map fieldCaps = fieldCapabilitiesResponse.getField(field); + return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet(); + } + + private void removeFieldsWithIncompatibleTypes(Set fields, Set fieldSelection) { Iterator fieldsIterator = fields.iterator(); while (fieldsIterator.hasNext()) { String field = fieldsIterator.next(); if (hasCompatibleType(field) == false) { + addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection); fieldsIterator.remove(); } } @@ -163,7 +183,7 @@ public class ExtractedFieldsDetector { return supportedTypes; } - private void includeAndExcludeFields(Set fields) { + private void includeAndExcludeFields(Set fields, Set fieldSelection) { FetchSourceContext analyzedFields = config.getAnalyzedFields(); if (analyzedFields == null) { return; @@ -188,18 +208,30 @@ public class ExtractedFieldsDetector { Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex))) .expand(excludes, true); - fields.retainAll(includedSet); - fields.removeAll(excludedSet); + applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection); } catch (ResourceNotFoundException ex) { // Re-wrap our exception so that we throw the same exception type when there are no fields. 
throw ExceptionsHelper.badRequestException(ex.getMessage()); } } - private void checkNoIgnoredFields(Set fields) { - Optional ignoreField = IGNORE_FIELDS.stream().filter(fields::contains).findFirst(); - if (ignoreField.isPresent()) { - throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", ignoreField.get()); + private void applyIncludesExcludes(Set fields, Set includes, Set excludes, + Set fieldSelection) { + Iterator fieldsIterator = fields.iterator(); + while (fieldsIterator.hasNext()) { + String field = fieldsIterator.next(); + if (includes.contains(field)) { + if (IGNORE_FIELDS.contains(field)) { + throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", field); + } + } else { + fieldsIterator.remove(); + addExcludedField(field, "field not in includes list", fieldSelection); + } + if (excludes.contains(field)) { + fieldsIterator.remove(); + addExcludedField(field, "field in excludes list", fieldSelection); + } } } @@ -247,13 +279,10 @@ public class ExtractedFieldsDetector { } } - private ExtractedFields detectExtractedFields(Set fields) { - List sortedFields = new ArrayList<>(fields); - // We sort the fields to ensure the checksum for each document is deterministic - Collections.sort(sortedFields); - ExtractedFields extractedFields = ExtractedFields.build(sortedFields, Collections.emptySet(), fieldCapabilitiesResponse); + private ExtractedFields detectExtractedFields(Set fields, Set fieldSelection) { + ExtractedFields extractedFields = ExtractedFields.build(fields, Collections.emptySet(), fieldCapabilitiesResponse); boolean preferSource = extractedFields.getDocValueFields().size() > docValueFieldsLimit; - extractedFields = deduplicateMultiFields(extractedFields, preferSource); + extractedFields = deduplicateMultiFields(extractedFields, preferSource, fieldSelection); if (preferSource) { extractedFields = fetchFromSourceIfSupported(extractedFields); if (extractedFields.getDocValueFields().size() > docValueFieldsLimit) { @@ 
-266,7 +295,8 @@ public class ExtractedFieldsDetector { return extractedFields; } - private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource) { + private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource, + Set fieldSelection) { Set requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName) .collect(Collectors.toSet()); Map nameOrParentToField = new LinkedHashMap<>(); @@ -276,43 +306,53 @@ public class ExtractedFieldsDetector { if (existingField != null) { ExtractedField parent = currentField.isMultiField() ? existingField : currentField; ExtractedField multiField = currentField.isMultiField() ? currentField : existingField; - nameOrParentToField.put(nameOrParent, chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField)); + nameOrParentToField.put(nameOrParent, + chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField, fieldSelection)); } } return new ExtractedFields(new ArrayList<>(nameOrParentToField.values())); } - private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set requiredFields, - ExtractedField parent, ExtractedField multiField) { + private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set requiredFields, ExtractedField parent, + ExtractedField multiField, Set fieldSelection) { // Check requirements first if (requiredFields.contains(parent.getName())) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is required instead", fieldSelection); return parent; } if (requiredFields.contains(multiField.getName())) { + addExcludedField(parent.getName(), "[" + multiField.getName() + "] is required instead", fieldSelection); return multiField; } // If both are multi-fields it means there are several. In this case parent is the previous multi-field // we selected. We'll just keep that. 
if (parent.isMultiField() && multiField.isMultiField()) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] came first", fieldSelection); return parent; } // If we prefer source only the parent may support it. If it does we pick it immediately. if (preferSource && parent.supportsFromSource()) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it supports fetching from source", + fieldSelection); return parent; } // If any of the two is a doc_value field let's prefer it as it'd support aggregations. // We check the parent first as it'd be a shorter field name. if (parent.getMethod() == ExtractedField.Method.DOC_VALUE) { + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it is aggregatable", fieldSelection); return parent; } if (multiField.getMethod() == ExtractedField.Method.DOC_VALUE) { + addExcludedField(parent.getName(), "[" + multiField.getName() + "] is preferred because it is aggregatable", fieldSelection); return multiField; } // None is aggregatable. Let's pick the parent for its shorter name. + addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because none of the multi-fields are aggregatable", + fieldSelection); return parent; } @@ -343,6 +383,26 @@ public class ExtractedFieldsDetector { return new ExtractedFields(adjusted); } + private void addIncludedFields(ExtractedFields extractedFields, Set fieldSelection) { + Set requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName) + .collect(Collectors.toSet()); + Set categoricalFields = getCategoricalFields(extractedFields); + for (ExtractedField includedField : extractedFields.getAllFields()) { + FieldSelection.FeatureType featureType = categoricalFields.contains(includedField.getName()) ? 
+ FieldSelection.FeatureType.CATEGORICAL : FieldSelection.FeatureType.NUMERICAL; + fieldSelection.add(FieldSelection.included(includedField.getName(), includedField.getTypes(), + requiredFields.contains(includedField.getName()), featureType)); + } + } + + private Set getCategoricalFields(ExtractedFields extractedFields) { + return extractedFields.getAllFields().stream() + .filter(extractedField -> config.getAnalysis().getAllowedCategoricalTypes(extractedField.getName()) + .containsAll(extractedField.getTypes())) + .map(ExtractedField::getName) + .collect(Collectors.toSet()); + } + private static boolean isBoolean(Set types) { return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java index 2e5189eb249..6740f8d4d34 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java @@ -100,9 +100,9 @@ public class MemoryUsageEstimationProcessManager { } finally { process.consumeAndCloseOutputStream(); try { - LOGGER.info("[{}] Closing process", jobId); + LOGGER.debug("[{}] Closing process", jobId); process.close(); - LOGGER.info("[{}] Closed process", jobId); + LOGGER.debug("[{}] Closed process", jobId); } catch (Exception e) { String errorMsg = new ParameterizedMessage( diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java deleted file mode 100644 index 25f2bcb4bb8..00000000000 --- 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.ml.rest.dataframe; - -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.rest.BaseRestHandler; -import org.elasticsearch.rest.RestController; -import org.elasticsearch.rest.RestRequest; -import org.elasticsearch.rest.action.RestToXContentListener; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.MachineLearning; - -import java.io.IOException; - -public class RestEstimateMemoryUsageAction extends BaseRestHandler { - - public RestEstimateMemoryUsageAction(RestController controller) { - controller.registerHandler( - RestRequest.Method.POST, - MachineLearning.BASE_PATH + "data_frame/analytics/_estimate_memory_usage", this); - } - - @Override - public String getName() { - return "ml_estimate_memory_usage_action"; - } - - @Override - protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { - PutDataFrameAnalyticsAction.Request request = - PutDataFrameAnalyticsAction.Request.parseRequestForMemoryEstimation(restRequest.contentOrSourceParamParser()); - return channel -> client.execute(EstimateMemoryUsageAction.INSTANCE, request, new RestToXContentListener<>(channel)); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java new file mode 100644 index 
00000000000..b16bf7b3efb --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.rest.dataframe; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestController; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; + +public class RestExplainDataFrameAnalyticsAction extends BaseRestHandler { + + public RestExplainDataFrameAnalyticsAction(RestController controller) { + controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this); + controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/{" + + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this); + controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + 
"data_frame/analytics/{" + + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this); + } + + @Override + public String getName() { + return "ml_explain_data_frame_analytics_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { + final String jobId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName()); + + if (Strings.isNullOrEmpty(jobId) && restRequest.hasContentOrSourceParam() == false) { + throw ExceptionsHelper.badRequestException("Please provide a job [{}] or the config object", + DataFrameAnalyticsConfig.ID.getPreferredName()); + } + + if (Strings.isNullOrEmpty(jobId) == false && restRequest.hasContentOrSourceParam()) { + throw ExceptionsHelper.badRequestException("Please provide either a job [{}] or the config object but not both", + DataFrameAnalyticsConfig.ID.getPreferredName()); + } + + // We need to consume the body before returning + PutDataFrameAnalyticsAction.Request explainRequestFromBody = Strings.isNullOrEmpty(jobId) ? 
+ PutDataFrameAnalyticsAction.Request.parseRequestForExplain(restRequest.contentOrSourceParamParser()) : null; + + return channel -> { + RestToXContentListener listener = new RestToXContentListener<>(channel); + + if (explainRequestFromBody != null) { + client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequestFromBody, listener); + } else { + GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(jobId); + getRequest.setAllowNoResources(false); + client.execute(GetDataFrameAnalyticsAction.INSTANCE, getRequest, ActionListener.wrap( + getResponse -> { + List jobs = getResponse.getResources().results(); + if (jobs.size() > 1) { + listener.onFailure(ExceptionsHelper.badRequestException("expected only one config but matched {}", + jobs.stream().map(DataFrameAnalyticsConfig::getId).collect(Collectors.toList()))); + } else { + PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(jobs.get(0)); + client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequest, listener); + } + }, + listener::onFailure + )); + } + }; + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java index 8f33c9bfbbf..5f7bd650a1c 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.ml.dataframe.extractor; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.search.SearchHit; import 
org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.test.ESTestCase; @@ -17,6 +18,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection; import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; import org.elasticsearch.xpack.ml.extractor.ExtractedField; import org.elasticsearch.xpack.ml.extractor.ExtractedFields; import org.elasticsearch.xpack.ml.test.SearchHitBuilder; @@ -25,6 +27,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -48,12 +51,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_float")); assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)); } public void testDetect_GivenNumericFieldWithMultipleTypes() { @@ -63,12 +69,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( 
SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.get(0).getName(), equalTo("some_number")); assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + + assertFieldSelectionContains(fieldExtraction.v2(), FieldSelection.included("some_number", + new HashSet<>(Arrays.asList("long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")), false, + FieldSelection.FeatureType.NUMERICAL)); } public void testDetect_GivenOutlierDetectionAndNonNumericField() { @@ -105,14 +115,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(3)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()), containsInAnyOrder("some_float", "some_long", "some_boolean")); assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + 
FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " + + "supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"), + FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenRegressionAndMultipleFields() { @@ -126,14 +144,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("foo"), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(5)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean")); assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("foo", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("some_keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenRegressionAndRequiredFieldMissing() { @@ -191,11 +217,16 @@ public class 
ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), contains("bar")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("bar", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("foo", Collections.singleton("float"), "field in excludes list") + ); } public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() { @@ -258,14 +289,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { public void testDetect_GivenIncludedIgnoredField() { FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder() - .addAggregatableField("_id", "float").build(); + .addAggregatableField("_id", "float") + .build(); FetchSourceContext analyzedFields = new FetchSourceContext(true, new String[]{"_id"}, new String[0]); ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> extractedFieldsDetector.detect()); - assertThat(e.getMessage(), equalTo("field [_id] cannot be analyzed")); + assertThat(e.getMessage(), equalTo("No field [_id] could be detected")); } public void testDetect_ShouldSortFieldsAlphabetically() { @@ -285,9 +317,9 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { 
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(sortedFields)); } @@ -333,11 +365,17 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(desiredFields), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2"))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("my_field1_nope", Collections.singleton("float"), "field in excludes list"), + FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenIncludedFieldHasUnsupportedType() { @@ -384,11 +422,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), 
true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2"))); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " + + "are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]") + ); } public void testDetect_GivenIncludedResultsField() { @@ -434,12 +479,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 4, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + 
assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); } @@ -453,12 +498,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 3, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), + assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.DOC_VALUE))); } @@ -472,12 +517,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), true, 2, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3"))); - assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), 
+ assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()), contains(equalTo(ExtractedField.Method.SOURCE))); } @@ -488,14 +533,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); ExtractedField booleanField = allFields.get(0); assertThat(booleanField.getTypes(), contains("boolean")); assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL) + ); + SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build(); assertThat(booleanField.value(hit), arrayContaining(1)); @@ -514,14 +563,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildClassificationConfig("some_boolean"), false, 100, fieldCapabilities, Collections.singletonMap("some_boolean", 2L)); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - List allFields = extractedFields.getAllFields(); + List allFields = fieldExtraction.v1().getAllFields(); assertThat(allFields.size(), equalTo(1)); ExtractedField booleanField = allFields.get(0); assertThat(booleanField.getTypes(), contains("boolean")); assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE)); + 
assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("some_boolean", Collections.singleton("boolean"), true, FieldSelection.FeatureType.CATEGORICAL) + ); + SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build(); assertThat(booleanField.value(hit), arrayContaining("true")); @@ -546,12 +599,26 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("a_float"), true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(5)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(5)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("a_float", "keyword_1", "text_1.keyword", "text_2.keyword", "text_without_keyword")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("a_float", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.included("keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("keyword_1.text", Collections.singleton("text"), + "[keyword_1] is preferred because it is aggregatable"), + FieldSelection.excluded("text_1", Collections.singleton("text"), + "[text_1.keyword] is preferred because it is aggregatable"), + FieldSelection.included("text_1.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("text_2", Collections.singleton("text"), + "[text_2.keyword] is preferred 
because it is aggregatable"), + FieldSelection.included("text_2.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.included("text_without_keyword", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL) + ); } public void testDetect_GivenMultiFieldAndParentIsRequired() { @@ -563,12 +630,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildClassificationConfig("field_1"), true, 100, fieldCapabilities, Collections.singletonMap("field_1", 2L)); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1", "field_2")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("field_1", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), + "[field_1] is required instead"), + FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() { @@ -581,12 +655,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildClassificationConfig("field_1.keyword"), true, 100, fieldCapabilities, Collections.singletonMap("field_1.keyword", 2L)); - 
ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1.keyword", "field_2")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.excluded("field_1", Collections.singleton("keyword"), + "[field_1.keyword] is required instead"), + FieldSelection.included("field_1.keyword", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() { @@ -600,12 +681,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1.keyword_1", "field_2")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.excluded("field_1", 
Collections.singleton("text"), + "[field_1.keyword_1] is preferred because it is aggregatable"), + FieldSelection.included("field_1.keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("field_1.keyword_2", Collections.singleton("keyword"), "[field_1.keyword_1] came first"), + FieldSelection.excluded("field_1.keyword_3", Collections.singleton("keyword"), "[field_1.keyword_1] came first"), + FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenMultiFields_OverDocValueLimit() { @@ -617,12 +707,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("field_2"), true, 0, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1", "field_2")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("field_1.keyword_1", Collections.singleton("keyword"), + "[field_1] is preferred because it supports fetching from source"), + FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenParentAndMultiFieldBothAggregatable() { @@ -635,12 +732,20 @@ 
public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("field_2.double"), true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1", "field_2.double")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("field_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), + "[field_1] is preferred because it is aggregatable"), + FieldSelection.included("field_2.double", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL), + FieldSelection.excluded("field_2.keyword", Collections.singleton("float"), "[field_2.double] is required instead") + ); } public void testDetect_GivenParentAndMultiFieldNoneAggregatable() { @@ -652,12 +757,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = 
extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1", "field_2")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("field_1.text", Collections.singleton("text"), + "[field_1] is preferred because none of the multi-fields are aggregatable"), + FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL) + ); } public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() { @@ -670,12 +782,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector( SOURCE_INDEX, buildRegressionConfig("field_2", analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap()); - ExtractedFields extractedFields = extractedFieldsDetector.detect(); + Tuple> fieldExtraction = extractedFieldsDetector.detect(); - assertThat(extractedFields.getAllFields().size(), equalTo(2)); - List extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName) + assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2)); + List extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName) .collect(Collectors.toList()); assertThat(extractedFieldNames, contains("field_1", "field_2")); + + assertFieldSelectionContains(fieldExtraction.v2(), + FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL), + FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), "field not in includes list"), + 
FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL) + ); } private static DataFrameAnalyticsConfig buildOutlierDetectionConfig() { @@ -715,6 +833,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase { .build(); } + /** + * We assert each field individually to get useful error messages in case of failure + */ + private static void assertFieldSelectionContains(List actual, FieldSelection... expected) { + assertThat(actual.size(), equalTo(expected.length)); + for (int i = 0; i < expected.length; i++) { + assertThat("i = " + i, actual.get(i).getName(), equalTo(expected[i].getName())); + assertThat("i = " + i, actual.get(i).getMappingTypes(), equalTo(expected[i].getMappingTypes())); + assertThat("i = " + i, actual.get(i).isIncluded(), equalTo(expected[i].isIncluded())); + assertThat("i = " + i, actual.get(i).isRequired(), equalTo(expected[i].isRequired())); + assertThat("i = " + i, actual.get(i).getFeatureType(), equalTo(expected[i].getFeatureType())); + assertThat("i = " + i, actual.get(i).getReason(), equalTo(expected[i].getReason())); + } + } + private static class MockFieldCapsResponseBuilder { private final Map> fieldCaps = new HashMap<>(); diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json deleted file mode 100644 index 99bd6527de3..00000000000 --- a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.estimate_memory_usage.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "ml.estimate_memory_usage": { - "documentation": { - "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html" - }, - "stability": "experimental", - "url": { - "paths" : [ - { - "path" : "/_ml/data_frame/analytics/_estimate_memory_usage", - "methods": [ "POST" ], - "parts": {} - } - ] - }, - "body": { - "description" : "Memory usage estimation definition", 
- "required" : true - } - } -} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json new file mode 100644 index 00000000000..6969cf9a49f --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/ml.explain_data_frame_analytics.json @@ -0,0 +1,31 @@ +{ + "ml.explain_data_frame_analytics": { + "documentation": { + "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html" + }, + "stability": "experimental", + "url": { + "paths" : [ + { + "path" : "/_ml/data_frame/analytics/_explain", + "methods": [ "GET", "POST" ], + "parts": {} + }, + { + "path" : "/_ml/data_frame/analytics/{id}/_explain", + "methods": [ "GET", "POST" ], + "parts":{ + "id":{ + "type":"string", + "description":"The ID of the data frame analytics to explain" + } + } + } + ] + }, + "body": { + "description" : "The data frame analytics config to explain", + "required" : false + } + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml deleted file mode 100644 index 39fe8005fa8..00000000000 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_memory_usage_estimation.yml +++ /dev/null @@ -1,84 +0,0 @@ ---- -setup: - - do: - indices.create: - index: index-source - body: - mappings: - properties: - x: - type: float - y: - type: float - ---- -"Test memory usage estimation for empty data frame": - - do: - catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - - do: - index: - index: index-source - refresh: true - body: { x: 1 } 
- - match: { result: "created" } - - # Note that value for "y" is missing and outlier detection analysis does not support missing values. - # Hence, the data frame is still considered empty. - - do: - catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - ---- -"Test memory usage estimation for non-empty data frame": - - do: - index: - index: index-source - refresh: true - body: { x: 1, y: 10 } - - match: { result: "created" } - - - do: - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "3kb" } - - match: { expected_memory_with_disk: "3kb" } - - - do: - index: - index: index-source - refresh: true - body: { x: 2, y: 20 } - - match: { result: "created" } - - - do: - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "4kb" } - - match: { expected_memory_with_disk: "4kb" } - - - do: - index: - index: index-source - refresh: true - body: { x: 3, y: 30 } - - match: { result: "created" } - - - do: - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "6kb" } - - match: { expected_memory_with_disk: "5kb" } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml new file mode 100644 index 00000000000..f4296427256 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml @@ -0,0 +1,308 @@ +--- +"Test neither job id nor body": + - do: + catch: /Please provide a job \[id\] or the config object/ + 
ml.explain_data_frame_analytics: + id: "" + +--- +"Test both job id and body": + - do: + catch: /Please provide either a job \[id\] or the config object but not both/ + ml.explain_data_frame_analytics: + id: "foo" + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + +--- +"Test missing job": + - do: + catch: missing + ml.explain_data_frame_analytics: + id: "no_such_job" + +--- +"Test id that matches multiple jobs": + + - do: + indices.create: + index: index-source + + - do: + ml.put_data_frame_analytics: + id: "foo-1" + body: > + { + "source": { + "index": "index-source" + }, + "dest": { + "index": "index-dest" + }, + "analysis": {"outlier_detection":{}} + } + + - do: + ml.put_data_frame_analytics: + id: "foo-2" + body: > + { + "source": { + "index": "index-source" + }, + "dest": { + "index": "index-dest" + }, + "analysis": {"outlier_detection":{}} + } + + - do: + catch: /expected only one config but matched \[foo-1, foo-2\]/ + ml.explain_data_frame_analytics: + id: "foo-*" + +--- +"Test empty data frame given body": + + - do: + indices.create: + index: index-source + body: + mappings: + properties: + x: + type: float + y: + type: float + + - do: + catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ + ml.explain_data_frame_analytics: + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + + - do: + index: + index: index-source + refresh: true + body: { x: 1 } + - match: { result: "created" } + + # Note that value for "y" is missing and outlier detection analysis does not support missing values. + # Hence, the data frame is still considered empty. 
+ - do: + catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ + ml.explain_data_frame_analytics: + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + +--- +"Test non-empty data frame given body": + + - do: + indices.create: + index: index-source + body: + mappings: + properties: + x: + type: float + y: + type: float + + - do: + index: + index: index-source + refresh: true + body: { x: 1, y: 10 } + - match: { result: "created" } + + - do: + ml.explain_data_frame_analytics: + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + - match: { memory_estimation.expected_memory_without_disk: "3kb" } + - match: { memory_estimation.expected_memory_with_disk: "3kb" } + - length: { field_selection: 2 } + - match: { field_selection.0.name: "x" } + - match: { field_selection.0.mapping_types: ["float"] } + - match: { field_selection.0.is_included: true } + - match: { field_selection.0.is_required: false } + - match: { field_selection.0.feature_type: "numerical" } + - is_false: field_selection.0.reason + - match: { field_selection.1.name: "y" } + - match: { field_selection.1.mapping_types: ["float"] } + - match: { field_selection.1.is_included: true } + - match: { field_selection.1.is_required: false } + - match: { field_selection.1.feature_type: "numerical" } + - is_false: field_selection.1.reason + + - do: + index: + index: index-source + refresh: true + body: { x: 2, y: 20 } + - match: { result: "created" } + + - do: + ml.explain_data_frame_analytics: + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + - match: { memory_estimation.expected_memory_without_disk: "4kb" } + - match: { memory_estimation.expected_memory_with_disk: "4kb" } + + - do: + index: + index: index-source + refresh: true + body: { x: 3, y: 30 } + - match: { result: "created" } + + - do: + ml.explain_data_frame_analytics: + body: + 
source: { index: "index-source" } + analysis: { outlier_detection: {} } + - match: { memory_estimation.expected_memory_without_disk: "6kb" } + - match: { memory_estimation.expected_memory_with_disk: "5kb" } + +--- +"Test field_selection given body": + + - do: + indices.create: + index: index-source + body: + mappings: + properties: + field_1: + type: integer + field_2: + type: double + field_3: + type: date + + - do: + index: + index: index-source + refresh: true + body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" } + - match: { result: "created" } + + - do: + ml.explain_data_frame_analytics: + body: + source: { index: "index-source" } + analysis: { regression: { dependent_variable: "field_1" } } + - is_true: memory_estimation.expected_memory_without_disk + - is_true: memory_estimation.expected_memory_with_disk + - length: { field_selection: 5 } + - match: { field_selection.0.name: "field_1" } + - match: { field_selection.0.mapping_types: ["integer"] } + - match: { field_selection.0.is_included: true } + - match: { field_selection.0.is_required: true } + - match: { field_selection.0.feature_type: "numerical" } + - is_false: field_selection.0.reason + - match: { field_selection.1.name: "field_2" } + - match: { field_selection.1.mapping_types: ["double"] } + - match: { field_selection.1.is_included: true } + - match: { field_selection.1.is_required: false } + - match: { field_selection.1.feature_type: "numerical" } + - is_false: field_selection.1.reason + - match: { field_selection.2.name: "field_3" } + - match: { field_selection.2.mapping_types: ["date"] } + - match: { field_selection.2.is_included: false } + - match: { field_selection.2.is_required: false } + - is_false: field_selection.2.feature_type + - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" } + - match: { field_selection.3.name: "field_4" } + - 
match: { field_selection.3.mapping_types: ["text"] } + - match: { field_selection.3.is_included: false } + - match: { field_selection.3.is_required: false } + - is_false: field_selection.3.feature_type + - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" } + - match: { field_selection.4.name: "field_4.keyword" } + - match: { field_selection.4.mapping_types: ["keyword"] } + - match: { field_selection.4.is_included: true } + - match: { field_selection.4.is_required: false } + - match: { field_selection.4.feature_type: "categorical" } + - is_false: field_selection.4.reason + +--- +"Test field_selection given job": + + - do: + indices.create: + index: index-source + body: + mappings: + properties: + field_1: + type: integer + field_2: + type: double + field_3: + type: date + + - do: + index: + index: index-source + refresh: true + body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" } + - match: { result: "created" } + + - do: + ml.put_data_frame_analytics: + id: "got-a-job-for-this-one" + body: > + { + "source": { + "index": "index-source" + }, + "dest": { + "index": "index-dest" + }, + "analysis": {"regression":{ "dependent_variable": "field_1" }} + } + + - do: + ml.explain_data_frame_analytics: + id: "got-a-job-for-this-one" + - is_true: memory_estimation.expected_memory_without_disk + - is_true: memory_estimation.expected_memory_with_disk + - length: { field_selection: 5 } + - match: { field_selection.0.name: "field_1" } + - match: { field_selection.0.mapping_types: ["integer"] } + - match: { field_selection.0.is_included: true } + - match: { field_selection.0.is_required: true } + - match: { field_selection.0.feature_type: "numerical" } + - is_false: field_selection.0.reason + - match: { field_selection.1.name: "field_2" } + - match: { field_selection.1.mapping_types: ["double"] } + - match: { field_selection.1.is_included: true } + - match: { field_selection.1.is_required: false } + - 
match: { field_selection.1.feature_type: "numerical" } + - is_false: field_selection.1.reason + - match: { field_selection.2.name: "field_3" } + - match: { field_selection.2.mapping_types: ["date"] } + - match: { field_selection.2.is_included: false } + - match: { field_selection.2.is_required: false } + - is_false: field_selection.2.feature_type + - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" } + - match: { field_selection.3.name: "field_4" } + - match: { field_selection.3.mapping_types: ["text"] } + - match: { field_selection.3.is_included: false } + - match: { field_selection.3.is_required: false } + - is_false: field_selection.3.feature_type + - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" } + - match: { field_selection.4.name: "field_4.keyword" } + - match: { field_selection.4.mapping_types: ["keyword"] } + - match: { field_selection.4.is_included: true } + - match: { field_selection.4.is_required: false } + - match: { field_selection.4.feature_type: "categorical" } + - is_false: field_selection.4.reason