diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java index 2fc23acd134..0a1a18eeb44 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MLRequestConverters.java @@ -29,6 +29,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.client.RequestConverters.EndpointBuilder; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -701,12 +702,17 @@ final class MLRequestConverters { return request; } - static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException { - String endpoint = new EndpointBuilder() - .addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage") - .build(); - Request request = new Request(HttpPost.METHOD_NAME, endpoint); - request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE)); + static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException { + EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics"); + if (explainRequest.getId() != null) { + endpoint.addPathPart(explainRequest.getId()); + } + endpoint.addPathPartAsIs("_explain"); + + Request request = new Request(HttpPost.METHOD_NAME, endpoint.build()); + if (explainRequest.getConfig() != null) { + request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE)); + } return request; } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java index 2ddc8839f96..468cd535c01 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java @@ -22,6 +22,8 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -34,7 +36,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -2249,46 +2250,46 @@ public final class MachineLearningClient { } /** - * Estimates memory usage for the given Data Frame Analytics + * Explains the 
given Data Frame Analytics * <p>
* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized - * @return {@link EstimateMemoryUsageResponse} response object + * @return {@link ExplainDataFrameAnalyticsResponse} response object * @throws IOException when there is a serialization issue sending the request or receiving the response */ - public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request, - RequestOptions options) throws IOException { + public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request, + RequestOptions options) throws IOException { return restHighLevelClient.performRequestAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, Collections.emptySet()); } /** - * Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion + * Explains the given Data Frame Analytics asynchronously and notifies listener upon completion *
<p>
* For additional info - * see - * Estimate Memory Usage for Data Frame Analytics documentation + * see + * Explain Data Frame Analytics documentation * - * @param request The {@link PutDataFrameAnalyticsRequest} + * @param request The {@link ExplainDataFrameAnalyticsRequest} * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized * @param listener Listener to be notified upon request completion * @return cancellable that may be used to cancel the request */ - public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options, - ActionListener listener) { + public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options, + ActionListener listener) { return restHighLevelClient.performRequestAsyncAndParseEntity( request, - MLRequestConverters::estimateMemoryUsage, + MLRequestConverters::explainDataFrameAnalytics, options, - EstimateMemoryUsageResponse::fromXContent, + ExplainDataFrameAnalyticsResponse::fromXContent, listener, Collections.emptySet()); } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java new file mode 100644 index 00000000000..880e87b2eea --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequest.java @@ -0,0 +1,72 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.Validatable; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.common.Nullable; + +import java.util.Objects; + +/** + * Request to explain the following about a data frame analytics job: + *
<ul> + *   <li>field selection: which fields are included or not in the analysis</li> + *   <li>memory estimation: how much memory is estimated to be required</li> + * </ul>
+ */ +public class ExplainDataFrameAnalyticsRequest implements Validatable { + + private final String id; + private final DataFrameAnalyticsConfig config; + + public ExplainDataFrameAnalyticsRequest(String id) { + this.id = Objects.requireNonNull(id); + this.config = null; + } + + public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) { + this.id = null; + this.config = Objects.requireNonNull(config); + } + + @Nullable + public String getId() { + return id; + } + + @Nullable + public DataFrameAnalyticsConfig getConfig() { + return config; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o; + return Objects.equals(id, other.id) && Objects.equals(config, other.config); + } + + @Override + public int hashCode() { + return Objects.hash(id, config); + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java new file mode 100644 index 00000000000..5879ffc7154 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponse.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class ExplainDataFrameAnalyticsResponse implements ToXContentObject { + + public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response"); + + public static final ParseField FIELD_SELECTION = new ParseField("field_selection"); + public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation"); + + public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = + new ConstructingObjectParser<>( + TYPE.getPreferredName(), true, + args -> new ExplainDataFrameAnalyticsResponse((List) args[0], (MemoryEstimation) args[1])); + + static { + PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION); + } + + private final List fieldSelection; + private final MemoryEstimation memoryEstimation; + + public ExplainDataFrameAnalyticsResponse(List fieldSelection, MemoryEstimation memoryEstimation) { + this.fieldSelection = Objects.requireNonNull(fieldSelection); + this.memoryEstimation = Objects.requireNonNull(memoryEstimation); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection); + builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other; + return Objects.equals(fieldSelection, that.fieldSelection) + && Objects.equals(memoryEstimation, that.memoryEstimation); + } + + @Override + public int hashCode() { + return Objects.hash(fieldSelection, memoryEstimation); + } + + public MemoryEstimation getMemoryEstimation() { + return memoryEstimation; + } + + public List getFieldSelection() { + return fieldSelection; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java new file mode 100644 index 00000000000..4483b6fa5e0 --- /dev/null +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelection.java @@ -0,0 +1,163 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Set; + +public class FieldSelection implements ToXContentObject { + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAPPING_TYPES = new ParseField("mapping_types"); + private static final ParseField IS_INCLUDED = new ParseField("is_included"); + private static final ParseField IS_REQUIRED = new ParseField("is_required"); + private static final ParseField FEATURE_TYPE = new ParseField("feature_type"); + private static final ParseField REASON = new ParseField("reason"); + + public enum FeatureType { + CATEGORICAL, NUMERICAL; + + public static FeatureType fromString(String value) { + return FeatureType.valueOf(value.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + + @SuppressWarnings("unchecked") + public static ConstructingObjectParser PARSER = new ConstructingObjectParser<>("field_selection", true, + a -> new FieldSelection((String) a[0], new HashSet<>((List) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4], + (String) a[5])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED); + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return FeatureType.fromString(p.text()); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, FEATURE_TYPE, ObjectParser.ValueType.STRING); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON); + } + + private final String name; + private final Set mappingTypes; + private final boolean isIncluded; + private final boolean isRequired; + private final FeatureType featureType; + private final String reason; + + public static FieldSelection included(String name, Set mappingTypes, boolean isRequired, FeatureType featureType) { + return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null); + } + + public static FieldSelection excluded(String name, Set mappingTypes, 
String reason) { + return new FieldSelection(name, mappingTypes, false, false, null, reason); + } + + FieldSelection(String name, Set mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType, + @Nullable String reason) { + this.name = Objects.requireNonNull(name); + this.mappingTypes = Collections.unmodifiableSet(mappingTypes); + this.isIncluded = isIncluded; + this.isRequired = isRequired; + this.featureType = featureType; + this.reason = reason; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(NAME.getPreferredName(), name); + builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes); + builder.field(IS_INCLUDED.getPreferredName(), isIncluded); + builder.field(IS_REQUIRED.getPreferredName(), isRequired); + if (featureType != null) { + builder.field(FEATURE_TYPE.getPreferredName(), featureType); + } + if (reason != null) { + builder.field(REASON.getPreferredName(), reason); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldSelection that = (FieldSelection) o; + return Objects.equals(name, that.name) + && Objects.equals(mappingTypes, that.mappingTypes) + && isIncluded == that.isIncluded + && isRequired == that.isRequired + && Objects.equals(featureType, that.featureType) + && Objects.equals(reason, that.reason); + } + + @Override + public int hashCode() { + return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason); + } + + public String getName() { + return name; + } + + public Set getMappingTypes() { + return mappingTypes; + } + + public boolean isIncluded() { + return isIncluded; + } + + public boolean isRequired() { + return isRequired; + } + + @Nullable + public FeatureType getFeatureType() { + return featureType; + } + + @Nullable + public String getReason() { + return reason; + } +} diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java similarity index 81% rename from client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java rename to client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java index c97cc545cdb..9151b8ce5dd 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponse.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimation.java @@ -16,8 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ - -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; @@ -26,23 +25,19 @@ import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; -public class EstimateMemoryUsageResponse implements ToXContentObject { - +public class MemoryEstimation implements ToXContentObject { + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - "estimate_memory_usage_response", - true, - args -> new EstimateMemoryUsageResponse((ByteSizeValue) args[0], (ByteSizeValue) args[1])); + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", true, + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); static { PARSER.declareField( @@ -57,14 +52,10 @@ public class EstimateMemoryUsageResponse implements ToXContentObject { ObjectParser.ValueType.VALUE); } - public static EstimateMemoryUsageResponse fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - private final ByteSizeValue expectedMemoryWithoutDisk; private final ByteSizeValue expectedMemoryWithDisk; - public EstimateMemoryUsageResponse(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; this.expectedMemoryWithDisk = expectedMemoryWithDisk; } @@ -99,7 +90,7 @@ public class EstimateMemoryUsageResponse implements ToXContentObject { return false; } - EstimateMemoryUsageResponse that = (EstimateMemoryUsageResponse) other; + MemoryEstimation that = (MemoryEstimation) other; return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java index db59054cdb8..633e5363ff1 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MLRequestConvertersTests.java @@ -25,6 +25,7 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPut; import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.ml.CloseJobRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -788,14 +789,25 @@ public class MLRequestConvertersTests extends ESTestCase { } } - 
public void testEstimateMemoryUsage() throws IOException { - PutDataFrameAnalyticsRequest estimateRequest = new PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); - Request request = MLRequestConverters.estimateMemoryUsage(estimateRequest); - assertEquals(HttpPost.METHOD_NAME, request.getMethod()); - assertEquals("/_ml/data_frame/analytics/_estimate_memory_usage", request.getEndpoint()); - try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { - DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); - assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + public void testExplainDataFrameAnalytics() throws IOException { + // Request with config + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig()); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/_explain", request.getEndpoint()); + try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) { + DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser); + assertThat(parsedConfig, equalTo(estimateRequest.getConfig())); + } + } + // Request with id + { + ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest("foo"); + Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_ml/data_frame/analytics/foo/_explain", request.getEndpoint()); + assertNull(request.getEntity()); } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 361b3674550..efb62b3f526 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -32,6 +32,8 @@ import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.indices.GetIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -44,7 +46,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -140,6 +141,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.Binar import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import 
org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -1996,8 +1999,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { highLevelClient().indices().create(new CreateIndexRequest(indexName).mapping(mapping), RequestOptions.DEFAULT); } - public void testEstimateMemoryUsage() throws IOException { - String indexName = "estimate-test-index"; + public void testExplainDataFrameAnalytics() throws IOException { + String indexName = "explain-df-test-index"; createIndex(indexName, mappingForSoftClassification()); BulkRequest bulk1 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2007,8 +2010,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { highLevelClient().bulk(bulk1, RequestOptions.DEFAULT); MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); - PutDataFrameAnalyticsRequest estimateMemoryUsageRequest = - new PutDataFrameAnalyticsRequest( + ExplainDataFrameAnalyticsRequest explainRequest = + new ExplainDataFrameAnalyticsRequest( DataFrameAnalyticsConfig.builder() .setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build()) .setAnalysis(OutlierDetection.createDefault()) @@ -2019,11 +2022,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB); // Data Frame has 10 rows, expect that the returned estimates fall within (1kB, 1GB) range. - EstimateMemoryUsageResponse response1 = - execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); - assertThat(response1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); - assertThat(response1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + ExplainDataFrameAnalyticsResponse response1 = execute(explainRequest, machineLearningClient::explainDataFrameAnalytics, + machineLearningClient::explainDataFrameAnalyticsAsync); + + MemoryEstimation memoryEstimation1 = response1.getMemoryEstimation(); + assertThat(memoryEstimation1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + assertThat(memoryEstimation1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound))); + + List fieldSelection = response1.getFieldSelection(); + assertThat(fieldSelection.size(), equalTo(3)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("dataset", "label", "p")); BulkRequest bulk2 = new BulkRequest() .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); @@ -2033,15 +2041,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase { highLevelClient().bulk(bulk2, RequestOptions.DEFAULT); // Data Frame now has 100 rows, expect that the returned estimates will be greater than or equal to the previous ones. 
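// Only monotonicity is asserted here: the 100-row estimates must be greater than or equal to the 10-row estimates, while staying under the same upper bound; the absolute values remain implementation-defined.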
- EstimateMemoryUsageResponse response2 = + ExplainDataFrameAnalyticsResponse response2 = execute( - estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync); + explainRequest, machineLearningClient::explainDataFrameAnalytics, machineLearningClient::explainDataFrameAnalyticsAsync); + MemoryEstimation memoryEstimation2 = response2.getMemoryEstimation(); assertThat( - response2.getExpectedMemoryWithoutDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithoutDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithoutDisk()), lessThan(upperBound))); assertThat( - response2.getExpectedMemoryWithDisk(), - allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithDisk()), lessThan(upperBound))); + memoryEstimation2.getExpectedMemoryWithDisk(), + allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithDisk()), lessThan(upperBound))); } public void testGetTrainedModels() throws Exception { diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index da12420535f..8a118672d95 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -36,6 +36,8 @@ import org.elasticsearch.client.core.PageParams; import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.ml.CloseJobRequest; import org.elasticsearch.client.ml.CloseJobResponse; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest; +import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse; import org.elasticsearch.client.ml.DeleteCalendarEventRequest; import org.elasticsearch.client.ml.DeleteCalendarJobRequest; import org.elasticsearch.client.ml.DeleteCalendarRequest; @@ -48,7 +50,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest; import org.elasticsearch.client.ml.DeleteJobRequest; import org.elasticsearch.client.ml.DeleteJobResponse; import org.elasticsearch.client.ml.DeleteModelSnapshotRequest; -import org.elasticsearch.client.ml.EstimateMemoryUsageResponse; import org.elasticsearch.client.ml.EvaluateDataFrameRequest; import org.elasticsearch.client.ml.EvaluateDataFrameResponse; import org.elasticsearch.client.ml.FindFileStructureRequest; @@ -155,6 +156,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.Confu import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric; import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; import org.elasticsearch.client.ml.filestructurefinder.FileStructure; import org.elasticsearch.client.ml.inference.TrainedModelConfig; import org.elasticsearch.client.ml.inference.TrainedModelDefinition; @@ -213,6 +216,7 @@ import java.util.zip.GZIPOutputStream; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.contains; 
import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -3460,10 +3464,10 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { } } - public void testEstimateMemoryUsage() throws Exception { - createIndex("estimate-test-source-index"); + public void testExplainDataFrameAnalytics() throws Exception { + createIndex("explain-df-test-source-index"); BulkRequest bulkRequest = - new BulkRequest("estimate-test-source-index") + new BulkRequest("explain-df-test-source-index") .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); for (int i = 0; i < 10; ++i) { bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L)); @@ -3471,22 +3475,33 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { RestHighLevelClient client = highLevelClient(); client.bulk(bulkRequest, RequestOptions.DEFAULT); { - // tag::estimate-memory-usage-request + // tag::explain-data-frame-analytics-id-request + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("existing_job_id"); // <1> + // end::explain-data-frame-analytics-id-request + + // tag::explain-data-frame-analytics-config-request DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1> - // end::estimate-memory-usage-request + request = new ExplainDataFrameAnalyticsRequest(config); // <1> + // end::explain-data-frame-analytics-config-request - // tag::estimate-memory-usage-execute - EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT); - // end::estimate-memory-usage-execute + // tag::explain-data-frame-analytics-execute + ExplainDataFrameAnalyticsResponse response = client.machineLearning().explainDataFrameAnalytics(request, + RequestOptions.DEFAULT); + // end::explain-data-frame-analytics-execute - // tag::estimate-memory-usage-response - ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1> - ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2> - // end::estimate-memory-usage-response + // tag::explain-data-frame-analytics-response + List fieldSelection = response.getFieldSelection(); // <1> + MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // <2> + // end::explain-data-frame-analytics-response + + assertThat(fieldSelection.size(), equalTo(2)); + assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("timestamp", "total")); + + ByteSizeValue expectedMemoryWithoutDisk = memoryEstimation.getExpectedMemoryWithoutDisk(); // <1> + ByteSizeValue expectedMemoryWithDisk = memoryEstimation.getExpectedMemoryWithDisk(); // <2> // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow. 
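// The bounds declared below are deliberately wide sanity checks on the order of magnitude of the estimates, not precise expectations.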
ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB); @@ -3496,14 +3511,14 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { } { DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder() - .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build()) + .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build()) .setAnalysis(OutlierDetection.createDefault()) .build(); - PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); - // tag::estimate-memory-usage-execute-listener - ActionListener listener = new ActionListener() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + // tag::explain-data-frame-analytics-execute-listener + ActionListener listener = new ActionListener() { @Override - public void onResponse(EstimateMemoryUsageResponse response) { + public void onResponse(ExplainDataFrameAnalyticsResponse response) { // <1> } @@ -3512,15 +3527,15 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase { // <2> } }; - // end::estimate-memory-usage-execute-listener + // end::explain-data-frame-analytics-execute-listener // Replace the empty listener by a blocking listener in test final CountDownLatch latch = new CountDownLatch(1); listener = new LatchedActionListener<>(listener, latch); - // tag::estimate-memory-usage-execute-async - client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1> - // end::estimate-memory-usage-execute-async + // tag::explain-data-frame-analytics-execute-async + client.machineLearning().explainDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1> + // end::explain-data-frame-analytics-execute-async assertTrue(latch.await(30L, TimeUnit.SECONDS)); } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java new file mode 100644 index 00000000000..7273a40e298 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsRequestTests.java @@ -0,0 +1,44 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig; +import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfigTests; +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class ExplainDataFrameAnalyticsRequestTests extends ESTestCase { + + public void testIdConstructor() { + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("foo"); + assertThat(request.getId(), equalTo("foo")); + assertThat(request.getConfig(), is(nullValue())); + } + + public void testConfigConstructor() { + DataFrameAnalyticsConfig config = DataFrameAnalyticsConfigTests.randomDataFrameAnalyticsConfig(); + + ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config); + assertThat(request.getId(), is(nullValue())); + assertThat(request.getConfig(), equalTo(config)); + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java new file mode 100644 index 00000000000..f4adbd09ba7 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/ExplainDataFrameAnalyticsResponseTests.java @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.client.ml; + +import org.elasticsearch.client.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.client.ml.dataframe.explain.FieldSelectionTests; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimationTests; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +public class ExplainDataFrameAnalyticsResponseTests extends AbstractXContentTestCase { + + @Override + protected ExplainDataFrameAnalyticsResponse createTestInstance() { + int fieldSelectionCount = randomIntBetween(1, 5); + List fieldSelection = new ArrayList<>(fieldSelectionCount); + IntStream.of(fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom())); + MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom(); + + return new ExplainDataFrameAnalyticsResponse(fieldSelection, memoryEstimation); + } + + @Override + protected ExplainDataFrameAnalyticsResponse doParseInstance(XContentParser parser) throws IOException { + return ExplainDataFrameAnalyticsResponse.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java new file mode 100644 index 00000000000..e76f39b5b85 --- /dev/null +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/FieldSelectionTests.java @@ -0,0 +1,57 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.client.ml.dataframe.explain; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; +import java.util.Set; +import java.util.stream.Collectors; + +public class FieldSelectionTests extends AbstractXContentTestCase { + + public static FieldSelection createRandom() { + Set mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip") + .stream().collect(Collectors.toSet()); + FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values()); + String reason = randomBoolean() ? 
null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java similarity index 68% rename from client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java rename to client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java index f8f2746204d..884736e573e 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/EstimateMemoryUsageResponseTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/explain/MemoryEstimationTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.client.ml; +package org.elasticsearch.client.ml.dataframe.explain; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.XContentParser; @@ -24,22 +24,22 @@ import org.elasticsearch.test.AbstractXContentTestCase; import java.io.IOException; -public class EstimateMemoryUsageResponseTests extends AbstractXContentTestCase { +public class MemoryEstimationTests extends AbstractXContentTestCase { - public static EstimateMemoryUsageResponse randomResponse() { - return new EstimateMemoryUsageResponse( + public static MemoryEstimation createRandom() { + return new MemoryEstimation( randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); } @Override - protected EstimateMemoryUsageResponse createTestInstance() { - return randomResponse(); + protected MemoryEstimation createTestInstance() { + return createRandom(); } @Override - protected EstimateMemoryUsageResponse doParseInstance(XContentParser parser) throws IOException { - return EstimateMemoryUsageResponse.fromXContent(parser); + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); } @Override diff --git a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc b/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc deleted file mode 100644 index 8b7ae0f55c8..00000000000 --- a/docs/java-rest/high-level/ml/estimate-memory-usage.asciidoc +++ /dev/null @@ -1,36 +0,0 @@ --- -:api: estimate-memory-usage -:request: PutDataFrameAnalyticsRequest -:response: EstimateMemoryUsageResponse --- -[role="xpack"] -[id="{upid}-{api}"] -=== Estimate memory usage API - -Estimates memory usage of {dfanalytics}. 
-Estimation results can be used when deciding the appropriate value for `model_memory_limit` setting later on. - -The API accepts an +{request}+ object and returns an +{response}+. - -[id="{upid}-{api}-request"] -==== Estimate memory usage request - -["source","java",subs="attributes,callouts,macros"] --------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-request] --------------------------------------------------- -<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed - -include::../execution.asciidoc[] - -[id="{upid}-{api}-response"] -==== Response - -The returned +{response}+ contains the memory usage estimates. - -["source","java",subs="attributes,callouts,macros"] --------------------------------------------------- -include-tagged::{doc-tests-file}[{api}-response] --------------------------------------------------- -<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk). -<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. \ No newline at end of file diff --git a/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc new file mode 100644 index 00000000000..3c41531d222 --- /dev/null +++ b/docs/java-rest/high-level/ml/explain-data-frame-analytics.asciidoc @@ -0,0 +1,48 @@ +-- +:api: explain-data-frame-analytics +:request: ExplainDataFrameAnalyticsRequest +:response: ExplainDataFrameAnalyticsResponse +-- +[role="xpack"] +[id="{upid}-{api}"] +=== Explain {dfanalytics} API + +Explains the following about a {dataframe-analytics-config}: + +* field selection: which fields are included or not in the analysis +* memory estimation: how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for the `model_memory_limit` setting later on. + +The API accepts an +{request}+ object and returns an +{response}+. + +[id="{upid}-{api}-request"] +==== Explain {dfanalytics} request + +The request can be constructed with the id of an existing {dfanalytics-job}. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-id-request] +-------------------------------------------------- +<1> Constructing a new request with the id of an existing {dfanalytics-job} + +It can also be constructed with a {dataframe-analytics-config} to explain a configuration before the job is created. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-config-request] +-------------------------------------------------- +<1> Constructing a new request containing a {dataframe-analytics-config} + +include::../execution.asciidoc[] + +[id="{upid}-{api}-response"] +==== Response + +The returned +{response}+ contains the field selection and the memory usage estimation.
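+
+As a quick, hand-written orientation (illustrative only, not generated from the doc tests like the tagged snippet below), consuming the two parts of the response might look roughly like this; the `response` variable is assumed to come from the execute call above:
+
+["source","java"]
+--------------------------------------------------
+for (FieldSelection selection : response.getFieldSelection()) {
+    // Each entry reports whether the field takes part in the analysis
+    // and, for excluded fields, the reason for the exclusion.
+    System.out.println(selection.getName() + " included: " + selection.isIncluded());
+}
+MemoryEstimation estimation = response.getMemoryEstimation();
+ByteSizeValue withoutDisk = estimation.getExpectedMemoryWithoutDisk(); // may be null
+ByteSizeValue withDisk = estimation.getExpectedMemoryWithDisk();       // may be null
+--------------------------------------------------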
+ +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests-file}[{api}-response] +-------------------------------------------------- +<1> A list where each item explains whether a field was selected for analysis or not +<2> The memory estimation for the {dfanalytics-job} diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc index 770866a0755..d691a3ac34b 100644 --- a/docs/java-rest/high-level/supported-apis.asciidoc +++ b/docs/java-rest/high-level/supported-apis.asciidoc @@ -300,7 +300,7 @@ The Java High Level REST Client supports the following Machine Learning APIs: * <<{upid}-start-data-frame-analytics>> * <<{upid}-stop-data-frame-analytics>> * <<{upid}-evaluate-data-frame>> -* <<{upid}-estimate-memory-usage>> +* <<{upid}-explain-data-frame-analytics>> * <<{upid}-get-trained-models>> * <<{upid}-put-filter>> * <<{upid}-get-filters>> @@ -353,7 +353,7 @@ include::ml/delete-data-frame-analytics.asciidoc[] include::ml/start-data-frame-analytics.asciidoc[] include::ml/stop-data-frame-analytics.asciidoc[] include::ml/evaluate-data-frame.asciidoc[] -include::ml/estimate-memory-usage.asciidoc[] +include::ml/explain-data-frame-analytics.asciidoc[] include::ml/get-trained-models.asciidoc[] include::ml/put-filter.asciidoc[] include::ml/get-filters.asciidoc[] diff --git a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc deleted file mode 100644 index 64db472dfd1..00000000000 --- a/docs/reference/ml/df-analytics/apis/estimate-memory-usage-dfanalytics.asciidoc +++ /dev/null @@ -1,80 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[estimate-memory-usage-dfanalytics]] -=== Estimate memory usage API - -[subs="attributes"] -++++ -Estimate memory usage for {dfanalytics-jobs} -++++ - -Estimates memory usage for the given {dataframe-analytics-config}. - -experimental[] - -[[ml-estimate-memory-usage-dfanalytics-request]] -==== {api-request-title} - -`POST _ml/data_frame/analytics/_estimate_memory_usage` - -[[ml-estimate-memory-usage-dfanalytics-prereq]] -==== {api-prereq-title} - -* You must have `monitor_ml` privilege to use this API. For more -information, see <> and <>. - -[[ml-estimate-memory-usage-dfanalytics-desc]] -==== {api-description-title} - -This API estimates memory usage for the given {dataframe-analytics-config} before the {dfanalytics-job} is even created. - -Serves as an advice on how to set `model_memory_limit` when creating {dfanalytics-job}. - -[[ml-estimate-memory-usage-dfanalytics-request-body]] -==== {api-request-body-title} - -`data_frame_analytics_config`:: - (Required, object) Intended configuration of {dfanalytics-job}. For more information, see - <>. - Note that `id` and `dest` don't need to be provided in the context of this API. - -[[ml-estimate-memory-usage-dfanalytics-results]] -==== {api-response-body-title} - -`expected_memory_without_disk`:: - (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory - (i.e. without overflowing to disk). - -`expected_memory_with_disk`:: - (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. - `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to - limit the main memory needed to perform {dfanalytics}. 
- -[[ml-estimate-memory-usage-dfanalytics-example]] -==== {api-examples-title} - -[source,console] --------------------------------------------------- -POST _ml/data_frame/analytics/_estimate_memory_usage -{ - "data_frame_analytics_config": { - "source": { - "index": "logdata" - }, - "analysis": { - "outlier_detection": {} - } - } -} --------------------------------------------------- -// TEST[skip:TBD] - -The API returns the following results: - -[source,console-result] ----- -{ - "expected_memory_without_disk": "128MB", - "expected_memory_with_disk": "32MB" -} ----- diff --git a/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc new file mode 100644 index 00000000000..c9ee565e9b2 --- /dev/null +++ b/docs/reference/ml/df-analytics/apis/explain-dfanalytics.asciidoc @@ -0,0 +1,159 @@ +[role="xpack"] +[testenv="platinum"] +[[explain-dfanalytics]] +=== Explain {dfanalytics} API + +[subs="attributes"] +++++ +Explain {dfanalytics} API +++++ + +Explains a {dataframe-analytics-config}. + +experimental[] + +[[ml-explain-dfanalytics-request]] +==== {api-request-title} + +`GET _ml/data_frame/analytics/_explain` + + +`POST _ml/data_frame/analytics/_explain` + + +`GET _ml/data_frame/analytics//_explain` + + +`POST _ml/data_frame/analytics//_explain` + +[[ml-explain-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `monitor_ml` privilege to use this API. For more +information, see <> and <>. + +[[ml-explain-dfanalytics-desc]] +==== {api-description-title} + +This API provides explanations for a {dataframe-analytics-config} that either exists already or one that has not been created yet. +The following explanations are provided: + +* which fields are included or not in the analysis and why +* how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for `model_memory_limit` setting later on. +about either an existing {dfanalytics-job} or one that has not been created yet. + +[[ml-explain-dfanalytics-path-params]] +==== {api-path-parms-title} + +``:: + (Optional, string) A numerical character string that uniquely identifies the existing + {dfanalytics-job} to explain. This identifier can contain lowercase alphanumeric + characters (a-z and 0-9), hyphens, and underscores. It must start and end with + alphanumeric characters. + +[[ml-explain-dfanalytics-request-body]] +==== {api-request-body-title} + +`data_frame_analytics_config`:: + (Optional, object) Intended configuration of {dfanalytics-job}. For more information, see + <>. + Note that `id` and `dest` don't need to be provided in the context of this API. + +[[ml-explain-dfanalytics-results]] +==== {api-response-body-title} + +The API returns a response that contains the following: + +`field_selection`:: + (array) An array of objects that explain selection for each field, sorted by the field names. + Each object in the array has the following properties: + + `name`::: + (string) The field name. + + `mapping_types`::: + (string) The mapping types of the field. + + `is_included`::: + (boolean) Whether the field is selected to be included in the analysis. + + `is_required`::: + (boolean) Whether the field is required. + + `feature_type`::: + (string) The feature type of this field for the analysis. May be `categorical` or `numerical`. + + `reason`::: + (string) The reason a field is not selected to be included in the analysis. + +`memory_estimation`:: + (object) An object containing the memory estimates. 
The object has the following properties: + + `expected_memory_without_disk`::: + (string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory + (i.e. without overflowing to disk). + + `expected_memory_with_disk`::: + (string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}. + `expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using the disk allows + limiting the main memory needed to perform {dfanalytics}. + +[[ml-explain-dfanalytics-example]] +==== {api-examples-title} + +[source,console] +-------------------------------------------------- +POST _ml/data_frame/analytics/_explain +{ + "data_frame_analytics_config": { + "source": { + "index": "houses_sold_last_10_yrs" + }, + "analysis": { + "regression": { + "dependent_variable": "price" + } + } + } +} +-------------------------------------------------- +// TEST[skip:TBD] + +The API returns the following results: + +[source,console-result] +---- +{ + "field_selection": [ + { + "name": "number_of_bedrooms", + "mapping_types": ["integer"], + "is_included": true, + "is_required": false, + "feature_type": "numerical" + }, + { + "name": "postcode", + "mapping_types": ["text"], + "is_included": false, + "is_required": false, + "reason": "[postcode.keyword] is preferred because it is aggregatable" + }, + { + "name": "postcode.keyword", + "mapping_types": ["keyword"], + "is_included": true, + "is_required": false, + "feature_type": "categorical" + }, + { + "name": "price", + "mapping_types": ["float"], + "is_included": true, + "is_required": true, + "feature_type": "numerical" + } + ], + "memory_estimation": { + "expected_memory_without_disk": "128MB", + "expected_memory_with_disk": "32MB" + } +} +---- diff --git a/docs/reference/ml/df-analytics/apis/index.asciidoc b/docs/reference/ml/df-analytics/apis/index.asciidoc index 30e909f3ffa..6bf63e7ddb8 100644 --- a/docs/reference/ml/df-analytics/apis/index.asciidoc +++ b/docs/reference/ml/df-analytics/apis/index.asciidoc @@ -5,16 +5,16 @@ You can use the following APIs to perform {ml} {dfanalytics} activities. -* <> +* <> * <> * <> * <> * <> * <> * <> -* <> +* <> -See also <>. +See also <>.
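For a sense of how the response shape documented in explain-dfanalytics.asciidoc above is consumed in code, here is a minimal sketch built only on the x-pack `Response`, `FieldSelection`, and `MemoryEstimation` classes introduced later in this diff; the inspector class itself is hypothetical:

[source,java]
----
import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;

// Hypothetical helper: walks the field_selection array and the
// memory_estimation object exactly as the docs above describe them.
public final class ExplainResponseInspectorSketch {

    static void inspect(ExplainDataFrameAnalyticsAction.Response response) {
        for (FieldSelection selection : response.getFieldSelection()) {
            if (selection.isIncluded() == false) {
                // Excluded fields carry a human-readable reason, e.g.
                // "[postcode.keyword] is preferred because it is aggregatable".
                System.out.println(selection.getName() + " excluded: " + selection.getReason());
            }
        }
        MemoryEstimation estimation = response.getMemoryEstimation();
        // expected_memory_without_disk is the conservative figure to start
        // from when choosing model_memory_limit.
        System.out.println("suggested model_memory_limit: " + estimation.getExpectedMemoryWithoutDisk());
    }
}
----

The high-level REST client surfaces the same two pieces of information (see the response callouts earlier in this diff), so a client-side version of this walk would look much the same, subject to that client's own response getters.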
//CREATE include::put-dfanalytics.asciidoc[] @@ -23,7 +23,7 @@ include::delete-dfanalytics.asciidoc[] //EVALUATE include::evaluate-dfanalytics.asciidoc[] -//ESTIMATE_MEMORY_USAGE -include::estimate-memory-usage-dfanalytics.asciidoc[] +//EXPLAIN +include::explain-dfanalytics.asciidoc[] //GET include::get-dfanalytics.asciidoc[] include::get-dfanalytics-stats.asciidoc[] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java index 8caac9d6e20..d99dd1ec233 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java @@ -79,6 +79,7 @@ import org.elasticsearch.xpack.core.ml.MachineLearningFeatureSetUsage; import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -89,7 +90,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -158,6 +158,10 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.P import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult; import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.SoftClassificationMetric; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; +import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import org.elasticsearch.xpack.core.ml.inference.results.ClassificationInferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults; import org.elasticsearch.xpack.core.ml.inference.results.RegressionInferenceResults; @@ -171,10 +175,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.OutputAgg import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedMode; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedSum; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.tree.Tree; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor; -import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding; import 
org.elasticsearch.xpack.core.ml.job.config.JobTaskState; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -381,7 +381,7 @@ public class XPackClientPlugin extends Plugin implements ActionPlugin, NetworkPl StartDataFrameAnalyticsAction.INSTANCE, StopDataFrameAnalyticsAction.INSTANCE, EvaluateDataFrameAction.INSTANCE, - EstimateMemoryUsageAction.INSTANCE, + ExplainDataFrameAnalyticsAction.INSTANCE, InternalInferModelAction.INSTANCE, GetTrainedModelsAction.INSTANCE, DeleteTrainedModelAction.INSTANCE, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java deleted file mode 100644 index 529db21cced..00000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageAction.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.action.ActionResponse; -import org.elasticsearch.action.ActionType; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.ConstructingObjectParser; -import org.elasticsearch.common.xcontent.ObjectParser; -import org.elasticsearch.common.xcontent.ToXContentObject; -import org.elasticsearch.common.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Objects; - -import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; - -public class EstimateMemoryUsageAction extends ActionType { - - public static final EstimateMemoryUsageAction INSTANCE = new EstimateMemoryUsageAction(); - public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/estimate_memory_usage"; - - private EstimateMemoryUsageAction() { - super(NAME, EstimateMemoryUsageAction.Response::new); - } - - public static class Response extends ActionResponse implements ToXContentObject { - - public static final ParseField TYPE = new ParseField("memory_usage_estimation_result"); - - public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); - public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); - - static final ConstructingObjectParser PARSER = - new ConstructingObjectParser<>( - TYPE.getPreferredName(), - args -> new Response((ByteSizeValue) args[0], (ByteSizeValue) args[1])); - - static { - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), - EXPECTED_MEMORY_WITHOUT_DISK, - ObjectParser.ValueType.VALUE); - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), - EXPECTED_MEMORY_WITH_DISK, - ObjectParser.ValueType.VALUE); - } - - private final ByteSizeValue expectedMemoryWithoutDisk; - private final ByteSizeValue 
expectedMemoryWithDisk; - - public Response(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { - this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; - this.expectedMemoryWithDisk = expectedMemoryWithDisk; - } - - public Response(StreamInput in) throws IOException { - super(in); - this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); - this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); - } - - public ByteSizeValue getExpectedMemoryWithoutDisk() { - return expectedMemoryWithoutDisk; - } - - public ByteSizeValue getExpectedMemoryWithDisk() { - return expectedMemoryWithDisk; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeOptionalWriteable(expectedMemoryWithoutDisk); - out.writeOptionalWriteable(expectedMemoryWithDisk); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - if (expectedMemoryWithoutDisk != null) { - builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); - } - if (expectedMemoryWithDisk != null) { - builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); - } - builder.endObject(); - return builder; - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - - Response that = (Response) other; - return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) - && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); - } - - @Override - public int hashCode() { - return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); - } - } -} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java new file mode 100644 index 00000000000..46888ea27a7 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsAction.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class ExplainDataFrameAnalyticsAction extends ActionType<ExplainDataFrameAnalyticsAction.Response> { + + public static final ExplainDataFrameAnalyticsAction INSTANCE = new ExplainDataFrameAnalyticsAction(); + public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/explain"; + + private ExplainDataFrameAnalyticsAction() { + super(NAME, ExplainDataFrameAnalyticsAction.Response::new); + } + + public static class Response extends ActionResponse implements ToXContentObject { + + public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response"); + + public static final ParseField FIELD_SELECTION = new ParseField("field_selection"); + public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation"); + + static final ConstructingObjectParser<Response, Void> PARSER = + new ConstructingObjectParser<>( + TYPE.getPreferredName(), + args -> new Response((List<FieldSelection>) args[0], (MemoryEstimation) args[1])); + + static { + PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION); + } + + private final List<FieldSelection> fieldSelection; + private final MemoryEstimation memoryEstimation; + + public Response(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) { + this.fieldSelection = Objects.requireNonNull(fieldSelection); + this.memoryEstimation = Objects.requireNonNull(memoryEstimation); + } + + public Response(StreamInput in) throws IOException { + super(in); + this.fieldSelection = in.readList(FieldSelection::new); + this.memoryEstimation = new MemoryEstimation(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeList(fieldSelection); + memoryEstimation.writeTo(out); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection); + builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + Response that = (Response) other; + return Objects.equals(fieldSelection, that.fieldSelection) + && Objects.equals(memoryEstimation, that.memoryEstimation); + } + + @Override + public int hashCode() { + return Objects.hash(fieldSelection, memoryEstimation); + } + + public MemoryEstimation getMemoryEstimation() { + return memoryEstimation; + } + + public List<FieldSelection> getFieldSelection() { + return fieldSelection; + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java 
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java index 6860162d793..5bce41d8a4a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/PutDataFrameAnalyticsAction.java @@ -51,13 +51,14 @@ public class PutDataFrameAnalyticsAction extends ActionType PARSER = new ConstructingObjectParser<>("field_selection", + a -> new FieldSelection((String) a[0], new HashSet<>((List) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4], + (String) a[5])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED); + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED); + PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> { + if (p.currentToken() == XContentParser.Token.VALUE_STRING) { + return FeatureType.fromString(p.text()); + } + throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]"); + }, FEATURE_TYPE, ObjectParser.ValueType.STRING); + PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON); + } + + private final String name; + private final Set mappingTypes; + private final boolean isIncluded; + private final boolean isRequired; + private final FeatureType featureType; + private final String reason; + + public static FieldSelection included(String name, Set mappingTypes, boolean isRequired, FeatureType featureType) { + return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null); + } + + public static FieldSelection excluded(String name, Set mappingTypes, String reason) { + return new FieldSelection(name, mappingTypes, false, false, null, reason); + } + + FieldSelection(String name, Set mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType, + @Nullable String reason) { + this.name = Objects.requireNonNull(name); + this.mappingTypes = Collections.unmodifiableSet(mappingTypes); + this.isIncluded = isIncluded; + this.isRequired = isRequired; + this.featureType = featureType; + this.reason = reason; + } + + public FieldSelection(StreamInput in) throws IOException { + this.name = in.readString(); + this.mappingTypes = Collections.unmodifiableSet(in.readSet(StreamInput::readString)); + this.isIncluded = in.readBoolean(); + this.isRequired = in.readBoolean(); + boolean hasFeatureType = in.readBoolean(); + + if (hasFeatureType) { + this.featureType = in.readEnum(FeatureType.class); + } else { + this.featureType = null; + } + + this.reason = in.readOptionalString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + out.writeCollection(mappingTypes, StreamOutput::writeString); + out.writeBoolean(isIncluded); + out.writeBoolean(isRequired); + + if (featureType == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + out.writeEnum(featureType); + } + out.writeOptionalString(reason); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(NAME.getPreferredName(), name); + builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes); + builder.field(IS_INCLUDED.getPreferredName(), isIncluded); + 
builder.field(IS_REQUIRED.getPreferredName(), isRequired); + if (featureType != null) { + builder.field(FEATURE_TYPE.getPreferredName(), featureType); + } + if (reason != null) { + builder.field(REASON.getPreferredName(), reason); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldSelection that = (FieldSelection) o; + return Objects.equals(name, that.name) + && Objects.equals(mappingTypes, that.mappingTypes) + && isIncluded == that.isIncluded + && isRequired == that.isRequired + && Objects.equals(featureType, that.featureType) + && Objects.equals(reason, that.reason); + } + + @Override + public int hashCode() { + return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason); + } + + public String getName() { + return name; + } + + public Set getMappingTypes() { + return mappingTypes; + } + + public boolean isIncluded() { + return isIncluded; + } + + public boolean isRequired() { + return isRequired; + } + + @Nullable + public FeatureType getFeatureType() { + return featureType; + } + + @Nullable + public String getReason() { + return reason; + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java new file mode 100644 index 00000000000..7972c6a9ee0 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimation.java @@ -0,0 +1,103 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public class MemoryEstimation implements ToXContentObject, Writeable { + + public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk"); + public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk"); + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("memory_estimation", + a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1])); + + static { + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()), + EXPECTED_MEMORY_WITHOUT_DISK, + ObjectParser.ValueType.VALUE); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()), + EXPECTED_MEMORY_WITH_DISK, + ObjectParser.ValueType.VALUE); + } + + private final ByteSizeValue expectedMemoryWithoutDisk; + private final ByteSizeValue expectedMemoryWithDisk; + + public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) { + this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk; + this.expectedMemoryWithDisk = expectedMemoryWithDisk; + } + + public MemoryEstimation(StreamInput in) throws IOException { + this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new); + this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new); + } + + public ByteSizeValue getExpectedMemoryWithoutDisk() { + return expectedMemoryWithoutDisk; + } + + public ByteSizeValue getExpectedMemoryWithDisk() { + return expectedMemoryWithDisk; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalWriteable(expectedMemoryWithoutDisk); + out.writeOptionalWriteable(expectedMemoryWithDisk); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (expectedMemoryWithoutDisk != null) { + builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep()); + } + if (expectedMemoryWithDisk != null) { + builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep()); + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + MemoryEstimation that = (MemoryEstimation) other; + return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk) + && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk); + } + + 
@Override + public int hashCode() { + return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java deleted file mode 100644 index 1bc8d8970ea..00000000000 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/EstimateMemoryUsageActionResponseTests.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.ml.action; - -import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.unit.ByteSizeUnit; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.test.AbstractSerializingTestCase; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction.Response; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.nullValue; - -public class EstimateMemoryUsageActionResponseTests extends AbstractSerializingTestCase { - - @Override - protected Response createTestInstance() { - return new Response( - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, - randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); - } - - @Override - protected Writeable.Reader instanceReader() { - return Response::new; - } - - @Override - protected Response doParseInstance(XContentParser parser) { - return Response.PARSER.apply(parser, null); - } - - public void testConstructor_NullValues() { - Response response = new Response(null, null); - assertThat(response.getExpectedMemoryWithoutDisk(), nullValue()); - assertThat(response.getExpectedMemoryWithDisk(), nullValue()); - } - - public void testConstructor_SmallValues() { - Response response = new Response(new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); - } - - public void testConstructor() { - Response response = new Response(new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); - assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); - assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); - } -} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java new file mode 100644 index 00000000000..ea1aca3916c --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/action/ExplainDataFrameAnalyticsActionResponseTests.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.core.ml.action; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction.Response; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelectionTests; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimationTests; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.IntStream; + +public class ExplainDataFrameAnalyticsActionResponseTests extends AbstractSerializingTestCase<Response> { + + @Override + protected Response createTestInstance() { + int fieldSelectionCount = randomIntBetween(1, 5); + List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount); + IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom())); + MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom(); + + return new Response(fieldSelection, memoryEstimation); + } + + @Override + protected Writeable.Reader<Response> instanceReader() { + return Response::new; + } + + @Override + protected Response doParseInstance(XContentParser parser) { + return Response.PARSER.apply(parser, null); + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java index 3266f488daf..d8c52c83902 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java @@ -279,32 +279,32 @@ public class DataFrameAnalyticsConfigTests extends AbstractSerializingTestCase { + + public static FieldSelection createRandom() { + Set<String> mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip") + .stream().collect(Collectors.toSet()); + FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values()); + String reason = randomBoolean() ? null : randomAlphaOfLength(20); + return new FieldSelection(randomAlphaOfLength(10), + mappingTypes, + randomBoolean(), + randomBoolean(), + featureType, + reason); + } + + @Override + protected FieldSelection createTestInstance() { + return createRandom(); + } + + @Override + protected FieldSelection doParseInstance(XContentParser parser) throws IOException { + return FieldSelection.PARSER.apply(parser, null); + } + + @Override + protected Writeable.Reader<FieldSelection> instanceReader() { + return FieldSelection::new; + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java new file mode 100644 index 00000000000..dc9e20bd86a --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/explain/MemoryEstimationTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.core.ml.dataframe.explain; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractSerializingTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +public class MemoryEstimationTests extends AbstractSerializingTestCase { + + public static MemoryEstimation createRandom() { + return new MemoryEstimation( + randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null, + randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null); + } + + @Override + protected MemoryEstimation createTestInstance() { + return createRandom(); + } + + @Override + protected Writeable.Reader instanceReader() { + return MemoryEstimation::new; + } + + @Override + protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException { + return MemoryEstimation.PARSER.apply(parser, null); + } + + public void testConstructor_NullValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation(null, null); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), nullValue()); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), nullValue()); + } + + public void testConstructor_SmallValues() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB))); + } + + public void testConstructor() { + MemoryEstimation memoryEstimation = new MemoryEstimation( + new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB)); + assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB))); + assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB))); + } +} diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 961dc944ea7..38beb1d1908 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -92,7 +92,6 @@ integTest.runner { 'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k', 'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one', 'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred', - 'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame', 'ml/evaluate_data_frame/Test given missing index', 'ml/evaluate_data_frame/Test given index does not exist', 'ml/evaluate_data_frame/Test given missing evaluation', @@ -113,6 +112,10 @@ integTest.runner { 'ml/evaluate_data_frame/Test regression given evaluation with empty metrics', 'ml/evaluate_data_frame/Test regression given missing actual_field', 'ml/evaluate_data_frame/Test regression given missing predicted_field', + 'ml/explain_data_frame_analytics/Test neither job id nor body', + 
'ml/explain_data_frame_analytics/Test both job id and body', + 'ml/explain_data_frame_analytics/Test missing job', + 'ml/explain_data_frame_analytics/Test empty data frame given body', 'ml/delete_job_force/Test cannot force delete a non-existent job', 'ml/delete_model_snapshot/Test delete snapshot missing snapshotId', 'ml/delete_model_snapshot/Test delete snapshot missing job_id', diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 964bc719cbd..0293a367473 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.XPackSettings; import org.elasticsearch.xpack.core.ml.MachineLearningField; import org.elasticsearch.xpack.core.ml.MlMetaIndex; import org.elasticsearch.xpack.core.ml.action.CloseJobAction; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction; import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction; import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction; @@ -75,7 +76,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction; import org.elasticsearch.xpack.core.ml.action.DeleteJobAction; import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction; import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction; import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction; import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction; @@ -98,8 +98,8 @@ import org.elasticsearch.xpack.core.ml.action.GetOverallBucketsAction; import org.elasticsearch.xpack.core.ml.action.GetRecordsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction; import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction; -import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction; +import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; import org.elasticsearch.xpack.core.ml.action.KillProcessAction; import org.elasticsearch.xpack.core.ml.action.MlInfoAction; import org.elasticsearch.xpack.core.ml.action.OpenJobAction; @@ -136,6 +136,7 @@ import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings; import org.elasticsearch.xpack.core.ml.notifications.AuditorField; import org.elasticsearch.xpack.core.template.TemplateUtils; import org.elasticsearch.xpack.ml.action.TransportCloseJobAction; +import org.elasticsearch.xpack.ml.action.TransportExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarAction; import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarEventAction; import org.elasticsearch.xpack.ml.action.TransportDeleteDataFrameAnalyticsAction; @@ -146,7 +147,6 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction; import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction; import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction; import org.elasticsearch.xpack.ml.action.TransportDeleteTrainedModelAction; -import 
org.elasticsearch.xpack.ml.action.TransportEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.action.TransportEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction; import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction; @@ -167,9 +167,9 @@ import org.elasticsearch.xpack.ml.action.TransportGetJobsStatsAction; import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction; import org.elasticsearch.xpack.ml.action.TransportGetOverallBucketsAction; import org.elasticsearch.xpack.ml.action.TransportGetRecordsAction; +import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsStatsAction; import org.elasticsearch.xpack.ml.action.TransportInternalInferModelAction; -import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction; import org.elasticsearch.xpack.ml.action.TransportIsolateDatafeedAction; import org.elasticsearch.xpack.ml.action.TransportKillProcessAction; import org.elasticsearch.xpack.ml.action.TransportMlInfoAction; @@ -258,8 +258,8 @@ import org.elasticsearch.xpack.ml.rest.datafeeds.RestPutDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestStartDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestStopDatafeedAction; import org.elasticsearch.xpack.ml.rest.datafeeds.RestUpdateDatafeedAction; +import org.elasticsearch.xpack.ml.rest.dataframe.RestExplainDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestDeleteDataFrameAnalyticsAction; -import org.elasticsearch.xpack.ml.rest.dataframe.RestEstimateMemoryUsageAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestEvaluateDataFrameAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsAction; import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsStatsAction; @@ -759,7 +759,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu new RestStartDataFrameAnalyticsAction(restController), new RestStopDataFrameAnalyticsAction(restController), new RestEvaluateDataFrameAction(restController), - new RestEstimateMemoryUsageAction(restController), + new RestExplainDataFrameAnalyticsAction(restController), new RestGetTrainedModelsAction(restController), new RestDeleteTrainedModelAction(restController), new RestGetTrainedModelsStatsAction(restController) @@ -829,7 +829,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu new ActionHandler<>(StartDataFrameAnalyticsAction.INSTANCE, TransportStartDataFrameAnalyticsAction.class), new ActionHandler<>(StopDataFrameAnalyticsAction.INSTANCE, TransportStopDataFrameAnalyticsAction.class), new ActionHandler<>(EvaluateDataFrameAction.INSTANCE, TransportEvaluateDataFrameAction.class), - new ActionHandler<>(EstimateMemoryUsageAction.INSTANCE, TransportEstimateMemoryUsageAction.class), + new ActionHandler<>(ExplainDataFrameAnalyticsAction.INSTANCE, TransportExplainDataFrameAnalyticsAction.class), new ActionHandler<>(InternalInferModelAction.INSTANCE, TransportInternalInferModelAction.class), new ActionHandler<>(GetTrainedModelsAction.INSTANCE, TransportGetTrainedModelsAction.class), new ActionHandler<>(DeleteTrainedModelAction.INSTANCE, TransportDeleteTrainedModelAction.class), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java deleted file mode 100644 index a82db7c4f97..00000000000 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportEstimateMemoryUsageAction.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.ml.action; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionListenerResponseHandler; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.HandledTransportAction; -import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.transport.TransportService; -import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction; -import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import org.elasticsearch.xpack.ml.MachineLearning; -import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; -import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; - -import java.util.Objects; -import java.util.Optional; - -/** - * Estimates memory usage for the given data frame analytics spec. - * Redirects to a different node if the current node is *not* an ML node. - */ -public class TransportEstimateMemoryUsageAction - extends HandledTransportAction { - - private final TransportService transportService; - private final ClusterService clusterService; - private final NodeClient client; - private final MemoryUsageEstimationProcessManager processManager; - - @Inject - public TransportEstimateMemoryUsageAction(TransportService transportService, - ActionFilters actionFilters, - ClusterService clusterService, - NodeClient client, - MemoryUsageEstimationProcessManager processManager) { - super(EstimateMemoryUsageAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); - this.transportService = transportService; - this.clusterService = Objects.requireNonNull(clusterService); - this.client = Objects.requireNonNull(client); - this.processManager = Objects.requireNonNull(processManager); - } - - @Override - protected void doExecute(Task task, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DiscoveryNode localNode = clusterService.localNode(); - if (MachineLearning.isMlNode(localNode)) { - doEstimateMemoryUsage(createTaskIdForMemoryEstimation(task), request, listener); - } else { - redirectToMlNode(request, listener); - } - } - - /** - * Creates unique task id for the memory estimation process. This id is useful when logging. - */ - private static String createTaskIdForMemoryEstimation(Task task) { - return "memory_usage_estimation_" + task.getId(); - } - - /** - * Performs memory usage estimation. - * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on - * the ML node. 
- */ - private void doEstimateMemoryUsage(String taskId, - PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - DataFrameDataExtractorFactory.createForSourceIndices( - client, - taskId, - true, // We are not interested in first-time run validations here - request.getConfig(), - ActionListener.wrap( - dataExtractorFactory -> { - processManager.runJobAsync( - taskId, - request.getConfig(), - dataExtractorFactory, - ActionListener.wrap( - result -> listener.onResponse( - new EstimateMemoryUsageAction.Response( - result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), - listener::onFailure - ) - ); - }, - listener::onFailure - ) - ); - } - - /** - * Finds the first available ML node in the cluster and redirects the request to this node. - */ - private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request, - ActionListener listener) { - Optional node = findMlNode(clusterService.state()); - if (node.isPresent()) { - transportService.sendRequest( - node.get(), actionName, request, new ActionListenerResponseHandler<>(listener, EstimateMemoryUsageAction.Response::new)); - } else { - listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on")); - } - } - - /** - * Finds the first available ML node in the cluster state. - */ - private static Optional findMlNode(ClusterState clusterState) { - for (DiscoveryNode node : clusterState.getNodes()) { - if (MachineLearning.isMlNode(node)) { - return Optional.of(node); - } - } - return Optional.empty(); - } -} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java new file mode 100644 index 00000000000..7f19deb8d5b --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportExplainDataFrameAnalyticsAction.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionListenerResponseHandler; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.license.LicenseUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.XPackField; +import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction; +import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection; +import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetector; +import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory; +import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager; +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; + +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +/** + * Provides explanations on aspects of the given data frame analytics spec like memory estimation, field selection, etc. + * Redirects to a different node if the current node is *not* an ML node. 
+ */ +public class TransportExplainDataFrameAnalyticsAction + extends HandledTransportAction { + + private final XPackLicenseState licenseState; + private final TransportService transportService; + private final ClusterService clusterService; + private final NodeClient client; + private final MemoryUsageEstimationProcessManager processManager; + + @Inject + public TransportExplainDataFrameAnalyticsAction(TransportService transportService, + ActionFilters actionFilters, + ClusterService clusterService, + NodeClient client, + XPackLicenseState licenseState, + MemoryUsageEstimationProcessManager processManager) { + super(ExplainDataFrameAnalyticsAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new); + this.transportService = transportService; + this.clusterService = Objects.requireNonNull(clusterService); + this.client = Objects.requireNonNull(client); + this.licenseState = licenseState; + this.processManager = Objects.requireNonNull(processManager); + } + + @Override + protected void doExecute(Task task, + PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + if (licenseState.isMachineLearningAllowed() == false) { + listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING)); + return; + } + + DiscoveryNode localNode = clusterService.localNode(); + if (MachineLearning.isMlNode(localNode)) { + explain(task, request, listener); + } else { + redirectToMlNode(request, listener); + } + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, + ActionListener listener) { + ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client); + extractedFieldsDetectorFactory.createFromSource(request.getConfig(), true, ActionListener.wrap( + extractedFieldsDetector -> { + explain(task, request, extractedFieldsDetector, listener); + }, + listener::onFailure + )); + } + + private void explain(Task task, PutDataFrameAnalyticsAction.Request request, ExtractedFieldsDetector extractedFieldsDetector, + ActionListener listener) { + Tuple> fieldExtraction = extractedFieldsDetector.detect(); + + ActionListener memoryEstimationListener = ActionListener.wrap( + memoryEstimation -> listener.onResponse(new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)), + listener::onFailure + ); + + estimateMemoryUsage(task, request, fieldExtraction.v1(), memoryEstimationListener); + } + + /** + * Performs memory usage estimation. + * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on + * the ML node. + */ + private void estimateMemoryUsage(Task task, + PutDataFrameAnalyticsAction.Request request, + ExtractedFields extractedFields, + ActionListener listener) { + final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId(); + DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices( + client, estimateMemoryTaskId, request.getConfig(), extractedFields); + processManager.runJobAsync( + estimateMemoryTaskId, + request.getConfig(), + extractorFactory, + ActionListener.wrap( + result -> listener.onResponse( + new MemoryEstimation(result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())), + listener::onFailure + ) + ); + } + + /** + * Finds the first available ML node in the cluster and redirects the request to this node. 
+     */
+    private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request,
+                                  ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
+        Optional<DiscoveryNode> node = findMlNode(clusterService.state());
+        if (node.isPresent()) {
+            transportService.sendRequest(node.get(), actionName, request,
+                new ActionListenerResponseHandler<>(listener, ExplainDataFrameAnalyticsAction.Response::new));
+        } else {
+            listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on"));
+        }
+    }
+
+    /**
+     * Finds the first available ML node in the cluster state.
+     */
+    private static Optional<DiscoveryNode> findMlNode(ClusterState clusterState) {
+        for (DiscoveryNode node : clusterState.getNodes()) {
+            if (MachineLearning.isMlNode(node)) {
+                return Optional.of(node);
+            }
+        }
+        return Optional.empty();
+    }
+}
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java
index 1740a7fb532..af67750ee6d 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java
@@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.index.IndexNotFoundException;
 import org.elasticsearch.license.LicenseUtils;
@@ -47,7 +48,7 @@ import org.elasticsearch.xpack.core.ClientHelper;
 import org.elasticsearch.xpack.core.XPackField;
 import org.elasticsearch.xpack.core.ml.MlMetadata;
 import org.elasticsearch.xpack.core.ml.MlTasks;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
 import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction;
 import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
 import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction;
@@ -66,6 +67,7 @@ import org.elasticsearch.xpack.ml.dataframe.SourceDestValidator;
 import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
 import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory;
 import org.elasticsearch.xpack.ml.dataframe.persistence.DataFrameAnalyticsConfigProvider;
+import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
 import org.elasticsearch.xpack.ml.job.JobNodeSelector;
 import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor;
 import org.elasticsearch.xpack.ml.process.MlMemoryTracker;
@@ -190,20 +192,18 @@ public class TransportStartDataFrameAnalyticsAction
         final String jobId = startContext.config.getId();
 
         // Tell the job tracker to refresh the memory requirement for this job and all other jobs that have persistent tasks
-        ActionListener<EstimateMemoryUsageAction.Response> estimateMemoryUsageListener = ActionListener.wrap(
-            estimateMemoryUsageResponse -> {
-                auditor.info(
-                    jobId,
-                    Messages.getMessage(
-                        Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE,
-                        estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()));
+        ActionListener<ExplainDataFrameAnalyticsAction.Response> explainListener = ActionListener.wrap(
+            explainResponse -> {
+                ByteSizeValue expectedMemoryWithoutDisk = explainResponse.getMemoryEstimation().getExpectedMemoryWithoutDisk();
+                auditor.info(jobId,
+                    Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, expectedMemoryWithoutDisk));
                 // Validate that model memory limit is sufficient to run the analysis
                 if (startContext.config.getModelMemoryLimit()
-                        .compareTo(estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()) < 0) {
+                        .compareTo(expectedMemoryWithoutDisk) < 0) {
                     ElasticsearchStatusException e = ExceptionsHelper.badRequestException(
                         "Cannot start because the configured model memory limit [{}] is lower than the expected memory usage [{}]",
-                        startContext.config.getModelMemoryLimit(), estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk());
+                        startContext.config.getModelMemoryLimit(), expectedMemoryWithoutDisk);
                     listener.onFailure(e);
                     return;
                 }
@@ -215,13 +215,13 @@ public class TransportStartDataFrameAnalyticsAction
             listener::onFailure
         );
 
-        PutDataFrameAnalyticsAction.Request estimateMemoryUsageRequest = new PutDataFrameAnalyticsAction.Request(startContext.config);
+        PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(startContext.config);
         ClientHelper.executeAsyncWithOrigin(
             client,
             ClientHelper.ML_ORIGIN,
-            EstimateMemoryUsageAction.INSTANCE,
-            estimateMemoryUsageRequest,
-            estimateMemoryUsageListener);
+            ExplainDataFrameAnalyticsAction.INSTANCE,
+            explainRequest,
+            explainListener);
     }
@@ -277,7 +277,11 @@ public class TransportStartDataFrameAnalyticsAction
                 // Validate extraction is possible
                 boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME;
                 new ExtractedFieldsDetectorFactory(client).createFromSource(startContext.config, isTaskRestarting, ActionListener.wrap(
-                    extractedFieldsDetector -> toValidateDestEmptyListener.onResponse(startContext), finalListener::onFailure));
+                    extractedFieldsDetector -> {
+                        startContext.extractedFields = extractedFieldsDetector.detect().v1();
+                        toValidateDestEmptyListener.onResponse(startContext);
+                    },
+                    finalListener::onFailure));
             },
             finalListener::onFailure
         );
@@ -294,33 +298,27 @@ public class TransportStartDataFrameAnalyticsAction
     }
 
     private void validateSourceIndexHasRows(StartContext startContext, ActionListener<StartContext> listener) {
-        boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME;
-        DataFrameDataExtractorFactory.createForSourceIndices(client,
+        DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client,
             "validate_source_index_has_rows-" + startContext.config.getId(),
-            isTaskRestarting,
             startContext.config,
-            ActionListener.wrap(
-                dataFrameDataExtractorFactory ->
-                    dataFrameDataExtractorFactory
-                        .newExtractor(false)
-                        .collectDataSummaryAsync(ActionListener.wrap(
-                            dataSummary -> {
-                                if (dataSummary.rows == 0) {
-                                    listener.onFailure(ExceptionsHelper.badRequestException(
-                                        "Unable to start {} as no documents in the source indices [{}] contained all the fields "
-                                            + "selected for analysis. If you are relying on automatic field selection then there are "
-                                            + "currently mapped fields that do not exist in any indexed documents, and you will have "
-                                            + "to switch to explicit field selection and include only fields that exist in indexed "
-                                            + "documents.",
-                                        startContext.config.getId(),
-                                        Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
-                                    ));
-                                } else {
-                                    listener.onResponse(startContext);
-                                }
-                            },
-                            listener::onFailure
-                        )),
+            startContext.extractedFields);
+        extractorFactory.newExtractor(false)
+            .collectDataSummaryAsync(ActionListener.wrap(
+                dataSummary -> {
+                    if (dataSummary.rows == 0) {
+                        listener.onFailure(ExceptionsHelper.badRequestException(
+                            "Unable to start {} as no documents in the source indices [{}] contained all the fields "
+                                + "selected for analysis. If you are relying on automatic field selection then there are "
+                                + "currently mapped fields that do not exist in any indexed documents, and you will have "
+                                + "to switch to explicit field selection and include only fields that exist in indexed "
+                                + "documents.",
+                            startContext.config.getId(),
+                            Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
+                        ));
+                    } else {
+                        listener.onResponse(startContext);
+                    }
+                }, listener::onFailure
             ));
     }
@@ -402,6 +400,7 @@ public class TransportStartDataFrameAnalyticsAction
         private final DataFrameAnalyticsConfig config;
         private final List<PhaseProgress> progressOnStart;
         private final DataFrameAnalyticsTask.StartingState startingState;
+        private volatile ExtractedFields extractedFields;
 
         private StartContext(DataFrameAnalyticsConfig config, List<PhaseProgress> progressOnStart) {
             this.config = config;
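
Aside: the pre-flight validation above is just an ordered comparison of two ByteSizeValue instances. A minimal, hedged sketch of that check in isolation; ByteSizeValue is the real org.elasticsearch class, while the helper class and method names here are invented for illustration:

import org.elasticsearch.common.unit.ByteSizeValue;

final class MemoryLimitCheck {
    // Hypothetical helper mirroring the validation in the explain listener above.
    static void ensureLimitCoversEstimate(ByteSizeValue modelMemoryLimit, ByteSizeValue expectedMemoryWithoutDisk) {
        // ByteSizeValue is Comparable; a negative result means the configured limit is too small
        if (modelMemoryLimit.compareTo(expectedMemoryWithoutDisk) < 0) {
            throw new IllegalArgumentException("model memory limit [" + modelMemoryLimit
                + "] is lower than the expected memory usage [" + expectedMemoryWithoutDisk + "]");
        }
    }
}
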
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java
index ce21973ca91..f8afd229098 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/DataFrameDataExtractorFactory.java
@@ -29,7 +29,7 @@ public class DataFrameDataExtractorFactory {
     private final Map<String, String> headers;
     private final boolean includeRowsWithMissingValues;
 
-    private DataFrameDataExtractorFactory(Client client, String analyticsId, List<String> indices, ExtractedFields extractedFields,
+    public DataFrameDataExtractorFactory(Client client, String analyticsId, List<String> indices, ExtractedFields extractedFields,
                                           Map<String, String> headers, boolean includeRowsWithMissingValues) {
         this.client = Objects.requireNonNull(client);
         this.analyticsId = Objects.requireNonNull(analyticsId);
@@ -66,32 +66,19 @@ public class DataFrameDataExtractorFactory {
     }
 
     /**
-     * Validate and create a new extractor factory
+     * Create a new extractor factory
      *
     * The source index must exist and contain at least 1 compatible field or validations will fail.
     *
     * @param client ES Client used to make calls against the cluster
     * @param taskId The task id
-     * @param isTaskRestarting Whether the task is restarting or it is running for the first time
     * @param config The config from which to create the extractor factory
-     * @param listener The listener to notify on creation or failure
+     * @param extractedFields The fields to extract
     */
-    public static void createForSourceIndices(Client client,
-                                              String taskId,
-                                              boolean isTaskRestarting,
-                                              DataFrameAnalyticsConfig config,
-                                              ActionListener<DataFrameDataExtractorFactory> listener) {
-        ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
-        extractedFieldsDetectorFactory.createFromSource(config, isTaskRestarting, ActionListener.wrap(
-            extractedFieldsDetector -> {
-                ExtractedFields extractedFields = extractedFieldsDetector.detect();
-                DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, taskId,
-                    Arrays.asList(config.getSource().getIndex()), extractedFields, config.getHeaders(),
-                    config.getAnalysis().supportsMissingValues());
-                listener.onResponse(extractorFactory);
-            },
-            listener::onFailure
-        ));
+    public static DataFrameDataExtractorFactory createForSourceIndices(Client client, String taskId, DataFrameAnalyticsConfig config,
+                                                                       ExtractedFields extractedFields) {
+        return new DataFrameDataExtractorFactory(client, taskId, Arrays.asList(config.getSource().getIndex()), extractedFields,
+            config.getHeaders(), config.getAnalysis().supportsMissingValues());
     }
 
     /**
@@ -111,7 +98,7 @@ public class DataFrameDataExtractorFactory {
         ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
         extractedFieldsDetectorFactory.createFromDest(config, isTaskRestarting, ActionListener.wrap(
             extractedFieldsDetector -> {
-                ExtractedFields extractedFields = extractedFieldsDetector.detect();
+                ExtractedFields extractedFields = extractedFieldsDetector.detect().v1();
                 DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, config.getId(),
                     Collections.singletonList(config.getDest().getIndex()), extractedFields, config.getHeaders(),
                     config.getAnalysis().supportsMissingValues());
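
The detector changed below now returns both results of a single field-detection pass. A hedged sketch of the caller-side contract; Tuple is the real org.elasticsearch.common.collect.Tuple, the PR's own ExtractedFields, FieldSelection and ExtractedFieldsDetector types are assumed to be on the classpath, and the wrapper class is invented:

import org.elasticsearch.common.collect.Tuple;

import java.util.List;

final class DetectUsageSketch {
    // Hypothetical caller: v1() feeds data extraction, v2() becomes the _explain field_selection.
    static List<FieldSelection> fieldSelection(ExtractedFieldsDetector detector) {
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = detector.detect();
        ExtractedFields extractedFields = fieldExtraction.v1(); // used to build the extractor factory
        assert extractedFields != null;
        return fieldExtraction.v2();
    }
}
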
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
index 5d94b57aca5..682cc94433c 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetector.java
@@ -11,6 +11,7 @@ import org.elasticsearch.ResourceNotFoundException;
 import org.elasticsearch.action.fieldcaps.FieldCapabilities;
 import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.BooleanFieldMapper;
@@ -19,6 +20,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
 import org.elasticsearch.xpack.core.ml.job.messages.Messages;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
 import org.elasticsearch.xpack.core.ml.utils.NameResolver;
@@ -29,13 +31,12 @@ import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashSet;
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.Optional;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.stream.Collectors;
@@ -57,9 +58,8 @@ public class ExtractedFieldsDetector {
     private final FieldCapabilitiesResponse fieldCapabilitiesResponse;
     private final Map<String, Long> fieldCardinalities;
 
-    ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting,
-                            int docValueFieldsLimit, FieldCapabilitiesResponse fieldCapabilitiesResponse,
-                            Map<String, Long> fieldCardinalities) {
+    ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, int docValueFieldsLimit,
+                            FieldCapabilitiesResponse fieldCapabilitiesResponse, Map<String, Long> fieldCardinalities) {
         this.index = Objects.requireNonNull(index);
         this.config = Objects.requireNonNull(config);
         this.isTaskRestarting = isTaskRestarting;
@@ -68,8 +68,30 @@ public class ExtractedFieldsDetector {
         this.fieldCardinalities = Objects.requireNonNull(fieldCardinalities);
     }
 
-    public ExtractedFields detect() {
-        Set<String> fields = getIncludedFields();
+    public Tuple<ExtractedFields, List<FieldSelection>> detect() {
+        TreeSet<FieldSelection> fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName));
+        Set<String> fields = getIncludedFields(fieldSelection);
+        checkFieldsHaveCompatibleTypes(fields);
+        checkRequiredFields(fields);
+        checkFieldsWithCardinalityLimit();
+        ExtractedFields extractedFields = detectExtractedFields(fields, fieldSelection);
+        addIncludedFields(extractedFields, fieldSelection);
+
+        return Tuple.tuple(extractedFields, Collections.unmodifiableList(new ArrayList<>(fieldSelection)));
+    }
+
+    private Set<String> getIncludedFields(Set<FieldSelection> fieldSelection) {
+        Set<String> fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet());
+        fields.removeAll(IGNORE_FIELDS);
+        checkResultsFieldIsNotPresent();
+        removeFieldsUnderResultsField(fields);
+        FetchSourceContext analyzedFields = config.getAnalyzedFields();
+
+        // If the user has not explicitly included fields we'll include all compatible fields
+        if (analyzedFields == null || analyzedFields.includes().length == 0) {
+            removeFieldsWithIncompatibleTypes(fields, fieldSelection);
+        }
+        includeAndExcludeFields(fields, fieldSelection);
         if (fields.isEmpty()) {
             throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.",
@@ -77,26 +99,19 @@
                 getSupportedTypes());
         }
 
-        checkNoIgnoredFields(fields);
-        checkFieldsHaveCompatibleTypes(fields);
-        checkRequiredFields(fields);
-        checkFieldsWithCardinalityLimit();
-        return detectExtractedFields(fields);
+        return fields;
     }
 
-    private Set<String> getIncludedFields() {
-        Set<String> fields = new HashSet<>(fieldCapabilitiesResponse.get().keySet());
-        checkResultsFieldIsNotPresent();
-        removeFieldsUnderResultsField(fields);
-        FetchSourceContext analyzedFields = config.getAnalyzedFields();
-
-        // If the user has not explicitly included fields we'll include all compatible fields
-        if (analyzedFields == null || analyzedFields.includes().length == 0) {
-            fields.removeAll(IGNORE_FIELDS);
-            removeFieldsWithIncompatibleTypes(fields);
+    private void removeFieldsUnderResultsField(Set<String> fields) {
+        String resultsField = config.getDest().getResultsField();
+        Iterator<String> fieldsIterator = fields.iterator();
+        while (fieldsIterator.hasNext()) {
+            String field = fieldsIterator.next();
+            if (field.startsWith(resultsField + ".")) {
+                fieldsIterator.remove();
+            }
         }
-        includeAndExcludeFields(fields);
-        return fields;
     }
 
     private void checkResultsFieldIsNotPresent() {
@@ -117,16 +132,21 @@
         }
     }
 
-    private void removeFieldsUnderResultsField(Set<String> fields) {
-        // Ignore fields under the results object
-        fields.removeIf(field -> field.startsWith(config.getDest().getResultsField() + "."));
+    private void addExcludedField(String field, String reason, Set<FieldSelection> fieldSelection) {
+        fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason));
     }
 
-    private void removeFieldsWithIncompatibleTypes(Set<String> fields) {
+    private Set<String> getMappingTypes(String field) {
+        Map<String, FieldCapabilities> fieldCaps = fieldCapabilitiesResponse.getField(field);
+        return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet();
+    }
+
+    private void removeFieldsWithIncompatibleTypes(Set<String> fields, Set<FieldSelection> fieldSelection) {
         Iterator<String> fieldsIterator = fields.iterator();
         while (fieldsIterator.hasNext()) {
             String field = fieldsIterator.next();
             if (hasCompatibleType(field) == false) {
+                addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
                 fieldsIterator.remove();
             }
         }
@@ -163,7 +183,7 @@
         return supportedTypes;
     }
 
-    private void includeAndExcludeFields(Set<String> fields) {
+    private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
         FetchSourceContext analyzedFields = config.getAnalyzedFields();
         if (analyzedFields == null) {
             return;
@@ -188,18 +208,30 @@
                     Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex)))
                 .expand(excludes, true);
 
-            fields.retainAll(includedSet);
-            fields.removeAll(excludedSet);
+            applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection);
         } catch (ResourceNotFoundException ex) {
             // Re-wrap our exception so that we throw the same exception type when there are no fields.
             throw ExceptionsHelper.badRequestException(ex.getMessage());
         }
     }
 
-    private void checkNoIgnoredFields(Set<String> fields) {
-        Optional<String> ignoreField = IGNORE_FIELDS.stream().filter(fields::contains).findFirst();
-        if (ignoreField.isPresent()) {
-            throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", ignoreField.get());
+    private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes,
+                                       Set<FieldSelection> fieldSelection) {
+        Iterator<String> fieldsIterator = fields.iterator();
+        while (fieldsIterator.hasNext()) {
+            String field = fieldsIterator.next();
+            if (includes.contains(field)) {
+                if (IGNORE_FIELDS.contains(field)) {
+                    throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", field);
+                }
+                if (excludes.contains(field)) {
+                    fieldsIterator.remove();
+                    addExcludedField(field, "field in excludes list", fieldSelection);
+                }
+            } else {
+                fieldsIterator.remove();
+                addExcludedField(field, "field not in includes list", fieldSelection);
+            }
         }
     }
@@ -247,13 +279,10 @@
         }
     }
 
-    private ExtractedFields detectExtractedFields(Set<String> fields) {
-        List<String> sortedFields = new ArrayList<>(fields);
-        // We sort the fields to ensure the checksum for each document is deterministic
-        Collections.sort(sortedFields);
-        ExtractedFields extractedFields = ExtractedFields.build(sortedFields, Collections.emptySet(), fieldCapabilitiesResponse);
+    private ExtractedFields detectExtractedFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
+        ExtractedFields extractedFields = ExtractedFields.build(fields, Collections.emptySet(), fieldCapabilitiesResponse);
         boolean preferSource = extractedFields.getDocValueFields().size() > docValueFieldsLimit;
-        extractedFields = deduplicateMultiFields(extractedFields, preferSource);
+        extractedFields = deduplicateMultiFields(extractedFields, preferSource, fieldSelection);
         if (preferSource) {
             extractedFields = fetchFromSourceIfSupported(extractedFields);
             if (extractedFields.getDocValueFields().size() > docValueFieldsLimit) {
@@ -266,7 +295,8 @@
         return extractedFields;
     }
 
-    private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource) {
+    private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource,
+                                                   Set<FieldSelection> fieldSelection) {
         Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName)
             .collect(Collectors.toSet());
         Map<String, ExtractedField> nameOrParentToField = new LinkedHashMap<>();
@@ -276,43 +306,53 @@
             if (existingField != null) {
                 ExtractedField parent = currentField.isMultiField() ? existingField : currentField;
                 ExtractedField multiField = currentField.isMultiField() ? currentField : existingField;
-                nameOrParentToField.put(nameOrParent, chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField));
+                nameOrParentToField.put(nameOrParent,
+                    chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField, fieldSelection));
             }
         }
         return new ExtractedFields(new ArrayList<>(nameOrParentToField.values()));
     }
 
-    private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set<String> requiredFields,
-                                                    ExtractedField parent, ExtractedField multiField) {
+    private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set<String> requiredFields, ExtractedField parent,
+                                                    ExtractedField multiField, Set<FieldSelection> fieldSelection) {
         // Check requirements first
         if (requiredFields.contains(parent.getName())) {
+            addExcludedField(multiField.getName(), "[" + parent.getName() + "] is required instead", fieldSelection);
             return parent;
         }
         if (requiredFields.contains(multiField.getName())) {
+            addExcludedField(parent.getName(), "[" + multiField.getName() + "] is required instead", fieldSelection);
             return multiField;
         }
 
         // If both are multi-fields it means there are several. In this case parent is the previous multi-field
         // we selected. We'll just keep that.
         if (parent.isMultiField() && multiField.isMultiField()) {
+            addExcludedField(multiField.getName(), "[" + parent.getName() + "] came first", fieldSelection);
             return parent;
         }
 
         // If we prefer source only the parent may support it. If it does we pick it immediately.
         if (preferSource && parent.supportsFromSource()) {
+            addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it supports fetching from source",
+                fieldSelection);
             return parent;
         }
 
         // If any of the two is a doc_value field let's prefer it as it'd support aggregations.
        // We check the parent first as it'd be a shorter field name.
         if (parent.getMethod() == ExtractedField.Method.DOC_VALUE) {
+            addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it is aggregatable", fieldSelection);
             return parent;
         }
         if (multiField.getMethod() == ExtractedField.Method.DOC_VALUE) {
+            addExcludedField(parent.getName(), "[" + multiField.getName() + "] is preferred because it is aggregatable", fieldSelection);
             return multiField;
         }
 
         // None is aggregatable. Let's pick the parent for its shorter name.
+        addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because none of the multi-fields are aggregatable",
+            fieldSelection);
         return parent;
     }
@@ -343,6 +383,26 @@
         return new ExtractedFields(adjusted);
     }
 
+    private void addIncludedFields(ExtractedFields extractedFields, Set<FieldSelection> fieldSelection) {
+        Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName)
+            .collect(Collectors.toSet());
+        Set<String> categoricalFields = getCategoricalFields(extractedFields);
+        for (ExtractedField includedField : extractedFields.getAllFields()) {
+            FieldSelection.FeatureType featureType = categoricalFields.contains(includedField.getName()) ?
+                FieldSelection.FeatureType.CATEGORICAL : FieldSelection.FeatureType.NUMERICAL;
+            fieldSelection.add(FieldSelection.included(includedField.getName(), includedField.getTypes(),
+                requiredFields.contains(includedField.getName()), featureType));
+        }
+    }
+
+    private Set<String> getCategoricalFields(ExtractedFields extractedFields) {
+        return extractedFields.getAllFields().stream()
+            .filter(extractedField -> config.getAnalysis().getAllowedCategoricalTypes(extractedField.getName())
+                .containsAll(extractedField.getTypes()))
+            .map(ExtractedField::getName)
+            .collect(Collectors.toSet());
+    }
+
     private static boolean isBoolean(Set<String> types) {
         return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
     }
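
The include/exclude pass above removes entries while iterating, via Iterator.remove(), so each dropped field is reported exactly once. A self-contained, pure-JDK illustration of the same idiom; the field names are invented:

import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

final class IteratorRemoveDemo {
    public static void main(String[] args) {
        Set<String> fields = new TreeSet<>(Set.of("field_a", "field_b", "_id"));
        Iterator<String> it = fields.iterator();
        while (it.hasNext()) {
            String field = it.next();
            if (field.startsWith("_")) {
                // safe removal during iteration; calling fields.remove(field) here instead
                // would risk a ConcurrentModificationException on the next it.next()
                it.remove();
            }
        }
        System.out.println(fields); // [field_a, field_b]
    }
}
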
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java
index 2e5189eb249..6740f8d4d34 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/MemoryUsageEstimationProcessManager.java
@@ -100,9 +100,9 @@ public class MemoryUsageEstimationProcessManager {
         } finally {
             process.consumeAndCloseOutputStream();
             try {
-                LOGGER.info("[{}] Closing process", jobId);
+                LOGGER.debug("[{}] Closing process", jobId);
                 process.close();
-                LOGGER.info("[{}] Closed process", jobId);
+                LOGGER.debug("[{}] Closed process", jobId);
             } catch (Exception e) {
                 String errorMsg = new ParameterizedMessage(
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java
deleted file mode 100644
index 25f2bcb4bb8..00000000000
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestEstimateMemoryUsageAction.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.rest.dataframe;
-
-import org.elasticsearch.client.node.NodeClient;
-import org.elasticsearch.rest.BaseRestHandler;
-import org.elasticsearch.rest.RestController;
-import org.elasticsearch.rest.RestRequest;
-import org.elasticsearch.rest.action.RestToXContentListener;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
-import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
-import org.elasticsearch.xpack.ml.MachineLearning;
-
-import java.io.IOException;
-
-public class RestEstimateMemoryUsageAction extends BaseRestHandler {
-
-    public RestEstimateMemoryUsageAction(RestController controller) {
-        controller.registerHandler(
-            RestRequest.Method.POST,
-            MachineLearning.BASE_PATH + "data_frame/analytics/_estimate_memory_usage", this);
-    }
-
-    @Override
-    public String getName() {
-        return "ml_estimate_memory_usage_action";
-    }
-
-    @Override
-    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
-        PutDataFrameAnalyticsAction.Request request =
-            PutDataFrameAnalyticsAction.Request.parseRequestForMemoryEstimation(restRequest.contentOrSourceParamParser());
-        return channel -> client.execute(EstimateMemoryUsageAction.INSTANCE, request, new RestToXContentListener<>(channel));
-    }
-}
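
The replacement handler added below serves GET and POST, with either an inline config or an id. A hedged sketch of exercising the body flavor with the low-level REST client; host, port and index name are placeholders, the endpoint path comes from this PR:

import org.apache.http.HttpHost;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.RestClient;

final class ExplainCallSketch {
    public static void main(String[] args) throws Exception {
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            Request request = new Request("POST", "/_ml/data_frame/analytics/_explain");
            request.setJsonEntity("{\"source\":{\"index\":\"index-source\"},\"analysis\":{\"outlier_detection\":{}}}");
            // the response body carries field_selection and memory_estimation sections
            Response response = client.performRequest(request);
            System.out.println(response.getStatusLine());
        }
    }
}
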
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java
new file mode 100644
index 00000000000..b16bf7b3efb
--- /dev/null
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/dataframe/RestExplainDataFrameAnalyticsAction.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.rest.dataframe;
+
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.rest.BaseRestHandler;
+import org.elasticsearch.rest.RestController;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.action.RestToXContentListener;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.ml.MachineLearning;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class RestExplainDataFrameAnalyticsAction extends BaseRestHandler {
+
+    public RestExplainDataFrameAnalyticsAction(RestController controller) {
+        controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this);
+        controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this);
+        controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/{"
+            + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this);
+        controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/{"
+            + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this);
+    }
+
+    @Override
+    public String getName() {
+        return "ml_explain_data_frame_analytics_action";
+    }
+
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
+        final String jobId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName());
+
+        if (Strings.isNullOrEmpty(jobId) && restRequest.hasContentOrSourceParam() == false) {
+            throw ExceptionsHelper.badRequestException("Please provide a job [{}] or the config object",
+                DataFrameAnalyticsConfig.ID.getPreferredName());
+        }
+
+        if (Strings.isNullOrEmpty(jobId) == false && restRequest.hasContentOrSourceParam()) {
+            throw ExceptionsHelper.badRequestException("Please provide either a job [{}] or the config object but not both",
+                DataFrameAnalyticsConfig.ID.getPreferredName());
+        }
+
+        // We need to consume the body before returning
+        PutDataFrameAnalyticsAction.Request explainRequestFromBody = Strings.isNullOrEmpty(jobId) ?
+            PutDataFrameAnalyticsAction.Request.parseRequestForExplain(restRequest.contentOrSourceParamParser()) : null;
+
+        return channel -> {
+            RestToXContentListener<ExplainDataFrameAnalyticsAction.Response> listener = new RestToXContentListener<>(channel);
+
+            if (explainRequestFromBody != null) {
+                client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequestFromBody, listener);
+            } else {
+                GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(jobId);
+                getRequest.setAllowNoResources(false);
+                client.execute(GetDataFrameAnalyticsAction.INSTANCE, getRequest, ActionListener.wrap(
+                    getResponse -> {
+                        List<DataFrameAnalyticsConfig> jobs = getResponse.getResources().results();
+                        if (jobs.size() > 1) {
+                            listener.onFailure(ExceptionsHelper.badRequestException("expected only one config but matched {}",
+                                jobs.stream().map(DataFrameAnalyticsConfig::getId).collect(Collectors.toList())));
+                        } else {
+                            PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(jobs.get(0));
+                            client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequest, listener);
+                        }
+                    },
+                    listener::onFailure
+                ));
+            }
+        };
+    }
+}
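
The test expectations below assume field_selection entries arrive sorted by field name; that ordering comes from the comparator-backed TreeSet in the detector. A pure-JDK illustration of the idiom, with an invented Selection class:

import java.util.Comparator;
import java.util.TreeSet;

final class SortedSelectionDemo {
    static final class Selection {
        final String name;
        Selection(String name) { this.name = name; }
    }

    public static void main(String[] args) {
        TreeSet<Selection> selections = new TreeSet<>(Comparator.comparing((Selection s) -> s.name));
        selections.add(new Selection("some_long"));
        selections.add(new Selection("some_boolean"));
        selections.add(new Selection("some_boolean")); // duplicate name: not added again
        selections.forEach(s -> System.out.println(s.name)); // some_boolean, then some_long
    }
}
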
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
index 8f33c9bfbbf..5f7bd650a1c 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/extractor/ExtractedFieldsDetectorTests.java
@@ -8,6 +8,7 @@ package org.elasticsearch.xpack.ml.dataframe.extractor;
 import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.fieldcaps.FieldCapabilities;
 import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.test.ESTestCase;
@@ -17,6 +18,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
+import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
 import org.elasticsearch.xpack.ml.extractor.ExtractedField;
 import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
 import org.elasticsearch.xpack.ml.test.SearchHitBuilder;
@@ -25,6 +27,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
@@ -48,12 +51,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         assertThat(allFields.get(0).getName(), equalTo("some_float"));
         assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL));
     }
 
     public void testDetect_GivenNumericFieldWithMultipleTypes() {
@@ -63,12 +69,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         assertThat(allFields.get(0).getName(), equalTo("some_number"));
         assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
+
+        assertFieldSelectionContains(fieldExtraction.v2(), FieldSelection.included("some_number",
+            new HashSet<>(Arrays.asList("long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")), false,
+            FieldSelection.FeatureType.NUMERICAL));
     }
 
     public void testDetect_GivenOutlierDetectionAndNonNumericField() {
@@ -105,14 +115,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(3));
         assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()),
             containsInAnyOrder("some_float", "some_long", "some_boolean"));
         assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.DOC_VALUE)));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; "
+                + "supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
+            FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenRegressionAndMultipleFields() {
@@ -126,14 +144,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("foo"), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(5));
         assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()),
             containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean"));
         assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.DOC_VALUE)));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("foo", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("some_keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenRegressionAndRequiredFieldMissing() {
@@ -191,11 +217,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), contains("bar"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("bar", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("foo", Collections.singleton("float"), "field in excludes list")
+        );
     }
 
     public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() {
@@ -258,14 +289,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
     public void testDetect_GivenIncludedIgnoredField() {
         FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
-            .addAggregatableField("_id", "float").build();
+            .addAggregatableField("_id", "float")
+            .build();
         FetchSourceContext analyzedFields = new FetchSourceContext(true, new String[]{"_id"}, new String[0]);
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> extractedFieldsDetector.detect());
 
-        assertThat(e.getMessage(), equalTo("field [_id] cannot be analyzed"));
+        assertThat(e.getMessage(), equalTo("No field [_id] could be detected"));
     }
 
     public void testDetect_ShouldSortFieldsAlphabetically() {
@@ -285,9 +317,9 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(sortedFields));
     }
@@ -333,11 +365,17 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(desiredFields), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2")));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("my_field1_nope", Collections.singleton("float"), "field in excludes list"),
+            FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenIncludedFieldHasUnsupportedType() {
@@ -384,11 +422,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2")));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types "
+                + "are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]")
+        );
     }
 
     public void testDetect_GivenIncludedResultsField() {
@@ -434,12 +479,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), true, 4, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.DOC_VALUE)));
     }
 
@@ -453,12 +498,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), true, 3, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.DOC_VALUE)));
     }
 
@@ -472,12 +517,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), true, 2, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
-        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
+        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
             contains(equalTo(ExtractedField.Method.SOURCE)));
     }
 
@@ -488,14 +533,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         ExtractedField booleanField = allFields.get(0);
         assertThat(booleanField.getTypes(), contains("boolean"));
         assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
 
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
+
         SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
         assertThat(booleanField.value(hit), arrayContaining(1));
 
@@ -514,14 +563,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("some_boolean"), false, 100, fieldCapabilities,
             Collections.singletonMap("some_boolean", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        List<ExtractedField> allFields = extractedFields.getAllFields();
+        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
         assertThat(allFields.size(), equalTo(1));
         ExtractedField booleanField = allFields.get(0);
         assertThat(booleanField.getTypes(), contains("boolean"));
         assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
 
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("some_boolean", Collections.singleton("boolean"), true, FieldSelection.FeatureType.CATEGORICAL)
+        );
+
         SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
         assertThat(booleanField.value(hit), arrayContaining("true"));
 
@@ -546,12 +599,26 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("a_float"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(5));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(5));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("a_float", "keyword_1", "text_1.keyword", "text_2.keyword", "text_without_keyword"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("a_float", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.included("keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("keyword_1.text", Collections.singleton("text"),
+                "[keyword_1] is preferred because it is aggregatable"),
+            FieldSelection.excluded("text_1", Collections.singleton("text"),
+                "[text_1.keyword] is preferred because it is aggregatable"),
+            FieldSelection.included("text_1.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("text_2", Collections.singleton("text"),
+                "[text_2.keyword] is preferred because it is aggregatable"),
+            FieldSelection.included("text_2.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("text_without_keyword", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL)
+        );
     }
 
     public void testDetect_GivenMultiFieldAndParentIsRequired() {
@@ -563,12 +630,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("field_1"), true, 100, fieldCapabilities,
             Collections.singletonMap("field_1", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+                "[field_1] is required instead"),
+            FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
@@ -581,12 +655,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildClassificationConfig("field_1.keyword"), true, 100, fieldCapabilities,
             Collections.singletonMap("field_1.keyword", 2L));
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1.keyword", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.excluded("field_1", Collections.singleton("keyword"),
+                "[field_1.keyword] is required instead"),
+            FieldSelection.included("field_1.keyword", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
@@ -600,12 +681,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1.keyword_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.excluded("field_1", Collections.singleton("text"),
+                "[field_1.keyword_1] is preferred because it is aggregatable"),
+            FieldSelection.included("field_1.keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword_2", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+            FieldSelection.excluded("field_1.keyword_3", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenMultiFields_OverDocValueLimit() {
@@ -617,12 +707,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 0, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword_1", Collections.singleton("keyword"),
+                "[field_1] is preferred because it supports fetching from source"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
@@ -635,12 +732,20 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2.double"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2.double"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
+                "[field_1] is preferred because it is aggregatable"),
+            FieldSelection.included("field_2.double", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
+            FieldSelection.excluded("field_2.keyword", Collections.singleton("float"), "[field_2.double] is required instead")
+        );
     }
 
     public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
@@ -652,12 +757,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.text", Collections.singleton("text"),
+                "[field_1] is preferred because none of the multi-fields are aggregatable"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
@@ -670,12 +782,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
 
         ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
             SOURCE_INDEX, buildRegressionConfig("field_2", analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
-        ExtractedFields extractedFields = extractedFieldsDetector.detect();
+        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
 
-        assertThat(extractedFields.getAllFields().size(), equalTo(2));
-        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
+        assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
+        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
             .collect(Collectors.toList());
         assertThat(extractedFieldNames, contains("field_1", "field_2"));
+
+        assertFieldSelectionContains(fieldExtraction.v2(),
+            FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
+            FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), "field not in includes list"),
+            FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
+        );
     }
 
     private static DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
@@ -715,6 +833,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
             .build();
     }
 
+    /**
+     * We assert each field individually to get useful error messages in case of failure
+     */
+    private static void assertFieldSelectionContains(List<FieldSelection> actual, FieldSelection... expected) {
+        assertThat(actual.size(), equalTo(expected.length));
+        for (int i = 0; i < expected.length; i++) {
+            assertThat("i = " + i, actual.get(i).getName(), equalTo(expected[i].getName()));
+            assertThat("i = " + i, actual.get(i).getMappingTypes(), equalTo(expected[i].getMappingTypes()));
+            assertThat("i = " + i, actual.get(i).isIncluded(), equalTo(expected[i].isIncluded()));
+            assertThat("i = " + i, actual.get(i).isRequired(), equalTo(expected[i].isRequired()));
+            assertThat("i = " + i, actual.get(i).getFeatureType(), equalTo(expected[i].getFeatureType()));
+            assertThat("i = " + i, actual.get(i).getReason(), equalTo(expected[i].getReason()));
+        }
+    }
+
     private static class MockFieldCapsResponseBuilder {
 
         private final Map<String, Map<String, FieldCapabilities>> fieldCaps = new HashMap<>();
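
The API spec and YAML tests below cover both addressing styles; an id and a request body are mutually exclusive. For completeness, a hedged sketch of the by-id flavor with the low-level client, where the id "my-analytics" and the host are placeholders:

import org.apache.http.HttpHost;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RestClient;

final class ExplainByIdSketch {
    public static void main(String[] args) throws Exception {
        try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "http")).build()) {
            // No request body here: the stored config identified by the id is explained instead.
            System.out.println(client.performRequest(
                new Request("GET", "/_ml/data_frame/analytics/my-analytics/_explain")).getStatusLine());
        }
    }
}
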
result: "created" } - - # Note that value for "y" is missing and outlier detection analysis does not support missing values. - # Hence, the data frame is still considered empty. - - do: - catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - ---- -"Test memory usage estimation for non-empty data frame": - - do: - index: - index: index-source - refresh: true - body: { x: 1, y: 10 } - - match: { result: "created" } - - - do: - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "3kb" } - - match: { expected_memory_with_disk: "3kb" } - - - do: - index: - index: index-source - refresh: true - body: { x: 2, y: 20 } - - match: { result: "created" } - - - do: - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "4kb" } - - match: { expected_memory_with_disk: "4kb" } - - - do: - index: - index: index-source - refresh: true - body: { x: 3, y: 30 } - - match: { result: "created" } - - - do: - ml.estimate_memory_usage: - body: - source: { index: "index-source" } - analysis: { outlier_detection: {} } - - match: { expected_memory_without_disk: "6kb" } - - match: { expected_memory_with_disk: "5kb" } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml new file mode 100644 index 00000000000..f4296427256 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml @@ -0,0 +1,308 @@ +--- +"Test neither job id nor body": + - do: + catch: /Please provide a job \[id\] or the config object/ + ml.explain_data_frame_analytics: + id: "" + +--- +"Test both job id and body": + - do: + catch: /Please provide either a job \[id\] or the config object but not both/ + ml.explain_data_frame_analytics: + id: "foo" + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + +--- +"Test missing job": + - do: + catch: missing + ml.explain_data_frame_analytics: + id: "no_such_job" + +--- +"Test id that matches multiple jobs": + + - do: + indices.create: + index: index-source + + - do: + ml.put_data_frame_analytics: + id: "foo-1" + body: > + { + "source": { + "index": "index-source" + }, + "dest": { + "index": "index-dest" + }, + "analysis": {"outlier_detection":{}} + } + + - do: + ml.put_data_frame_analytics: + id: "foo-2" + body: > + { + "source": { + "index": "index-source" + }, + "dest": { + "index": "index-dest" + }, + "analysis": {"outlier_detection":{}} + } + + - do: + catch: /expected only one config but matched \[foo-1, foo-2\]/ + ml.explain_data_frame_analytics: + id: "foo-*" + +--- +"Test empty data frame given body": + + - do: + indices.create: + index: index-source + body: + mappings: + properties: + x: + type: float + y: + type: float + + - do: + catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/ + ml.explain_data_frame_analytics: + body: + source: { index: "index-source" } + analysis: { outlier_detection: {} } + + - do: + index: + index: index-source + refresh: true + body: { x: 1 } + - match: { result: "created" } + + # 
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
new file mode 100644
index 00000000000..f4296427256
--- /dev/null
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/explain_data_frame_analytics.yml
@@ -0,0 +1,308 @@
+---
+"Test neither job id nor body":
+  - do:
+      catch: /Please provide a job \[id\] or the config object/
+      ml.explain_data_frame_analytics:
+        id: ""
+
+---
+"Test both job id and body":
+  - do:
+      catch: /Please provide either a job \[id\] or the config object but not both/
+      ml.explain_data_frame_analytics:
+        id: "foo"
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test missing job":
+  - do:
+      catch: missing
+      ml.explain_data_frame_analytics:
+        id: "no_such_job"
+
+---
+"Test id that matches multiple jobs":
+
+  - do:
+      indices.create:
+        index: index-source
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-1"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-2"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      catch: /expected only one config but matched \[foo-1, foo-2\]/
+      ml.explain_data_frame_analytics:
+        id: "foo-*"
+
+---
+"Test empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1 }
+  - match: { result: "created" }
+
+  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
+  # Hence, the data frame is still considered empty.
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test non-empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1, y: 10 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "3kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "3kb" }
+  - length: { field_selection: 2 }
+  - match: { field_selection.0.name: "x" }
+  - match: { field_selection.0.mapping_types: ["float"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: false }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "y" }
+  - match: { field_selection.1.mapping_types: ["float"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 2, y: 20 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "4kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "4kb" }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 3, y: 30 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "6kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "5kb" }
+
+---
+"Test field_selection given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { regression: { dependent_variable: "field_1" } }
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
+
+---
+"Test field_selection given job":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"regression":{ "dependent_variable": "field_1" }}
+          }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
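
Taken together, the two field_selection tests encode the detector's rules: the regression dependent variable is both included and required, a date field is excluded as an unsupported type, and a text field defers to its aggregatable keyword multi-field. As a rough sketch, the same expectations could be written with the FieldSelection factory methods used in ExtractedFieldsDetectorTests above; the surrounding method and the FieldSelection import are assumed for illustration.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Illustrative only: mirrors the YAML expectations above using the
// FieldSelection factory methods exercised by ExtractedFieldsDetectorTests.
static List<FieldSelection> expectedSelectionForRegressionOnField1() {
    return Arrays.asList(
        FieldSelection.included("field_1", Collections.singleton("integer"), true, FieldSelection.FeatureType.NUMERICAL),
        FieldSelection.included("field_2", Collections.singleton("double"), false, FieldSelection.FeatureType.NUMERICAL),
        FieldSelection.excluded("field_3", Collections.singleton("date"),
            "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, "
                + "long, scaled_float, short, text]"),
        FieldSelection.excluded("field_4", Collections.singleton("text"),
            "[field_4.keyword] is preferred because it is aggregatable"),
        FieldSelection.included("field_4.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL)
    );
}

Note how the boolean argument tracks is_required: only the regression dependent variable is required here.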