This commit replaces the _estimate_memory_usage API with a new API, the _explain API. The new API consolidates information that is useful before creating a data frame analytics job. It includes:

- memory estimation
- field selection explanation

Memory estimation is moved here from what was previously calculated in the _estimate_memory_usage API. Field selection is a new feature that explains to the user whether each available field was selected for inclusion in the analysis. If a field was not included, it also explains why.

Backport of #49455
parent 69f570ea5f
commit 8eaee7cbdc
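As orientation before the diff, here is a minimal sketch of how the new API is driven from the high-level REST client, pieced together from the classes this commit adds. The job id and index name are placeholders, and error handling is omitted; either request form works with the same call:

----
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.client.ml.dataframe.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;

import java.io.IOException;
import java.util.List;

class ExplainUsageSketch {

    static void explain(RestHighLevelClient client) throws IOException {
        // Option 1: explain a job that already exists ("my-job-id" is a placeholder).
        ExplainDataFrameAnalyticsRequest byId = new ExplainDataFrameAnalyticsRequest("my-job-id");

        // Option 2: explain a config before creating the job ("my-index" is a placeholder).
        ExplainDataFrameAnalyticsRequest byConfig = new ExplainDataFrameAnalyticsRequest(
            DataFrameAnalyticsConfig.builder()
                .setSource(DataFrameAnalyticsSource.builder().setIndex("my-index").build())
                .setAnalysis(OutlierDetection.createDefault())
                .build());

        ExplainDataFrameAnalyticsResponse response =
            client.machineLearning().explainDataFrameAnalytics(byConfig, RequestOptions.DEFAULT);

        List<FieldSelection> fieldSelection = response.getFieldSelection(); // per-field inclusion decisions
        MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // expected memory with/without disk
    }
}
----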
@@ -29,6 +29,7 @@ import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.client.RequestConverters.EndpointBuilder;
 import org.elasticsearch.client.core.PageParams;
 import org.elasticsearch.client.ml.CloseJobRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
 import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
 import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
 import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -701,12 +702,17 @@ final class MLRequestConverters {
         return request;
     }
 
-    static Request estimateMemoryUsage(PutDataFrameAnalyticsRequest estimateRequest) throws IOException {
-        String endpoint = new EndpointBuilder()
-            .addPathPartAsIs("_ml", "data_frame", "analytics", "_estimate_memory_usage")
-            .build();
-        Request request = new Request(HttpPost.METHOD_NAME, endpoint);
-        request.setEntity(createEntity(estimateRequest, REQUEST_BODY_CONTENT_TYPE));
+    static Request explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest explainRequest) throws IOException {
+        EndpointBuilder endpoint = new EndpointBuilder().addPathPartAsIs("_ml", "data_frame", "analytics");
+        if (explainRequest.getId() != null) {
+            endpoint.addPathPart(explainRequest.getId());
+        }
+        endpoint.addPathPartAsIs("_explain");
+
+        Request request = new Request(HttpPost.METHOD_NAME, endpoint.build());
+        if (explainRequest.getConfig() != null) {
+            request.setEntity(createEntity(explainRequest.getConfig(), REQUEST_BODY_CONTENT_TYPE));
+        }
         return request;
     }
 
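Depending on which constructor was used, the converter above resolves to one of two endpoints; the request-converter tests later in this commit assert exactly this mapping:

----
// new ExplainDataFrameAnalyticsRequest("foo")   -> POST /_ml/data_frame/analytics/foo/_explain  (no body)
// new ExplainDataFrameAnalyticsRequest(config)  -> POST /_ml/data_frame/analytics/_explain      (config as body)
----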
@@ -22,6 +22,8 @@ import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.master.AcknowledgedResponse;
 import org.elasticsearch.client.ml.CloseJobRequest;
 import org.elasticsearch.client.ml.CloseJobResponse;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
 import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
 import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
 import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -34,7 +36,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
-import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
 import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -2249,46 +2250,46 @@ public final class MachineLearningClient {
     }
 
     /**
-     * Estimates memory usage for the given Data Frame Analytics
+     * Explains the given Data Frame Analytics
      * <p>
     * For additional info
-     * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html">
-     *     Estimate Memory Usage for Data Frame Analytics documentation</a>
+     * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html">
+     *     Explain Data Frame Analytics documentation</a>
      *
-     * @param request The {@link PutDataFrameAnalyticsRequest}
+     * @param request The {@link ExplainDataFrameAnalyticsRequest}
      * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
-     * @return {@link EstimateMemoryUsageResponse} response object
+     * @return {@link ExplainDataFrameAnalyticsResponse} response object
      * @throws IOException when there is a serialization issue sending the request or receiving the response
      */
-    public EstimateMemoryUsageResponse estimateMemoryUsage(PutDataFrameAnalyticsRequest request,
-                                                           RequestOptions options) throws IOException {
+    public ExplainDataFrameAnalyticsResponse explainDataFrameAnalytics(ExplainDataFrameAnalyticsRequest request,
+                                                                       RequestOptions options) throws IOException {
         return restHighLevelClient.performRequestAndParseEntity(
             request,
-            MLRequestConverters::estimateMemoryUsage,
+            MLRequestConverters::explainDataFrameAnalytics,
             options,
-            EstimateMemoryUsageResponse::fromXContent,
+            ExplainDataFrameAnalyticsResponse::fromXContent,
             Collections.emptySet());
     }
 
     /**
-     * Estimates memory usage for the given Data Frame Analytics asynchronously and notifies listener upon completion
+     * Explains the given Data Frame Analytics asynchronously and notifies listener upon completion
      * <p>
     * For additional info
-     * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html">
-     *     Estimate Memory Usage for Data Frame Analytics documentation</a>
+     * see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html">
+     *     Explain Data Frame Analytics documentation</a>
      *
-     * @param request The {@link PutDataFrameAnalyticsRequest}
+     * @param request The {@link ExplainDataFrameAnalyticsRequest}
      * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
      * @param listener Listener to be notified upon request completion
      * @return cancellable that may be used to cancel the request
      */
-    public Cancellable estimateMemoryUsageAsync(PutDataFrameAnalyticsRequest request, RequestOptions options,
-                                                ActionListener<EstimateMemoryUsageResponse> listener) {
+    public Cancellable explainDataFrameAnalyticsAsync(ExplainDataFrameAnalyticsRequest request, RequestOptions options,
+                                                      ActionListener<ExplainDataFrameAnalyticsResponse> listener) {
         return restHighLevelClient.performRequestAsyncAndParseEntity(
             request,
-            MLRequestConverters::estimateMemoryUsage,
+            MLRequestConverters::explainDataFrameAnalytics,
             options,
-            EstimateMemoryUsageResponse::fromXContent,
+            ExplainDataFrameAnalyticsResponse::fromXContent,
             listener,
             Collections.emptySet());
     }
 
@@ -0,0 +1,72 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.Validatable;
+import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.common.Nullable;
+
+import java.util.Objects;
+
+/**
+ * Request to explain the following about a data frame analytics job:
+ * <ul>
+ *     <li>field selection: which fields are included or are not in the analysis</li>
+ *     <li>memory estimation: how much memory the job is estimated to require</li>
+ * </ul>
+ */
+public class ExplainDataFrameAnalyticsRequest implements Validatable {
+
+    private final String id;
+    private final DataFrameAnalyticsConfig config;
+
+    public ExplainDataFrameAnalyticsRequest(String id) {
+        this.id = Objects.requireNonNull(id);
+        this.config = null;
+    }
+
+    public ExplainDataFrameAnalyticsRequest(DataFrameAnalyticsConfig config) {
+        this.id = null;
+        this.config = Objects.requireNonNull(config);
+    }
+
+    @Nullable
+    public String getId() {
+        return id;
+    }
+
+    @Nullable
+    public DataFrameAnalyticsConfig getConfig() {
+        return config;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        ExplainDataFrameAnalyticsRequest other = (ExplainDataFrameAnalyticsRequest) o;
+        return Objects.equals(id, other.id) && Objects.equals(config, other.config);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(id, config);
+    }
+}
@@ -0,0 +1,94 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Objects;
+
+public class ExplainDataFrameAnalyticsResponse implements ToXContentObject {
+
+    public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");
+
+    public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
+    public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");
+
+    public static ExplainDataFrameAnalyticsResponse fromXContent(XContentParser parser) throws IOException {
+        return PARSER.parse(parser, null);
+    }
+
+    @SuppressWarnings("unchecked")
+    static final ConstructingObjectParser<ExplainDataFrameAnalyticsResponse, Void> PARSER =
+        new ConstructingObjectParser<>(
+            TYPE.getPreferredName(), true,
+            args -> new ExplainDataFrameAnalyticsResponse((List<FieldSelection>) args[0], (MemoryEstimation) args[1]));
+
+    static {
+        PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
+        PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
+    }
+
+    private final List<FieldSelection> fieldSelection;
+    private final MemoryEstimation memoryEstimation;
+
+    public ExplainDataFrameAnalyticsResponse(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) {
+        this.fieldSelection = Objects.requireNonNull(fieldSelection);
+        this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
+        builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) return true;
+        if (other == null || getClass() != other.getClass()) return false;
+
+        ExplainDataFrameAnalyticsResponse that = (ExplainDataFrameAnalyticsResponse) other;
+        return Objects.equals(fieldSelection, that.fieldSelection)
+            && Objects.equals(memoryEstimation, that.memoryEstimation);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(fieldSelection, memoryEstimation);
+    }
+
+    public MemoryEstimation getMemoryEstimation() {
+        return memoryEstimation;
+    }
+
+    public List<FieldSelection> getFieldSelection() {
+        return fieldSelection;
+    }
+}
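Outside the client plumbing, a raw JSON body can be fed through fromXContent directly. A minimal sketch — the parser-construction boilerplate is an assumption based on the usual XContent setup of this era, not part of this commit:

----
import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
import org.elasticsearch.common.xcontent.DeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;

import java.io.IOException;

class ResponseParsingSketch {

    // Parses a body shaped like:
    // {"field_selection":[...], "memory_estimation":{"expected_memory_without_disk":"128mb"}}
    static ExplainDataFrameAnalyticsResponse parse(String json) throws IOException {
        try (XContentParser parser = JsonXContent.jsonXContent.createParser(
                 NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
            return ExplainDataFrameAnalyticsResponse.fromXContent(parser);
        }
    }
}
----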
@@ -0,0 +1,163 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.dataframe.explain;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+public class FieldSelection implements ToXContentObject {
+
+    private static final ParseField NAME = new ParseField("name");
+    private static final ParseField MAPPING_TYPES = new ParseField("mapping_types");
+    private static final ParseField IS_INCLUDED = new ParseField("is_included");
+    private static final ParseField IS_REQUIRED = new ParseField("is_required");
+    private static final ParseField FEATURE_TYPE = new ParseField("feature_type");
+    private static final ParseField REASON = new ParseField("reason");
+
+    public enum FeatureType {
+        CATEGORICAL, NUMERICAL;
+
+        public static FeatureType fromString(String value) {
+            return FeatureType.valueOf(value.toUpperCase(Locale.ROOT));
+        }
+
+        @Override
+        public String toString() {
+            return name().toLowerCase(Locale.ROOT);
+        }
+    }
+
+    @SuppressWarnings("unchecked")
+    public static ConstructingObjectParser<FieldSelection, Void> PARSER = new ConstructingObjectParser<>("field_selection", true,
+        a -> new FieldSelection((String) a[0], new HashSet<>((List<String>) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4],
+            (String) a[5]));
+
+    static {
+        PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME);
+        PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED);
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED);
+        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+                return FeatureType.fromString(p.text());
+            }
+            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+        }, FEATURE_TYPE, ObjectParser.ValueType.STRING);
+        PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON);
+    }
+
+    private final String name;
+    private final Set<String> mappingTypes;
+    private final boolean isIncluded;
+    private final boolean isRequired;
+    private final FeatureType featureType;
+    private final String reason;
+
+    public static FieldSelection included(String name, Set<String> mappingTypes, boolean isRequired, FeatureType featureType) {
+        return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null);
+    }
+
+    public static FieldSelection excluded(String name, Set<String> mappingTypes, String reason) {
+        return new FieldSelection(name, mappingTypes, false, false, null, reason);
+    }
+
+    FieldSelection(String name, Set<String> mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType,
+                   @Nullable String reason) {
+        this.name = Objects.requireNonNull(name);
+        this.mappingTypes = Collections.unmodifiableSet(mappingTypes);
+        this.isIncluded = isIncluded;
+        this.isRequired = isRequired;
+        this.featureType = featureType;
+        this.reason = reason;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(NAME.getPreferredName(), name);
+        builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes);
+        builder.field(IS_INCLUDED.getPreferredName(), isIncluded);
+        builder.field(IS_REQUIRED.getPreferredName(), isRequired);
+        if (featureType != null) {
+            builder.field(FEATURE_TYPE.getPreferredName(), featureType);
+        }
+        if (reason != null) {
+            builder.field(REASON.getPreferredName(), reason);
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+        FieldSelection that = (FieldSelection) o;
+        return Objects.equals(name, that.name)
+            && Objects.equals(mappingTypes, that.mappingTypes)
+            && isIncluded == that.isIncluded
+            && isRequired == that.isRequired
+            && Objects.equals(featureType, that.featureType)
+            && Objects.equals(reason, that.reason);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason);
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public Set<String> getMappingTypes() {
+        return mappingTypes;
+    }
+
+    public boolean isIncluded() {
+        return isIncluded;
+    }
+
+    public boolean isRequired() {
+        return isRequired;
+    }
+
+    @Nullable
+    public FeatureType getFeatureType() {
+        return featureType;
+    }
+
+    @Nullable
+    public String getReason() {
+        return reason;
+    }
+}
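The two factory methods encode the class invariants: an included field never carries a reason, and an excluded field never carries a feature type. A short sketch with illustrative field names, mapping types, and reason text (none of these values come from the commit):

----
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;

import java.util.Collections;

class FieldSelectionSketch {

    static void examples() {
        // Included field: has a feature type, never a reason.
        FieldSelection included = FieldSelection.included(
            "total", Collections.singleton("long"), true, FieldSelection.FeatureType.NUMERICAL);

        // Excluded field: has a reason, never a feature type.
        FieldSelection excluded = FieldSelection.excluded(
            "description", Collections.singleton("text"), "unsupported type");
    }
}
----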
@@ -16,8 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
-package org.elasticsearch.client.ml;
+package org.elasticsearch.client.ml.dataframe.explain;
 
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
@@ -26,23 +25,19 @@ import org.elasticsearch.common.xcontent.ConstructingObjectParser;
 import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.ToXContentObject;
 import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentParser;
 
 import java.io.IOException;
 import java.util.Objects;
 
 import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
-public class EstimateMemoryUsageResponse implements ToXContentObject {
+public class MemoryEstimation implements ToXContentObject {
 
     public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk");
     public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk");
 
-    static final ConstructingObjectParser<EstimateMemoryUsageResponse, Void> PARSER =
-        new ConstructingObjectParser<>(
-            "estimate_memory_usage_response",
-            true,
-            args -> new EstimateMemoryUsageResponse((ByteSizeValue) args[0], (ByteSizeValue) args[1]));
+    public static final ConstructingObjectParser<MemoryEstimation, Void> PARSER = new ConstructingObjectParser<>("memory_estimation", true,
+        a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1]));
 
     static {
         PARSER.declareField(
@@ -57,14 +52,10 @@ public class EstimateMemoryUsageResponse implements ToXContentObject {
             ObjectParser.ValueType.VALUE);
     }
 
-    public static EstimateMemoryUsageResponse fromXContent(XContentParser parser) {
-        return PARSER.apply(parser, null);
-    }
-
     private final ByteSizeValue expectedMemoryWithoutDisk;
     private final ByteSizeValue expectedMemoryWithDisk;
 
-    public EstimateMemoryUsageResponse(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
+    public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
         this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk;
         this.expectedMemoryWithDisk = expectedMemoryWithDisk;
     }
@@ -99,7 +90,7 @@ public class EstimateMemoryUsageResponse implements ToXContentObject {
             return false;
         }
 
-        EstimateMemoryUsageResponse that = (EstimateMemoryUsageResponse) other;
+        MemoryEstimation that = (MemoryEstimation) other;
         return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk)
             && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk);
     }
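Both getters are nullable, so a consumer picking a `model_memory_limit` needs a fallback. The sketch below is one possible policy, not something the commit prescribes; the "usually smaller" relationship between the two figures comes from the asciidoc reference docs later in this commit:

----
import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
import org.elasticsearch.common.unit.ByteSizeValue;

class MemoryEstimationSketch {

    static ByteSizeValue suggestModelMemoryLimit(MemoryEstimation estimation) {
        ByteSizeValue withoutDisk = estimation.getExpectedMemoryWithoutDisk(); // may be null
        ByteSizeValue withDisk = estimation.getExpectedMemoryWithDisk();       // may be null
        // Sizing for the fully in-memory case avoids disk overflow; the with-disk
        // figure is usually smaller and works as a lower-budget fallback.
        return withoutDisk != null ? withoutDisk : withDisk;
    }
}
----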
@@ -25,6 +25,7 @@ import org.apache.http.client.methods.HttpPost;
 import org.apache.http.client.methods.HttpPut;
 import org.elasticsearch.client.core.PageParams;
 import org.elasticsearch.client.ml.CloseJobRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
 import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
 import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
 import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -788,14 +789,25 @@ public class MLRequestConvertersTests extends ESTestCase {
         }
     }
 
-    public void testEstimateMemoryUsage() throws IOException {
-        PutDataFrameAnalyticsRequest estimateRequest = new PutDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig());
-        Request request = MLRequestConverters.estimateMemoryUsage(estimateRequest);
-        assertEquals(HttpPost.METHOD_NAME, request.getMethod());
-        assertEquals("/_ml/data_frame/analytics/_estimate_memory_usage", request.getEndpoint());
-        try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) {
-            DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser);
-            assertThat(parsedConfig, equalTo(estimateRequest.getConfig()));
+    public void testExplainDataFrameAnalytics() throws IOException {
+        // Request with config
+        {
+            ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest(randomDataFrameAnalyticsConfig());
+            Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest);
+            assertEquals(HttpPost.METHOD_NAME, request.getMethod());
+            assertEquals("/_ml/data_frame/analytics/_explain", request.getEndpoint());
+            try (XContentParser parser = createParser(JsonXContent.jsonXContent, request.getEntity().getContent())) {
+                DataFrameAnalyticsConfig parsedConfig = DataFrameAnalyticsConfig.fromXContent(parser);
+                assertThat(parsedConfig, equalTo(estimateRequest.getConfig()));
+            }
+        }
+        // Request with id
+        {
+            ExplainDataFrameAnalyticsRequest estimateRequest = new ExplainDataFrameAnalyticsRequest("foo");
+            Request request = MLRequestConverters.explainDataFrameAnalytics(estimateRequest);
+            assertEquals(HttpPost.METHOD_NAME, request.getMethod());
+            assertEquals("/_ml/data_frame/analytics/foo/_explain", request.getEndpoint());
+            assertNull(request.getEntity());
+        }
     }
 
@@ -32,6 +32,8 @@ import org.elasticsearch.client.indices.CreateIndexRequest;
 import org.elasticsearch.client.indices.GetIndexRequest;
 import org.elasticsearch.client.ml.CloseJobRequest;
 import org.elasticsearch.client.ml.CloseJobResponse;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
 import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
 import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
 import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -44,7 +46,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
-import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
 import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -140,6 +141,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
 import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
 import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
 import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
 import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
 import org.elasticsearch.client.ml.inference.TrainedModelConfig;
 import org.elasticsearch.client.ml.inference.TrainedModelDefinition;
@@ -1996,8 +1999,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
         highLevelClient().indices().create(new CreateIndexRequest(indexName).mapping(mapping), RequestOptions.DEFAULT);
     }
 
-    public void testEstimateMemoryUsage() throws IOException {
-        String indexName = "estimate-test-index";
+    public void testExplainDataFrameAnalytics() throws IOException {
+        String indexName = "explain-df-test-index";
         createIndex(indexName, mappingForSoftClassification());
         BulkRequest bulk1 = new BulkRequest()
             .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
@@ -2007,8 +2010,8 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
         highLevelClient().bulk(bulk1, RequestOptions.DEFAULT);
 
         MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
-        PutDataFrameAnalyticsRequest estimateMemoryUsageRequest =
-            new PutDataFrameAnalyticsRequest(
+        ExplainDataFrameAnalyticsRequest explainRequest =
+            new ExplainDataFrameAnalyticsRequest(
                 DataFrameAnalyticsConfig.builder()
                     .setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build())
                     .setAnalysis(OutlierDetection.createDefault())
@@ -2019,11 +2022,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
         ByteSizeValue upperBound = new ByteSizeValue(1, ByteSizeUnit.GB);
 
         // Data Frame has 10 rows, expect that the returned estimates fall within (1kB, 1GB) range.
-        EstimateMemoryUsageResponse response1 =
-            execute(
-                estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync);
-        assertThat(response1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
-        assertThat(response1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
+        ExplainDataFrameAnalyticsResponse response1 = execute(explainRequest, machineLearningClient::explainDataFrameAnalytics,
+            machineLearningClient::explainDataFrameAnalyticsAsync);
+
+        MemoryEstimation memoryEstimation1 = response1.getMemoryEstimation();
+        assertThat(memoryEstimation1.getExpectedMemoryWithoutDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
+        assertThat(memoryEstimation1.getExpectedMemoryWithDisk(), allOf(greaterThanOrEqualTo(lowerBound), lessThan(upperBound)));
+
+        List<FieldSelection> fieldSelection = response1.getFieldSelection();
+        assertThat(fieldSelection.size(), equalTo(3));
+        assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("dataset", "label", "p"));
 
         BulkRequest bulk2 = new BulkRequest()
             .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
@@ -2033,15 +2041,16 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
         highLevelClient().bulk(bulk2, RequestOptions.DEFAULT);
 
         // Data Frame now has 100 rows, expect that the returned estimates will be greater than or equal to the previous ones.
-        EstimateMemoryUsageResponse response2 =
+        ExplainDataFrameAnalyticsResponse response2 =
             execute(
-                estimateMemoryUsageRequest, machineLearningClient::estimateMemoryUsage, machineLearningClient::estimateMemoryUsageAsync);
+                explainRequest, machineLearningClient::explainDataFrameAnalytics, machineLearningClient::explainDataFrameAnalyticsAsync);
+        MemoryEstimation memoryEstimation2 = response2.getMemoryEstimation();
         assertThat(
-            response2.getExpectedMemoryWithoutDisk(),
-            allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithoutDisk()), lessThan(upperBound)));
+            memoryEstimation2.getExpectedMemoryWithoutDisk(),
+            allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithoutDisk()), lessThan(upperBound)));
         assertThat(
-            response2.getExpectedMemoryWithDisk(),
-            allOf(greaterThanOrEqualTo(response1.getExpectedMemoryWithDisk()), lessThan(upperBound)));
+            memoryEstimation2.getExpectedMemoryWithDisk(),
+            allOf(greaterThanOrEqualTo(memoryEstimation1.getExpectedMemoryWithDisk()), lessThan(upperBound)));
     }
 
     public void testGetTrainedModels() throws Exception {
@@ -36,6 +36,8 @@ import org.elasticsearch.client.core.PageParams;
 import org.elasticsearch.client.indices.CreateIndexRequest;
 import org.elasticsearch.client.ml.CloseJobRequest;
 import org.elasticsearch.client.ml.CloseJobResponse;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsRequest;
+import org.elasticsearch.client.ml.ExplainDataFrameAnalyticsResponse;
 import org.elasticsearch.client.ml.DeleteCalendarEventRequest;
 import org.elasticsearch.client.ml.DeleteCalendarJobRequest;
 import org.elasticsearch.client.ml.DeleteCalendarRequest;
@@ -48,7 +50,6 @@ import org.elasticsearch.client.ml.DeleteForecastRequest;
 import org.elasticsearch.client.ml.DeleteJobRequest;
 import org.elasticsearch.client.ml.DeleteJobResponse;
 import org.elasticsearch.client.ml.DeleteModelSnapshotRequest;
-import org.elasticsearch.client.ml.EstimateMemoryUsageResponse;
 import org.elasticsearch.client.ml.EvaluateDataFrameRequest;
 import org.elasticsearch.client.ml.EvaluateDataFrameResponse;
 import org.elasticsearch.client.ml.FindFileStructureRequest;
@@ -155,6 +156,8 @@ import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
 import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix;
 import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
 import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
 import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
 import org.elasticsearch.client.ml.inference.TrainedModelConfig;
 import org.elasticsearch.client.ml.inference.TrainedModelDefinition;
@@ -213,6 +216,7 @@ import java.util.zip.GZIPOutputStream;
 
 import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
@@ -3460,10 +3464,10 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
         }
     }
 
-    public void testEstimateMemoryUsage() throws Exception {
-        createIndex("estimate-test-source-index");
+    public void testExplainDataFrameAnalytics() throws Exception {
+        createIndex("explain-df-test-source-index");
         BulkRequest bulkRequest =
-            new BulkRequest("estimate-test-source-index")
+            new BulkRequest("explain-df-test-source-index")
                 .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
         for (int i = 0; i < 10; ++i) {
             bulkRequest.add(new IndexRequest().source(XContentType.JSON, "timestamp", 123456789L, "total", 10L));
@@ -3471,22 +3475,33 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
         RestHighLevelClient client = highLevelClient();
         client.bulk(bulkRequest, RequestOptions.DEFAULT);
         {
-            // tag::estimate-memory-usage-request
+            // tag::explain-data-frame-analytics-id-request
+            ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("existing_job_id"); // <1>
+            // end::explain-data-frame-analytics-id-request
+
+            // tag::explain-data-frame-analytics-config-request
             DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
-                .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
+                .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build())
                 .setAnalysis(OutlierDetection.createDefault())
                 .build();
-            PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config); // <1>
-            // end::estimate-memory-usage-request
+            request = new ExplainDataFrameAnalyticsRequest(config); // <1>
+            // end::explain-data-frame-analytics-config-request
 
-            // tag::estimate-memory-usage-execute
-            EstimateMemoryUsageResponse response = client.machineLearning().estimateMemoryUsage(request, RequestOptions.DEFAULT);
-            // end::estimate-memory-usage-execute
+            // tag::explain-data-frame-analytics-execute
+            ExplainDataFrameAnalyticsResponse response = client.machineLearning().explainDataFrameAnalytics(request,
+                RequestOptions.DEFAULT);
+            // end::explain-data-frame-analytics-execute
 
-            // tag::estimate-memory-usage-response
-            ByteSizeValue expectedMemoryWithoutDisk = response.getExpectedMemoryWithoutDisk(); // <1>
-            ByteSizeValue expectedMemoryWithDisk = response.getExpectedMemoryWithDisk(); // <2>
-            // end::estimate-memory-usage-response
+            // tag::explain-data-frame-analytics-response
+            List<FieldSelection> fieldSelection = response.getFieldSelection(); // <1>
+            MemoryEstimation memoryEstimation = response.getMemoryEstimation(); // <2>
+            // end::explain-data-frame-analytics-response
+
+            assertThat(fieldSelection.size(), equalTo(2));
+            assertThat(fieldSelection.stream().map(FieldSelection::getName).collect(Collectors.toList()), contains("timestamp", "total"));
+
+            ByteSizeValue expectedMemoryWithoutDisk = memoryEstimation.getExpectedMemoryWithoutDisk(); // <1>
+            ByteSizeValue expectedMemoryWithDisk = memoryEstimation.getExpectedMemoryWithDisk(); // <2>
 
             // We are pretty liberal here as this test does not aim at verifying concrete numbers but rather end-to-end user workflow.
             ByteSizeValue lowerBound = new ByteSizeValue(1, ByteSizeUnit.KB);
@@ -3496,14 +3511,14 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
         }
         {
             DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
-                .setSource(DataFrameAnalyticsSource.builder().setIndex("estimate-test-source-index").build())
+                .setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build())
                 .setAnalysis(OutlierDetection.createDefault())
                 .build();
-            PutDataFrameAnalyticsRequest request = new PutDataFrameAnalyticsRequest(config);
-            // tag::estimate-memory-usage-execute-listener
-            ActionListener<EstimateMemoryUsageResponse> listener = new ActionListener<EstimateMemoryUsageResponse>() {
+            ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
+            // tag::explain-data-frame-analytics-execute-listener
+            ActionListener<ExplainDataFrameAnalyticsResponse> listener = new ActionListener<ExplainDataFrameAnalyticsResponse>() {
                 @Override
-                public void onResponse(EstimateMemoryUsageResponse response) {
+                public void onResponse(ExplainDataFrameAnalyticsResponse response) {
                     // <1>
                 }
@@ -3512,15 +3527,15 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
                     // <2>
                 }
             };
-            // end::estimate-memory-usage-execute-listener
+            // end::explain-data-frame-analytics-execute-listener
 
             // Replace the empty listener by a blocking listener in test
             final CountDownLatch latch = new CountDownLatch(1);
             listener = new LatchedActionListener<>(listener, latch);
 
-            // tag::estimate-memory-usage-execute-async
-            client.machineLearning().estimateMemoryUsageAsync(request, RequestOptions.DEFAULT, listener); // <1>
-            // end::estimate-memory-usage-execute-async
+            // tag::explain-data-frame-analytics-execute-async
+            client.machineLearning().explainDataFrameAnalyticsAsync(request, RequestOptions.DEFAULT, listener); // <1>
+            // end::explain-data-frame-analytics-execute-async
 
             assertTrue(latch.await(30L, TimeUnit.SECONDS));
         }
@@ -0,0 +1,44 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfigTests;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+
+public class ExplainDataFrameAnalyticsRequestTests extends ESTestCase {
+
+    public void testIdConstructor() {
+        ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("foo");
+        assertThat(request.getId(), equalTo("foo"));
+        assertThat(request.getConfig(), is(nullValue()));
+    }
+
+    public void testConfigConstructor() {
+        DataFrameAnalyticsConfig config = DataFrameAnalyticsConfigTests.randomDataFrameAnalyticsConfig();
+
+        ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
+        assertThat(request.getId(), is(nullValue()));
+        assertThat(request.getConfig(), equalTo(config));
+    }
+}
@@ -0,0 +1,54 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml;
+
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
+import org.elasticsearch.client.ml.dataframe.explain.FieldSelectionTests;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
+import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimationTests;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.IntStream;
+
+public class ExplainDataFrameAnalyticsResponseTests extends AbstractXContentTestCase<ExplainDataFrameAnalyticsResponse> {
+
+    @Override
+    protected ExplainDataFrameAnalyticsResponse createTestInstance() {
+        int fieldSelectionCount = randomIntBetween(1, 5);
+        List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
+        IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
+        MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();
+
+        return new ExplainDataFrameAnalyticsResponse(fieldSelection, memoryEstimation);
+    }
+
+    @Override
+    protected ExplainDataFrameAnalyticsResponse doParseInstance(XContentParser parser) throws IOException {
+        return ExplainDataFrameAnalyticsResponse.fromXContent(parser);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+}
@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.dataframe.explain;
+
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class FieldSelectionTests extends AbstractXContentTestCase<FieldSelection> {
+
+    public static FieldSelection createRandom() {
+        Set<String> mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip")
+            .stream().collect(Collectors.toSet());
+        FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values());
+        String reason = randomBoolean() ? null : randomAlphaOfLength(20);
+        return new FieldSelection(randomAlphaOfLength(10),
+            mappingTypes,
+            randomBoolean(),
+            randomBoolean(),
+            featureType,
+            reason);
+    }
+
+    @Override
+    protected FieldSelection createTestInstance() {
+        return createRandom();
+    }
+
+    @Override
+    protected FieldSelection doParseInstance(XContentParser parser) throws IOException {
+        return FieldSelection.PARSER.apply(parser, null);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+}
@@ -7,7 +7,7 @@
  * not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
@@ -16,7 +16,7 @@
 * specific language governing permissions and limitations
 * under the License.
 */
-package org.elasticsearch.client.ml;
+package org.elasticsearch.client.ml.dataframe.explain;
 
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -24,22 +24,22 @@ import org.elasticsearch.test.AbstractXContentTestCase;
 
 import java.io.IOException;
 
-public class EstimateMemoryUsageResponseTests extends AbstractXContentTestCase<EstimateMemoryUsageResponse> {
+public class MemoryEstimationTests extends AbstractXContentTestCase<MemoryEstimation> {
 
-    public static EstimateMemoryUsageResponse randomResponse() {
-        return new EstimateMemoryUsageResponse(
+    public static MemoryEstimation createRandom() {
+        return new MemoryEstimation(
             randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null,
             randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null);
     }
 
     @Override
-    protected EstimateMemoryUsageResponse createTestInstance() {
-        return randomResponse();
+    protected MemoryEstimation createTestInstance() {
+        return createRandom();
     }
 
     @Override
-    protected EstimateMemoryUsageResponse doParseInstance(XContentParser parser) throws IOException {
-        return EstimateMemoryUsageResponse.fromXContent(parser);
+    protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException {
+        return MemoryEstimation.PARSER.apply(parser, null);
     }
 
     @Override
|
|||
--
|
||||
:api: estimate-memory-usage
|
||||
:request: PutDataFrameAnalyticsRequest
|
||||
:response: EstimateMemoryUsageResponse
|
||||
--
|
||||
[role="xpack"]
|
||||
[id="{upid}-{api}"]
|
||||
=== Estimate memory usage API
|
||||
|
||||
Estimates memory usage of {dfanalytics}.
|
||||
Estimation results can be used when deciding the appropriate value for `model_memory_limit` setting later on.
|
||||
|
||||
The API accepts an +{request}+ object and returns an +{response}+.
|
||||
|
||||
[id="{upid}-{api}-request"]
|
||||
==== Estimate memory usage request
|
||||
|
||||
["source","java",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{doc-tests-file}[{api}-request]
|
||||
--------------------------------------------------
|
||||
<1> Constructing a new request containing a {dataframe-analytics-config} for which memory usage estimation should be performed
|
||||
|
||||
include::../execution.asciidoc[]
|
||||
|
||||
[id="{upid}-{api}-response"]
|
||||
==== Response
|
||||
|
||||
The returned +{response}+ contains the memory usage estimates.
|
||||
|
||||
["source","java",subs="attributes,callouts,macros"]
|
||||
--------------------------------------------------
|
||||
include-tagged::{doc-tests-file}[{api}-response]
|
||||
--------------------------------------------------
|
||||
<1> Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory (i.e. without overflowing to disk).
|
||||
<2> Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
|
|
@@ -0,0 +1,48 @@
+--
+:api: explain-data-frame-analytics
+:request: ExplainDataFrameAnalyticsRequest
+:response: ExplainDataFrameAnalyticsResponse
+--
+[role="xpack"]
+[id="{upid}-{api}"]
+=== Explain {dfanalytics} API
+
+Explains the following about a {dataframe-analytics-config}:
+
+* field selection: which fields are included or not in the analysis
+* memory estimation: how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for `model_memory_limit` setting later on.
+
+The API accepts an +{request}+ object and returns an +{response}+.
+
+[id="{upid}-{api}-request"]
+==== Explain {dfanalytics} request
+
+The request can be constructed with the id of an existing {dfanalytics-job}.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-id-request]
+--------------------------------------------------
+<1> Constructing a new request with the id of an existing {dfanalytics-job}
+
+It can also be constructed with a {dataframe-analytics-config} to explain it before creating it.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-config-request]
+--------------------------------------------------
+<1> Constructing a new request containing a {dataframe-analytics-config}
+
+include::../execution.asciidoc[]
+
+[id="{upid}-{api}-response"]
+==== Response
+
+The returned +{response}+ contains the field selection and the memory usage estimation.
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-response]
+--------------------------------------------------
+<1> A list where each item explains whether a field was selected for analysis or not
+<2> The memory estimation for the {dfanalytics-job}
@@ -300,7 +300,7 @@ The Java High Level REST Client supports the following Machine Learning APIs:
 * <<{upid}-start-data-frame-analytics>>
 * <<{upid}-stop-data-frame-analytics>>
 * <<{upid}-evaluate-data-frame>>
-* <<{upid}-estimate-memory-usage>>
+* <<{upid}-explain-data-frame-analytics>>
 * <<{upid}-get-trained-models>>
 * <<{upid}-put-filter>>
 * <<{upid}-get-filters>>
@@ -353,7 +353,7 @@ include::ml/delete-data-frame-analytics.asciidoc[]
 include::ml/start-data-frame-analytics.asciidoc[]
 include::ml/stop-data-frame-analytics.asciidoc[]
 include::ml/evaluate-data-frame.asciidoc[]
-include::ml/estimate-memory-usage.asciidoc[]
+include::ml/explain-data-frame-analytics.asciidoc[]
 include::ml/get-trained-models.asciidoc[]
 include::ml/put-filter.asciidoc[]
 include::ml/get-filters.asciidoc[]
@@ -1,80 +0,0 @@
-[role="xpack"]
-[testenv="platinum"]
-[[estimate-memory-usage-dfanalytics]]
-=== Estimate memory usage API
-
-[subs="attributes"]
-++++
-<titleabbrev>Estimate memory usage for {dfanalytics-jobs}</titleabbrev>
-++++
-
-Estimates memory usage for the given {dataframe-analytics-config}.
-
-experimental[]
-
-[[ml-estimate-memory-usage-dfanalytics-request]]
-==== {api-request-title}
-
-`POST _ml/data_frame/analytics/_estimate_memory_usage`
-
-[[ml-estimate-memory-usage-dfanalytics-prereq]]
-==== {api-prereq-title}
-
-* You must have `monitor_ml` privilege to use this API. For more
-information, see <<security-privileges>> and <<built-in-roles>>.
-
-[[ml-estimate-memory-usage-dfanalytics-desc]]
-==== {api-description-title}
-
-This API estimates memory usage for the given {dataframe-analytics-config} before the {dfanalytics-job} is even created.
-
-Serves as an advice on how to set `model_memory_limit` when creating {dfanalytics-job}.
-
-[[ml-estimate-memory-usage-dfanalytics-request-body]]
-==== {api-request-body-title}
-
-`data_frame_analytics_config`::
-(Required, object) Intended configuration of {dfanalytics-job}. For more information, see
-<<ml-dfanalytics-resources>>.
-Note that `id` and `dest` don't need to be provided in the context of this API.
-
-[[ml-estimate-memory-usage-dfanalytics-results]]
-==== {api-response-body-title}
-
-`expected_memory_without_disk`::
-(string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
-(i.e. without overflowing to disk).
-
-`expected_memory_with_disk`::
-(string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
-`expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` as using disk allows to
-limit the main memory needed to perform {dfanalytics}.
-
-[[ml-estimate-memory-usage-dfanalytics-example]]
-==== {api-examples-title}
-
-[source,console]
---------------------------------------------------
-POST _ml/data_frame/analytics/_estimate_memory_usage
-{
-  "data_frame_analytics_config": {
-    "source": {
-      "index": "logdata"
-    },
-    "analysis": {
-      "outlier_detection": {}
-    }
-  }
-}
---------------------------------------------------
-// TEST[skip:TBD]
-
-The API returns the following results:
-
-[source,console-result]
-----
-{
-  "expected_memory_without_disk": "128MB",
-  "expected_memory_with_disk": "32MB"
-}
-----
@@ -0,0 +1,159 @@
[role="xpack"]
[testenv="platinum"]
[[explain-dfanalytics]]
=== Explain {dfanalytics} API

[subs="attributes"]
++++
<titleabbrev>Explain {dfanalytics} API</titleabbrev>
++++

Explains a {dataframe-analytics-config}.

experimental[]

[[ml-explain-dfanalytics-request]]
==== {api-request-title}

`GET _ml/data_frame/analytics/_explain` +
`POST _ml/data_frame/analytics/_explain` +
`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_explain` +
`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_explain`

[[ml-explain-dfanalytics-prereq]]
==== {api-prereq-title}

* You must have the `monitor_ml` privilege to use this API. For more
information, see <<security-privileges>> and <<built-in-roles>>.

[[ml-explain-dfanalytics-desc]]
==== {api-description-title}

This API provides explanations for a {dataframe-analytics-config} that either already exists or one that has not been created yet.
The following explanations are provided:

* which fields are included in the analysis and which are not, and why
* how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for the `model_memory_limit` setting later on, as shown in the sketch after this list.
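
For example, if the estimate comes back as `"128mb"`, the job can then be created
with a matching limit. A minimal sketch (the job id, index names, and the
`model_memory_limit` value below are hypothetical, not part of this commit):

[source,console]
--------------------------------------------------
PUT _ml/data_frame/analytics/house_price_regression
{
  "source": { "index": "houses_sold_last_10_yrs" },
  "dest": { "index": "house_price_predictions" },
  "analysis": { "regression": { "dependent_variable": "price" } },
  "model_memory_limit": "128mb"
}
--------------------------------------------------
// TEST[skip:hypothetical example]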

[[ml-explain-dfanalytics-path-params]]
==== {api-path-parms-title}

`<data_frame_analytics_id>`::
(Optional, string) A unique identifier of the existing {dfanalytics-job} to
explain. This identifier can contain lowercase alphanumeric characters (a-z
and 0-9), hyphens, and underscores. It must start and end with alphanumeric
characters.
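
When the identifier is supplied, no request body is needed; the stored
configuration is explained as-is. A minimal sketch (the job id `loganalytics`
is hypothetical):

[source,console]
--------------------------------------------------
GET _ml/data_frame/analytics/loganalytics/_explain
--------------------------------------------------
// TEST[skip:hypothetical job id]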

[[ml-explain-dfanalytics-request-body]]
==== {api-request-body-title}

`data_frame_analytics_config`::
(Optional, object) Intended configuration of the {dfanalytics-job}. For more information, see
<<ml-dfanalytics-resources>>.
Note that `id` and `dest` don't need to be provided in the context of this API.

[[ml-explain-dfanalytics-results]]
==== {api-response-body-title}

The API returns a response that contains the following:

`field_selection`::
(array) An array of objects that explain whether each field was selected for the analysis,
sorted by field name. Each object in the array has the following properties:

`name`:::
(string) The field name.

`mapping_types`:::
(array of strings) The mapping types of the field.

`is_included`:::
(boolean) Whether the field is selected to be included in the analysis.

`is_required`:::
(boolean) Whether the field is required.

`feature_type`:::
(string) The feature type of this field for the analysis. May be `categorical` or `numerical`.

`reason`:::
(string) The reason a field is not selected to be included in the analysis. Present only when
the field is excluded.

`memory_estimation`::
(object) An object containing the memory estimates. The object has the following properties:

`expected_memory_without_disk`:::
(string) Estimated memory usage under the assumption that the whole {dfanalytics} should happen in memory
(i.e. without overflowing to disk).

`expected_memory_with_disk`:::
(string) Estimated memory usage under the assumption that overflowing to disk is allowed during {dfanalytics}.
`expected_memory_with_disk` is usually smaller than `expected_memory_without_disk` because using disk allows
limiting the amount of main memory needed to perform {dfanalytics}.

[[ml-explain-dfanalytics-example]]
==== {api-examples-title}

[source,console]
--------------------------------------------------
POST _ml/data_frame/analytics/_explain
{
  "data_frame_analytics_config": {
    "source": {
      "index": "houses_sold_last_10_yrs"
    },
    "analysis": {
      "regression": {
        "dependent_variable": "price"
      }
    }
  }
}
--------------------------------------------------
// TEST[skip:TBD]

The API returns the following results (field names follow the `FieldSelection` class below,
i.e. `name` and `mapping_types`):

[source,console-result]
----
{
  "field_selection": [
    {
      "name": "number_of_bedrooms",
      "mapping_types": ["integer"],
      "is_included": true,
      "is_required": false,
      "feature_type": "numerical"
    },
    {
      "name": "postcode",
      "mapping_types": ["text"],
      "is_included": false,
      "is_required": false,
      "reason": "[postcode.keyword] is preferred because it is aggregatable"
    },
    {
      "name": "postcode.keyword",
      "mapping_types": ["keyword"],
      "is_included": true,
      "is_required": false,
      "feature_type": "categorical"
    },
    {
      "name": "price",
      "mapping_types": ["float"],
      "is_included": true,
      "is_required": true,
      "feature_type": "numerical"
    }
  ],
  "memory_estimation": {
    "expected_memory_without_disk": "128MB",
    "expected_memory_with_disk": "32MB"
  }
}
----

@@ -12,7 +12,7 @@ You can use the following APIs to perform {ml} {dfanalytics} activities.
* <<start-dfanalytics,Start {dfanalytics-jobs}>>
* <<stop-dfanalytics,Stop {dfanalytics-jobs}>>
* <<evaluate-dfanalytics,Evaluate {dfanalytics}>>
-* <<estimate-memory-usage-dfanalytics,Estimate memory usage for {dfanalytics}>>
+* <<explain-dfanalytics,Explain {dfanalytics}>>

See also <<ml-apis>>.

@@ -23,7 +23,7 @@ include::delete-dfanalytics.asciidoc[]
//EVALUATE
include::evaluate-dfanalytics.asciidoc[]
//ESTIMATE_MEMORY_USAGE
-include::estimate-memory-usage-dfanalytics.asciidoc[]
+include::explain-dfanalytics.asciidoc[]
//GET
include::get-dfanalytics.asciidoc[]
include::get-dfanalytics-stats.asciidoc[]

@@ -79,6 +79,7 @@ import org.elasticsearch.xpack.core.ml.MachineLearningFeatureSetUsage;
import org.elasticsearch.xpack.core.ml.MlMetadata;
import org.elasticsearch.xpack.core.ml.MlTasks;
import org.elasticsearch.xpack.core.ml.action.CloseJobAction;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction;
import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction;

@@ -89,7 +90,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction;
import org.elasticsearch.xpack.core.ml.action.DeleteJobAction;
import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction;
import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction;
import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction;
import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction;

@@ -158,6 +158,10 @@ import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.P
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.SoftClassificationMetric;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor;
+import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding;
import org.elasticsearch.xpack.core.ml.inference.results.ClassificationInferenceResults;
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
import org.elasticsearch.xpack.core.ml.inference.results.RegressionInferenceResults;

@@ -171,10 +175,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.OutputAgg
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedMode;
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ensemble.WeightedSum;
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.tree.Tree;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.FrequencyEncoding;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.OneHotEncoding;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.PreProcessor;
-import org.elasticsearch.xpack.core.ml.inference.preprocessing.TargetMeanEncoding;
import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage;
import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage;

@@ -381,7 +381,7 @@ public class XPackClientPlugin extends Plugin implements ActionPlugin, NetworkPl
        StartDataFrameAnalyticsAction.INSTANCE,
        StopDataFrameAnalyticsAction.INSTANCE,
        EvaluateDataFrameAction.INSTANCE,
-       EstimateMemoryUsageAction.INSTANCE,
+       ExplainDataFrameAnalyticsAction.INSTANCE,
        InternalInferModelAction.INSTANCE,
        GetTrainedModelsAction.INSTANCE,
        DeleteTrainedModelAction.INSTANCE,

@@ -1,119 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.action;

import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.action.ActionType;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Objects;

import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;

public class EstimateMemoryUsageAction extends ActionType<EstimateMemoryUsageAction.Response> {

    public static final EstimateMemoryUsageAction INSTANCE = new EstimateMemoryUsageAction();
    public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/estimate_memory_usage";

    private EstimateMemoryUsageAction() {
        super(NAME, EstimateMemoryUsageAction.Response::new);
    }

    public static class Response extends ActionResponse implements ToXContentObject {

        public static final ParseField TYPE = new ParseField("memory_usage_estimation_result");

        public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk");
        public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk");

        static final ConstructingObjectParser<Response, Void> PARSER =
            new ConstructingObjectParser<>(
                TYPE.getPreferredName(),
                args -> new Response((ByteSizeValue) args[0], (ByteSizeValue) args[1]));

        static {
            PARSER.declareField(
                optionalConstructorArg(),
                (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()),
                EXPECTED_MEMORY_WITHOUT_DISK,
                ObjectParser.ValueType.VALUE);
            PARSER.declareField(
                optionalConstructorArg(),
                (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()),
                EXPECTED_MEMORY_WITH_DISK,
                ObjectParser.ValueType.VALUE);
        }

        private final ByteSizeValue expectedMemoryWithoutDisk;
        private final ByteSizeValue expectedMemoryWithDisk;

        public Response(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
            this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk;
            this.expectedMemoryWithDisk = expectedMemoryWithDisk;
        }

        public Response(StreamInput in) throws IOException {
            super(in);
            this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new);
            this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new);
        }

        public ByteSizeValue getExpectedMemoryWithoutDisk() {
            return expectedMemoryWithoutDisk;
        }

        public ByteSizeValue getExpectedMemoryWithDisk() {
            return expectedMemoryWithDisk;
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeOptionalWriteable(expectedMemoryWithoutDisk);
            out.writeOptionalWriteable(expectedMemoryWithDisk);
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            if (expectedMemoryWithoutDisk != null) {
                builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep());
            }
            if (expectedMemoryWithDisk != null) {
                builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep());
            }
            builder.endObject();
            return builder;
        }

        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (other == null || getClass() != other.getClass()) {
                return false;
            }

            Response that = (Response) other;
            return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk)
                && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk);
        }

        @Override
        public int hashCode() {
            return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk);
        }
    }
}

@@ -0,0 +1,101 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.action;

import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.action.ActionType;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

public class ExplainDataFrameAnalyticsAction extends ActionType<ExplainDataFrameAnalyticsAction.Response> {

    public static final ExplainDataFrameAnalyticsAction INSTANCE = new ExplainDataFrameAnalyticsAction();
    public static final String NAME = "cluster:admin/xpack/ml/data_frame/analytics/explain";

    private ExplainDataFrameAnalyticsAction() {
        super(NAME, ExplainDataFrameAnalyticsAction.Response::new);
    }

    public static class Response extends ActionResponse implements ToXContentObject {

        public static final ParseField TYPE = new ParseField("explain_data_frame_analytics_response");

        public static final ParseField FIELD_SELECTION = new ParseField("field_selection");
        public static final ParseField MEMORY_ESTIMATION = new ParseField("memory_estimation");

        static final ConstructingObjectParser<Response, Void> PARSER =
            new ConstructingObjectParser<>(
                TYPE.getPreferredName(),
                args -> new Response((List<FieldSelection>) args[0], (MemoryEstimation) args[1]));

        static {
            PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), FieldSelection.PARSER, FIELD_SELECTION);
            PARSER.declareObject(ConstructingObjectParser.constructorArg(), MemoryEstimation.PARSER, MEMORY_ESTIMATION);
        }

        private final List<FieldSelection> fieldSelection;
        private final MemoryEstimation memoryEstimation;

        public Response(List<FieldSelection> fieldSelection, MemoryEstimation memoryEstimation) {
            this.fieldSelection = Objects.requireNonNull(fieldSelection);
            this.memoryEstimation = Objects.requireNonNull(memoryEstimation);
        }

        public Response(StreamInput in) throws IOException {
            super(in);
            this.fieldSelection = in.readList(FieldSelection::new);
            this.memoryEstimation = new MemoryEstimation(in);
        }

        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeList(fieldSelection);
            memoryEstimation.writeTo(out);
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            builder.field(FIELD_SELECTION.getPreferredName(), fieldSelection);
            builder.field(MEMORY_ESTIMATION.getPreferredName(), memoryEstimation);
            builder.endObject();
            return builder;
        }

        @Override
        public boolean equals(Object other) {
            if (this == other) return true;
            if (other == null || getClass() != other.getClass()) return false;

            Response that = (Response) other;
            return Objects.equals(fieldSelection, that.fieldSelection)
                && Objects.equals(memoryEstimation, that.memoryEstimation);
        }

        @Override
        public int hashCode() {
            return Objects.hash(fieldSelection, memoryEstimation);
        }

        public MemoryEstimation getMemoryEstimation() {
            return memoryEstimation;
        }

        public List<FieldSelection> getFieldSelection() {
            return fieldSelection;
        }
    }
}

@@ -51,13 +51,14 @@ public class PutDataFrameAnalyticsAction extends ActionType<PutDataFrameAnalytic
    }

    /**
-     * Parses request for memory estimation.
-     * {@link Request} is reused across {@link PutDataFrameAnalyticsAction} and {@link EstimateMemoryUsageAction} but parsing differs
+     * Parses request for use in the explain action.
+     * {@link Request} is reused across {@link PutDataFrameAnalyticsAction} and
+     * {@link ExplainDataFrameAnalyticsAction} but parsing differs
     * between these two usages.
     */
-    public static Request parseRequestForMemoryEstimation(XContentParser parser) {
+    public static Request parseRequestForExplain(XContentParser parser) {
        DataFrameAnalyticsConfig.Builder configBuilder = DataFrameAnalyticsConfig.STRICT_PARSER.apply(parser, null);
-        DataFrameAnalyticsConfig config = configBuilder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = configBuilder.buildForExplain();
        return new PutDataFrameAnalyticsAction.Request(config);
    }

@@ -416,11 +416,11 @@ public class DataFrameAnalyticsConfig implements ToXContentObject, Writeable {
    }

    /**
-     * Builds {@link DataFrameAnalyticsConfig} object for the purpose of performing memory estimation.
+     * Builds {@link DataFrameAnalyticsConfig} object for the purpose of explaining a job that has not been created yet.
     * Some fields (i.e. "id", "dest") may not be present, therefore we overwrite them here to make {@link DataFrameAnalyticsConfig}'s
     * constructor validations happy.
     */
-    public DataFrameAnalyticsConfig buildForMemoryEstimation() {
+    public DataFrameAnalyticsConfig buildForExplain() {
        return new DataFrameAnalyticsConfig(
            id != null ? id : "dummy",
            description,

@@ -0,0 +1,184 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.dataframe.explain;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;

public class FieldSelection implements ToXContentObject, Writeable {

    private static final ParseField NAME = new ParseField("name");
    private static final ParseField MAPPING_TYPES = new ParseField("mapping_types");
    private static final ParseField IS_INCLUDED = new ParseField("is_included");
    private static final ParseField IS_REQUIRED = new ParseField("is_required");
    private static final ParseField FEATURE_TYPE = new ParseField("feature_type");
    private static final ParseField REASON = new ParseField("reason");

    public enum FeatureType {
        CATEGORICAL, NUMERICAL;

        public static FeatureType fromString(String value) {
            return FeatureType.valueOf(value.toUpperCase(Locale.ROOT));
        }

        @Override
        public String toString() {
            return name().toLowerCase(Locale.ROOT);
        }
    }

    public static ConstructingObjectParser<FieldSelection, Void> PARSER = new ConstructingObjectParser<>("field_selection",
        a -> new FieldSelection((String) a[0], new HashSet<>((List<String>) a[1]), (boolean) a[2], (boolean) a[3], (FeatureType) a[4],
            (String) a[5]));

    static {
        PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME);
        PARSER.declareStringArray(ConstructingObjectParser.constructorArg(), MAPPING_TYPES);
        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_INCLUDED);
        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), IS_REQUIRED);
        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
                return FeatureType.fromString(p.text());
            }
            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
        }, FEATURE_TYPE, ObjectParser.ValueType.STRING);
        PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), REASON);
    }

    private final String name;
    private final Set<String> mappingTypes;
    private final boolean isIncluded;
    private final boolean isRequired;
    private final FeatureType featureType;
    private final String reason;

    public static FieldSelection included(String name, Set<String> mappingTypes, boolean isRequired, FeatureType featureType) {
        return new FieldSelection(name, mappingTypes, true, isRequired, featureType, null);
    }

    public static FieldSelection excluded(String name, Set<String> mappingTypes, String reason) {
        return new FieldSelection(name, mappingTypes, false, false, null, reason);
    }

    FieldSelection(String name, Set<String> mappingTypes, boolean isIncluded, boolean isRequired, @Nullable FeatureType featureType,
                   @Nullable String reason) {
        this.name = Objects.requireNonNull(name);
        this.mappingTypes = Collections.unmodifiableSet(mappingTypes);
        this.isIncluded = isIncluded;
        this.isRequired = isRequired;
        this.featureType = featureType;
        this.reason = reason;
    }

    public FieldSelection(StreamInput in) throws IOException {
        this.name = in.readString();
        this.mappingTypes = Collections.unmodifiableSet(in.readSet(StreamInput::readString));
        this.isIncluded = in.readBoolean();
        this.isRequired = in.readBoolean();
        boolean hasFeatureType = in.readBoolean();

        if (hasFeatureType) {
            this.featureType = in.readEnum(FeatureType.class);
        } else {
            this.featureType = null;
        }

        this.reason = in.readOptionalString();
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeString(name);
        out.writeCollection(mappingTypes, StreamOutput::writeString);
        out.writeBoolean(isIncluded);
        out.writeBoolean(isRequired);

        if (featureType == null) {
            out.writeBoolean(false);
        } else {
            out.writeBoolean(true);
            out.writeEnum(featureType);
        }
        out.writeOptionalString(reason);
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(NAME.getPreferredName(), name);
        builder.field(MAPPING_TYPES.getPreferredName(), mappingTypes);
        builder.field(IS_INCLUDED.getPreferredName(), isIncluded);
        builder.field(IS_REQUIRED.getPreferredName(), isRequired);
        if (featureType != null) {
            builder.field(FEATURE_TYPE.getPreferredName(), featureType);
        }
        if (reason != null) {
            builder.field(REASON.getPreferredName(), reason);
        }
        builder.endObject();
        return builder;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        FieldSelection that = (FieldSelection) o;
        return Objects.equals(name, that.name)
            && Objects.equals(mappingTypes, that.mappingTypes)
            && isIncluded == that.isIncluded
            && isRequired == that.isRequired
            && Objects.equals(featureType, that.featureType)
            && Objects.equals(reason, that.reason);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name, mappingTypes, isIncluded, isRequired, featureType, reason);
    }

    public String getName() {
        return name;
    }

    public Set<String> getMappingTypes() {
        return mappingTypes;
    }

    public boolean isIncluded() {
        return isIncluded;
    }

    public boolean isRequired() {
        return isRequired;
    }

    @Nullable
    public FeatureType getFeatureType() {
        return featureType;
    }

    @Nullable
    public String getReason() {
        return reason;
    }
}

@@ -0,0 +1,103 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.dataframe.explain;

import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Objects;

import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;

public class MemoryEstimation implements ToXContentObject, Writeable {

    public static final ParseField EXPECTED_MEMORY_WITHOUT_DISK = new ParseField("expected_memory_without_disk");
    public static final ParseField EXPECTED_MEMORY_WITH_DISK = new ParseField("expected_memory_with_disk");

    public static final ConstructingObjectParser<MemoryEstimation, Void> PARSER = new ConstructingObjectParser<>("memory_estimation",
        a -> new MemoryEstimation((ByteSizeValue) a[0], (ByteSizeValue) a[1]));

    static {
        PARSER.declareField(
            optionalConstructorArg(),
            (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName()),
            EXPECTED_MEMORY_WITHOUT_DISK,
            ObjectParser.ValueType.VALUE);
        PARSER.declareField(
            optionalConstructorArg(),
            (p, c) -> ByteSizeValue.parseBytesSizeValue(p.text(), EXPECTED_MEMORY_WITH_DISK.getPreferredName()),
            EXPECTED_MEMORY_WITH_DISK,
            ObjectParser.ValueType.VALUE);
    }

    private final ByteSizeValue expectedMemoryWithoutDisk;
    private final ByteSizeValue expectedMemoryWithDisk;

    public MemoryEstimation(@Nullable ByteSizeValue expectedMemoryWithoutDisk, @Nullable ByteSizeValue expectedMemoryWithDisk) {
        this.expectedMemoryWithoutDisk = expectedMemoryWithoutDisk;
        this.expectedMemoryWithDisk = expectedMemoryWithDisk;
    }

    public MemoryEstimation(StreamInput in) throws IOException {
        this.expectedMemoryWithoutDisk = in.readOptionalWriteable(ByteSizeValue::new);
        this.expectedMemoryWithDisk = in.readOptionalWriteable(ByteSizeValue::new);
    }

    public ByteSizeValue getExpectedMemoryWithoutDisk() {
        return expectedMemoryWithoutDisk;
    }

    public ByteSizeValue getExpectedMemoryWithDisk() {
        return expectedMemoryWithDisk;
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeOptionalWriteable(expectedMemoryWithoutDisk);
        out.writeOptionalWriteable(expectedMemoryWithDisk);
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (expectedMemoryWithoutDisk != null) {
            builder.field(EXPECTED_MEMORY_WITHOUT_DISK.getPreferredName(), expectedMemoryWithoutDisk.getStringRep());
        }
        if (expectedMemoryWithDisk != null) {
            builder.field(EXPECTED_MEMORY_WITH_DISK.getPreferredName(), expectedMemoryWithDisk.getStringRep());
        }
        builder.endObject();
        return builder;
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }

        MemoryEstimation that = (MemoryEstimation) other;
        return Objects.equals(expectedMemoryWithoutDisk, that.expectedMemoryWithoutDisk)
            && Objects.equals(expectedMemoryWithDisk, that.expectedMemoryWithDisk);
    }

    @Override
    public int hashCode() {
        return Objects.hash(expectedMemoryWithoutDisk, expectedMemoryWithDisk);
    }
}

@@ -1,54 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.action;

import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction.Response;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;

public class EstimateMemoryUsageActionResponseTests extends AbstractSerializingTestCase<Response> {

    @Override
    protected Response createTestInstance() {
        return new Response(
            randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null,
            randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null);
    }

    @Override
    protected Writeable.Reader<Response> instanceReader() {
        return Response::new;
    }

    @Override
    protected Response doParseInstance(XContentParser parser) {
        return Response.PARSER.apply(parser, null);
    }

    public void testConstructor_NullValues() {
        Response response = new Response(null, null);
        assertThat(response.getExpectedMemoryWithoutDisk(), nullValue());
        assertThat(response.getExpectedMemoryWithDisk(), nullValue());
    }

    public void testConstructor_SmallValues() {
        Response response = new Response(new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB));
        assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB)));
        assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB)));
    }

    public void testConstructor() {
        Response response = new Response(new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB));
        assertThat(response.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB)));
        assertThat(response.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB)));
    }
}

@@ -0,0 +1,42 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.action;

import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction.Response;
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelectionTests;
import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimationTests;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.IntStream;

public class ExplainDataFrameAnalyticsActionResponseTests extends AbstractSerializingTestCase<Response> {

    @Override
    protected Response createTestInstance() {
        int fieldSelectionCount = randomIntBetween(1, 5);
        List<FieldSelection> fieldSelection = new ArrayList<>(fieldSelectionCount);
        IntStream.range(0, fieldSelectionCount).forEach(i -> fieldSelection.add(FieldSelectionTests.createRandom()));
        MemoryEstimation memoryEstimation = MemoryEstimationTests.createRandom();

        return new Response(fieldSelection, memoryEstimation);
    }

    @Override
    protected Writeable.Reader<Response> instanceReader() {
        return Response::new;
    }

    @Override
    protected Response doParseInstance(XContentParser parser) {
        return Response.PARSER.apply(parser, null);
    }
}

@@ -279,32 +279,32 @@ public class DataFrameAnalyticsConfigTests extends AbstractSerializingTestCase<D
        assertThat(e.getMessage(), containsString("must be less than the value of the xpack.ml.max_model_memory_limit setting"));
    }

-    public void testBuildForMemoryEstimation() {
+    public void testBuildForExplain() {
        DataFrameAnalyticsConfig.Builder builder = createRandomBuilder("foo");

-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();

        assertThat(config, equalTo(builder.build()));
    }

-    public void testBuildForMemoryEstimation_MissingId() {
+    public void testBuildForExplain_MissingId() {
        DataFrameAnalyticsConfig.Builder builder = new DataFrameAnalyticsConfig.Builder()
            .setAnalysis(OutlierDetectionTests.createRandom())
            .setSource(DataFrameAnalyticsSourceTests.createRandom())
            .setDest(DataFrameAnalyticsDestTests.createRandom());

-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();

        assertThat(config.getId(), equalTo("dummy"));
    }

-    public void testBuildForMemoryEstimation_MissingDest() {
+    public void testBuildForExplain_MissingDest() {
        DataFrameAnalyticsConfig.Builder builder = new DataFrameAnalyticsConfig.Builder()
            .setId("foo")
            .setAnalysis(OutlierDetectionTests.createRandom())
            .setSource(DataFrameAnalyticsSourceTests.createRandom());

-        DataFrameAnalyticsConfig config = builder.buildForMemoryEstimation();
+        DataFrameAnalyticsConfig config = builder.buildForExplain();

        assertThat(config.getDest().getIndex(), equalTo("dummy"));
    }

@@ -0,0 +1,45 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.dataframe.explain;

import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;

import java.io.IOException;
import java.util.Set;
import java.util.stream.Collectors;

public class FieldSelectionTests extends AbstractSerializingTestCase<FieldSelection> {

    public static FieldSelection createRandom() {
        Set<String> mappingTypes = randomSubsetOf(randomIntBetween(1, 3), "int", "float", "double", "text", "keyword", "ip")
            .stream().collect(Collectors.toSet());
        FieldSelection.FeatureType featureType = randomBoolean() ? null : randomFrom(FieldSelection.FeatureType.values());
        String reason = randomBoolean() ? null : randomAlphaOfLength(20);
        return new FieldSelection(randomAlphaOfLength(10),
            mappingTypes,
            randomBoolean(),
            randomBoolean(),
            featureType,
            reason);
    }

    @Override
    protected FieldSelection createTestInstance() {
        return createRandom();
    }

    @Override
    protected FieldSelection doParseInstance(XContentParser parser) throws IOException {
        return FieldSelection.PARSER.apply(parser, null);
    }

    @Override
    protected Writeable.Reader<FieldSelection> instanceReader() {
        return FieldSelection::new;
    }
}

@@ -0,0 +1,61 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.core.ml.dataframe.explain;

import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;

import java.io.IOException;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;

public class MemoryEstimationTests extends AbstractSerializingTestCase<MemoryEstimation> {

    public static MemoryEstimation createRandom() {
        return new MemoryEstimation(
            randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null,
            randomBoolean() ? new ByteSizeValue(randomNonNegativeLong()) : null);
    }

    @Override
    protected MemoryEstimation createTestInstance() {
        return createRandom();
    }

    @Override
    protected Writeable.Reader<MemoryEstimation> instanceReader() {
        return MemoryEstimation::new;
    }

    @Override
    protected MemoryEstimation doParseInstance(XContentParser parser) throws IOException {
        return MemoryEstimation.PARSER.apply(parser, null);
    }

    public void testConstructor_NullValues() {
        MemoryEstimation memoryEstimation = new MemoryEstimation(null, null);
        assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), nullValue());
        assertThat(memoryEstimation.getExpectedMemoryWithDisk(), nullValue());
    }

    public void testConstructor_SmallValues() {
        MemoryEstimation memoryEstimation = new MemoryEstimation(
            new ByteSizeValue(120, ByteSizeUnit.KB), new ByteSizeValue(30, ByteSizeUnit.KB));
        assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(120, ByteSizeUnit.KB)));
        assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(30, ByteSizeUnit.KB)));
    }

    public void testConstructor() {
        MemoryEstimation memoryEstimation = new MemoryEstimation(
            new ByteSizeValue(20, ByteSizeUnit.MB), new ByteSizeValue(10, ByteSizeUnit.MB));
        assertThat(memoryEstimation.getExpectedMemoryWithoutDisk(), equalTo(new ByteSizeValue(20, ByteSizeUnit.MB)));
        assertThat(memoryEstimation.getExpectedMemoryWithDisk(), equalTo(new ByteSizeValue(10, ByteSizeUnit.MB)));
    }
}

@@ -92,7 +92,6 @@ integTest.runner {
    'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
    'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
    'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
-    'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame',
    'ml/evaluate_data_frame/Test given missing index',
    'ml/evaluate_data_frame/Test given index does not exist',
    'ml/evaluate_data_frame/Test given missing evaluation',

@@ -113,6 +112,10 @@ integTest.runner {
    'ml/evaluate_data_frame/Test regression given evaluation with empty metrics',
    'ml/evaluate_data_frame/Test regression given missing actual_field',
    'ml/evaluate_data_frame/Test regression given missing predicted_field',
+    'ml/explain_data_frame_analytics/Test neither job id nor body',
+    'ml/explain_data_frame_analytics/Test both job id and body',
+    'ml/explain_data_frame_analytics/Test missing job',
+    'ml/explain_data_frame_analytics/Test empty data frame given body',
    'ml/delete_job_force/Test cannot force delete a non-existent job',
    'ml/delete_model_snapshot/Test delete snapshot missing snapshotId',
    'ml/delete_model_snapshot/Test delete snapshot missing job_id',

@@ -65,6 +65,7 @@ import org.elasticsearch.xpack.core.XPackSettings;
import org.elasticsearch.xpack.core.ml.MachineLearningField;
import org.elasticsearch.xpack.core.ml.MlMetaIndex;
import org.elasticsearch.xpack.core.ml.action.CloseJobAction;
+import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarAction;
import org.elasticsearch.xpack.core.ml.action.DeleteCalendarEventAction;
import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction;

@@ -75,7 +76,6 @@ import org.elasticsearch.xpack.core.ml.action.DeleteForecastAction;
import org.elasticsearch.xpack.core.ml.action.DeleteJobAction;
import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction;
import org.elasticsearch.xpack.core.ml.action.DeleteTrainedModelAction;
-import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction;
import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction;
import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction;

@@ -98,8 +98,8 @@ import org.elasticsearch.xpack.core.ml.action.GetOverallBucketsAction;
import org.elasticsearch.xpack.core.ml.action.GetRecordsAction;
import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsAction;
import org.elasticsearch.xpack.core.ml.action.GetTrainedModelsStatsAction;
-import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction;
import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction;
+import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction;
import org.elasticsearch.xpack.core.ml.action.KillProcessAction;
import org.elasticsearch.xpack.core.ml.action.MlInfoAction;
import org.elasticsearch.xpack.core.ml.action.OpenJobAction;

@@ -136,6 +136,7 @@ import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.notifications.AuditorField;
import org.elasticsearch.xpack.core.template.TemplateUtils;
import org.elasticsearch.xpack.ml.action.TransportCloseJobAction;
+import org.elasticsearch.xpack.ml.action.TransportExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteCalendarEventAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteDataFrameAnalyticsAction;

@@ -146,7 +147,6 @@ import org.elasticsearch.xpack.ml.action.TransportDeleteForecastAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteJobAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteModelSnapshotAction;
import org.elasticsearch.xpack.ml.action.TransportDeleteTrainedModelAction;
-import org.elasticsearch.xpack.ml.action.TransportEstimateMemoryUsageAction;
import org.elasticsearch.xpack.ml.action.TransportEvaluateDataFrameAction;
import org.elasticsearch.xpack.ml.action.TransportFinalizeJobExecutionAction;
import org.elasticsearch.xpack.ml.action.TransportFindFileStructureAction;

@@ -167,9 +167,9 @@ import org.elasticsearch.xpack.ml.action.TransportGetJobsStatsAction;
import org.elasticsearch.xpack.ml.action.TransportGetModelSnapshotsAction;
import org.elasticsearch.xpack.ml.action.TransportGetOverallBucketsAction;
import org.elasticsearch.xpack.ml.action.TransportGetRecordsAction;
+import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction;
import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsStatsAction;
import org.elasticsearch.xpack.ml.action.TransportInternalInferModelAction;
-import org.elasticsearch.xpack.ml.action.TransportGetTrainedModelsAction;
import org.elasticsearch.xpack.ml.action.TransportIsolateDatafeedAction;
import org.elasticsearch.xpack.ml.action.TransportKillProcessAction;
import org.elasticsearch.xpack.ml.action.TransportMlInfoAction;

@@ -258,8 +258,8 @@ import org.elasticsearch.xpack.ml.rest.datafeeds.RestPutDatafeedAction;
import org.elasticsearch.xpack.ml.rest.datafeeds.RestStartDatafeedAction;
import org.elasticsearch.xpack.ml.rest.datafeeds.RestStopDatafeedAction;
import org.elasticsearch.xpack.ml.rest.datafeeds.RestUpdateDatafeedAction;
+import org.elasticsearch.xpack.ml.rest.dataframe.RestExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestDeleteDataFrameAnalyticsAction;
-import org.elasticsearch.xpack.ml.rest.dataframe.RestEstimateMemoryUsageAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestEvaluateDataFrameAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.rest.dataframe.RestGetDataFrameAnalyticsStatsAction;

@@ -759,7 +759,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
            new RestStartDataFrameAnalyticsAction(restController),
            new RestStopDataFrameAnalyticsAction(restController),
            new RestEvaluateDataFrameAction(restController),
-            new RestEstimateMemoryUsageAction(restController),
+            new RestExplainDataFrameAnalyticsAction(restController),
            new RestGetTrainedModelsAction(restController),
            new RestDeleteTrainedModelAction(restController),
            new RestGetTrainedModelsStatsAction(restController)

@@ -829,7 +829,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
            new ActionHandler<>(StartDataFrameAnalyticsAction.INSTANCE, TransportStartDataFrameAnalyticsAction.class),
            new ActionHandler<>(StopDataFrameAnalyticsAction.INSTANCE, TransportStopDataFrameAnalyticsAction.class),
            new ActionHandler<>(EvaluateDataFrameAction.INSTANCE, TransportEvaluateDataFrameAction.class),
-            new ActionHandler<>(EstimateMemoryUsageAction.INSTANCE, TransportEstimateMemoryUsageAction.class),
+            new ActionHandler<>(ExplainDataFrameAnalyticsAction.INSTANCE, TransportExplainDataFrameAnalyticsAction.class),
            new ActionHandler<>(InternalInferModelAction.INSTANCE, TransportInternalInferModelAction.class),
            new ActionHandler<>(GetTrainedModelsAction.INSTANCE, TransportGetTrainedModelsAction.class),
            new ActionHandler<>(DeleteTrainedModelAction.INSTANCE, TransportDeleteTrainedModelAction.class),

@@ -1,130 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.action;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionListenerResponseHandler;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.tasks.Task;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.ml.MachineLearning;
import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager;

import java.util.Objects;
import java.util.Optional;

/**
 * Estimates memory usage for the given data frame analytics spec.
 * Redirects to a different node if the current node is *not* an ML node.
 */
public class TransportEstimateMemoryUsageAction
    extends HandledTransportAction<PutDataFrameAnalyticsAction.Request, EstimateMemoryUsageAction.Response> {

    private final TransportService transportService;
    private final ClusterService clusterService;
    private final NodeClient client;
    private final MemoryUsageEstimationProcessManager processManager;

    @Inject
    public TransportEstimateMemoryUsageAction(TransportService transportService,
                                              ActionFilters actionFilters,
                                              ClusterService clusterService,
                                              NodeClient client,
                                              MemoryUsageEstimationProcessManager processManager) {
        super(EstimateMemoryUsageAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new);
        this.transportService = transportService;
        this.clusterService = Objects.requireNonNull(clusterService);
        this.client = Objects.requireNonNull(client);
        this.processManager = Objects.requireNonNull(processManager);
    }

    @Override
    protected void doExecute(Task task,
                             PutDataFrameAnalyticsAction.Request request,
                             ActionListener<EstimateMemoryUsageAction.Response> listener) {
        DiscoveryNode localNode = clusterService.localNode();
        if (MachineLearning.isMlNode(localNode)) {
            doEstimateMemoryUsage(createTaskIdForMemoryEstimation(task), request, listener);
        } else {
            redirectToMlNode(request, listener);
        }
    }

    /**
     * Creates unique task id for the memory estimation process. This id is useful when logging.
     */
    private static String createTaskIdForMemoryEstimation(Task task) {
        return "memory_usage_estimation_" + task.getId();
    }

    /**
     * Performs memory usage estimation.
     * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on
     * the ML node.
     */
    private void doEstimateMemoryUsage(String taskId,
                                       PutDataFrameAnalyticsAction.Request request,
                                       ActionListener<EstimateMemoryUsageAction.Response> listener) {
        DataFrameDataExtractorFactory.createForSourceIndices(
            client,
            taskId,
            true, // We are not interested in first-time run validations here
            request.getConfig(),
            ActionListener.wrap(
                dataExtractorFactory -> {
                    processManager.runJobAsync(
                        taskId,
                        request.getConfig(),
                        dataExtractorFactory,
                        ActionListener.wrap(
                            result -> listener.onResponse(
                                new EstimateMemoryUsageAction.Response(
                                    result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())),
                            listener::onFailure
                        )
                    );
                },
                listener::onFailure
            )
        );
    }

    /**
     * Finds the first available ML node in the cluster and redirects the request to this node.
     */
    private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request,
                                  ActionListener<EstimateMemoryUsageAction.Response> listener) {
        Optional<DiscoveryNode> node = findMlNode(clusterService.state());
        if (node.isPresent()) {
            transportService.sendRequest(
                node.get(), actionName, request, new ActionListenerResponseHandler<>(listener, EstimateMemoryUsageAction.Response::new));
        } else {
            listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on"));
        }
    }

    /**
     * Finds the first available ML node in the cluster state.
     */
    private static Optional<DiscoveryNode> findMlNode(ClusterState clusterState) {
        for (DiscoveryNode node : clusterState.getNodes()) {
            if (MachineLearning.isMlNode(node)) {
                return Optional.of(node);
            }
        }
        return Optional.empty();
    }
}

@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.action;
|
||||
|
||||
import org.elasticsearch.action.ActionListener;
|
||||
import org.elasticsearch.action.ActionListenerResponseHandler;
|
||||
import org.elasticsearch.action.support.ActionFilters;
|
||||
import org.elasticsearch.action.support.HandledTransportAction;
|
||||
import org.elasticsearch.client.node.NodeClient;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||
import org.elasticsearch.cluster.service.ClusterService;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.license.LicenseUtils;
|
||||
import org.elasticsearch.license.XPackLicenseState;
|
||||
import org.elasticsearch.tasks.Task;
|
||||
import org.elasticsearch.transport.TransportService;
|
||||
import org.elasticsearch.xpack.core.XPackField;
|
||||
import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
|
||||
import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
|
||||
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
|
||||
import org.elasticsearch.xpack.core.ml.dataframe.explain.MemoryEstimation;
|
||||
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
|
||||
import org.elasticsearch.xpack.ml.MachineLearning;
|
||||
import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
|
||||
import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetector;
|
||||
import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory;
|
||||
import org.elasticsearch.xpack.ml.dataframe.process.MemoryUsageEstimationProcessManager;
|
||||
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* Provides explanations on aspects of the given data frame analytics spec like memory estimation, field selection, etc.
|
||||
* Redirects to a different node if the current node is *not* an ML node.
|
||||
*/
|
||||
public class TransportExplainDataFrameAnalyticsAction
|
||||
extends HandledTransportAction<PutDataFrameAnalyticsAction.Request, ExplainDataFrameAnalyticsAction.Response> {
|
||||
|
||||
private final XPackLicenseState licenseState;
|
||||
private final TransportService transportService;
|
||||
private final ClusterService clusterService;
|
||||
private final NodeClient client;
|
||||
private final MemoryUsageEstimationProcessManager processManager;
|
||||
|
||||
@Inject
|
||||
public TransportExplainDataFrameAnalyticsAction(TransportService transportService,
|
||||
ActionFilters actionFilters,
|
||||
ClusterService clusterService,
|
||||
NodeClient client,
|
||||
XPackLicenseState licenseState,
|
||||
MemoryUsageEstimationProcessManager processManager) {
|
||||
super(ExplainDataFrameAnalyticsAction.NAME, transportService, actionFilters, PutDataFrameAnalyticsAction.Request::new);
|
||||
this.transportService = transportService;
|
||||
this.clusterService = Objects.requireNonNull(clusterService);
|
||||
this.client = Objects.requireNonNull(client);
|
||||
this.licenseState = licenseState;
|
||||
this.processManager = Objects.requireNonNull(processManager);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doExecute(Task task,
|
||||
PutDataFrameAnalyticsAction.Request request,
|
||||
ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
|
||||
if (licenseState.isMachineLearningAllowed() == false) {
|
||||
listener.onFailure(LicenseUtils.newComplianceException(XPackField.MACHINE_LEARNING));
|
||||
return;
|
||||
}
|
||||
|
||||
DiscoveryNode localNode = clusterService.localNode();
|
||||
if (MachineLearning.isMlNode(localNode)) {
|
||||
explain(task, request, listener);
|
||||
} else {
|
||||
redirectToMlNode(request, listener);
|
||||
}
|
||||
}
|
||||
|
||||
private void explain(Task task, PutDataFrameAnalyticsAction.Request request,
|
||||
ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
|
||||
ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
|
||||
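        // The second argument (isTaskRestarting) is hard-coded to true as we are not
        // interested in first-time-run validations here; we only want to explain the config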
        extractedFieldsDetectorFactory.createFromSource(request.getConfig(), true, ActionListener.wrap(
            extractedFieldsDetector -> {
                explain(task, request, extractedFieldsDetector, listener);
            },
            listener::onFailure
        ));
    }

    private void explain(Task task, PutDataFrameAnalyticsAction.Request request, ExtractedFieldsDetector extractedFieldsDetector,
                         ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
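        // detect() returns both the fields that will be extracted for the analysis and,
        // for each field that was considered, an explanation of whether it was selected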
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        ActionListener<MemoryEstimation> memoryEstimationListener = ActionListener.wrap(
            memoryEstimation -> listener.onResponse(new ExplainDataFrameAnalyticsAction.Response(fieldExtraction.v2(), memoryEstimation)),
            listener::onFailure
        );

        estimateMemoryUsage(task, request, fieldExtraction.v1(), memoryEstimationListener);
    }

    /**
     * Performs memory usage estimation.
     * Memory usage estimation spawns an ML C++ process which is only available on ML nodes. That's why this method can only be called on
     * an ML node.
     */
    private void estimateMemoryUsage(Task task,
                                     PutDataFrameAnalyticsAction.Request request,
                                     ExtractedFields extractedFields,
                                     ActionListener<MemoryEstimation> listener) {
        final String estimateMemoryTaskId = "memory_usage_estimation_" + task.getId();
        DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(
            client, estimateMemoryTaskId, request.getConfig(), extractedFields);
        processManager.runJobAsync(
            estimateMemoryTaskId,
            request.getConfig(),
            extractorFactory,
            ActionListener.wrap(
                result -> listener.onResponse(
                    new MemoryEstimation(result.getExpectedMemoryWithoutDisk(), result.getExpectedMemoryWithDisk())),
                listener::onFailure
            )
        );
    }

    /**
     * Finds the first available ML node in the cluster and redirects the request to this node.
     */
    private void redirectToMlNode(PutDataFrameAnalyticsAction.Request request,
                                  ActionListener<ExplainDataFrameAnalyticsAction.Response> listener) {
        Optional<DiscoveryNode> node = findMlNode(clusterService.state());
        if (node.isPresent()) {
            transportService.sendRequest(node.get(), actionName, request,
                new ActionListenerResponseHandler<>(listener, ExplainDataFrameAnalyticsAction.Response::new));
        } else {
            listener.onFailure(ExceptionsHelper.badRequestException("No ML node to run on"));
        }
    }

    /**
     * Finds the first available ML node in the cluster state.
     */
    private static Optional<DiscoveryNode> findMlNode(ClusterState clusterState) {
        for (DiscoveryNode node : clusterState.getNodes()) {
            if (MachineLearning.isMlNode(node)) {
                return Optional.of(node);
            }
        }
        return Optional.empty();
    }
}

@ -29,6 +29,7 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.license.LicenseUtils;

@ -47,7 +48,7 @@ import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.XPackField;
import org.elasticsearch.xpack.core.ml.MlMetadata;
import org.elasticsearch.xpack.core.ml.MlTasks;
import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction;
import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction;

@ -66,6 +67,7 @@ import org.elasticsearch.xpack.ml.dataframe.SourceDestValidator;
import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory;
import org.elasticsearch.xpack.ml.dataframe.extractor.ExtractedFieldsDetectorFactory;
import org.elasticsearch.xpack.ml.dataframe.persistence.DataFrameAnalyticsConfigProvider;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.job.JobNodeSelector;
import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor;
import org.elasticsearch.xpack.ml.process.MlMemoryTracker;

@ -190,20 +192,18 @@ public class TransportStartDataFrameAnalyticsAction
        final String jobId = startContext.config.getId();

        // Tell the job tracker to refresh the memory requirement for this job and all other jobs that have persistent tasks
        ActionListener<EstimateMemoryUsageAction.Response> estimateMemoryUsageListener = ActionListener.wrap(
            estimateMemoryUsageResponse -> {
                auditor.info(
                    jobId,
                    Messages.getMessage(
                        Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE,
                        estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()));
        ActionListener<ExplainDataFrameAnalyticsAction.Response> explainListener = ActionListener.wrap(
            explainResponse -> {
                ByteSizeValue expectedMemoryWithoutDisk = explainResponse.getMemoryEstimation().getExpectedMemoryWithoutDisk();
                auditor.info(jobId,
                    Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE, expectedMemoryWithoutDisk));
                // Validate that model memory limit is sufficient to run the analysis
                if (startContext.config.getModelMemoryLimit()
                        .compareTo(estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk()) < 0) {
                        .compareTo(expectedMemoryWithoutDisk) < 0) {
                    ElasticsearchStatusException e =
                        ExceptionsHelper.badRequestException(
                            "Cannot start because the configured model memory limit [{}] is lower than the expected memory usage [{}]",
                            startContext.config.getModelMemoryLimit(), estimateMemoryUsageResponse.getExpectedMemoryWithoutDisk());
                            startContext.config.getModelMemoryLimit(), expectedMemoryWithoutDisk);
                    listener.onFailure(e);
                    return;
                }

@ -215,13 +215,13 @@ public class TransportStartDataFrameAnalyticsAction
            listener::onFailure
        );

        PutDataFrameAnalyticsAction.Request estimateMemoryUsageRequest = new PutDataFrameAnalyticsAction.Request(startContext.config);
        PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(startContext.config);
        ClientHelper.executeAsyncWithOrigin(
            client,
            ClientHelper.ML_ORIGIN,
            EstimateMemoryUsageAction.INSTANCE,
            estimateMemoryUsageRequest,
            estimateMemoryUsageListener);
            ExplainDataFrameAnalyticsAction.INSTANCE,
            explainRequest,
            explainListener);

    }

@ -277,7 +277,11 @@ public class TransportStartDataFrameAnalyticsAction
            // Validate extraction is possible
            boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME;
            new ExtractedFieldsDetectorFactory(client).createFromSource(startContext.config, isTaskRestarting, ActionListener.wrap(
                extractedFieldsDetector -> toValidateDestEmptyListener.onResponse(startContext), finalListener::onFailure));
                extractedFieldsDetector -> {
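                    // Cache the detected fields on the start context so that later validation
                    // steps can reuse them instead of running field detection a second time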
                    startContext.extractedFields = extractedFieldsDetector.detect().v1();
                    toValidateDestEmptyListener.onResponse(startContext);
                },
                finalListener::onFailure));
            },
            finalListener::onFailure
        );

@ -294,33 +298,27 @@ public class TransportStartDataFrameAnalyticsAction
    }

    private void validateSourceIndexHasRows(StartContext startContext, ActionListener<StartContext> listener) {
        boolean isTaskRestarting = startContext.startingState != DataFrameAnalyticsTask.StartingState.FIRST_TIME;
        DataFrameDataExtractorFactory.createForSourceIndices(client,
        DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client,
            "validate_source_index_has_rows-" + startContext.config.getId(),
            isTaskRestarting,
            startContext.config,
            ActionListener.wrap(
                dataFrameDataExtractorFactory ->
                    dataFrameDataExtractorFactory
                        .newExtractor(false)
                        .collectDataSummaryAsync(ActionListener.wrap(
                            dataSummary -> {
                                if (dataSummary.rows == 0) {
                                    listener.onFailure(ExceptionsHelper.badRequestException(
                                        "Unable to start {} as no documents in the source indices [{}] contained all the fields "
                                            + "selected for analysis. If you are relying on automatic field selection then there are "
                                            + "currently mapped fields that do not exist in any indexed documents, and you will have "
                                            + "to switch to explicit field selection and include only fields that exist in indexed "
                                            + "documents.",
                                        startContext.config.getId(),
                                        Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
                                    ));
                                } else {
                                    listener.onResponse(startContext);
                                }
                            },
                            listener::onFailure
                        )),
            startContext.extractedFields);
        extractorFactory.newExtractor(false)
            .collectDataSummaryAsync(ActionListener.wrap(
                dataSummary -> {
                    if (dataSummary.rows == 0) {
                        listener.onFailure(ExceptionsHelper.badRequestException(
                            "Unable to start {} as no documents in the source indices [{}] contained all the fields "
                                + "selected for analysis. If you are relying on automatic field selection then there are "
                                + "currently mapped fields that do not exist in any indexed documents, and you will have "
                                + "to switch to explicit field selection and include only fields that exist in indexed "
                                + "documents.",
                            startContext.config.getId(),
                            Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
                        ));
                    } else {
                        listener.onResponse(startContext);
                    }
                },
                listener::onFailure
            ));
    }

@ -402,6 +400,7 @@ public class TransportStartDataFrameAnalyticsAction
        private final DataFrameAnalyticsConfig config;
        private final List<PhaseProgress> progressOnStart;
        private final DataFrameAnalyticsTask.StartingState startingState;
        private volatile ExtractedFields extractedFields;

        private StartContext(DataFrameAnalyticsConfig config, List<PhaseProgress> progressOnStart) {
            this.config = config;

@ -29,7 +29,7 @@ public class DataFrameDataExtractorFactory {
    private final Map<String, String> headers;
    private final boolean includeRowsWithMissingValues;

    private DataFrameDataExtractorFactory(Client client, String analyticsId, List<String> indices, ExtractedFields extractedFields,
    public DataFrameDataExtractorFactory(Client client, String analyticsId, List<String> indices, ExtractedFields extractedFields,
                                         Map<String, String> headers, boolean includeRowsWithMissingValues) {
        this.client = Objects.requireNonNull(client);
        this.analyticsId = Objects.requireNonNull(analyticsId);

@ -66,32 +66,19 @@ public class DataFrameDataExtractorFactory {
    }

    /**
     * Validate and create a new extractor factory
     * Create a new extractor factory
     *
     * The source index must exist and contain at least 1 compatible field or validations will fail.
     *
     * @param client ES Client used to make calls against the cluster
     * @param taskId The task id
     * @param isTaskRestarting Whether the task is restarting or it is running for the first time
     * @param config The config from which to create the extractor factory
     * @param listener The listener to notify on creation or failure
     * @param extractedFields The fields to extract
     */
    public static void createForSourceIndices(Client client,
                                              String taskId,
                                              boolean isTaskRestarting,
                                              DataFrameAnalyticsConfig config,
                                              ActionListener<DataFrameDataExtractorFactory> listener) {
        ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
        extractedFieldsDetectorFactory.createFromSource(config, isTaskRestarting, ActionListener.wrap(
            extractedFieldsDetector -> {
                ExtractedFields extractedFields = extractedFieldsDetector.detect();
                DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, taskId,
                    Arrays.asList(config.getSource().getIndex()), extractedFields, config.getHeaders(),
                    config.getAnalysis().supportsMissingValues());
                listener.onResponse(extractorFactory);
            },
            listener::onFailure
        ));
    public static DataFrameDataExtractorFactory createForSourceIndices(Client client, String taskId, DataFrameAnalyticsConfig config,
                                                                       ExtractedFields extractedFields) {
        return new DataFrameDataExtractorFactory(client, taskId, Arrays.asList(config.getSource().getIndex()), extractedFields,
            config.getHeaders(), config.getAnalysis().supportsMissingValues());
    }

    /**

@ -111,7 +98,7 @@ public class DataFrameDataExtractorFactory {
        ExtractedFieldsDetectorFactory extractedFieldsDetectorFactory = new ExtractedFieldsDetectorFactory(client);
        extractedFieldsDetectorFactory.createFromDest(config, isTaskRestarting, ActionListener.wrap(
            extractedFieldsDetector -> {
                ExtractedFields extractedFields = extractedFieldsDetector.detect();
                ExtractedFields extractedFields = extractedFieldsDetector.detect().v1();
                DataFrameDataExtractorFactory extractorFactory = new DataFrameDataExtractorFactory(client, config.getId(),
                    Collections.singletonList(config.getDest().getIndex()), extractedFields, config.getHeaders(),
                    config.getAnalysis().supportsMissingValues());

@ -11,6 +11,7 @@ import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.BooleanFieldMapper;

@ -19,6 +20,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Types;
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.core.ml.utils.NameResolver;

@ -29,13 +31,12 @@ import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

@ -57,9 +58,8 @@ public class ExtractedFieldsDetector {
    private final FieldCapabilitiesResponse fieldCapabilitiesResponse;
    private final Map<String, Long> fieldCardinalities;

    ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting,
                            int docValueFieldsLimit, FieldCapabilitiesResponse fieldCapabilitiesResponse,
                            Map<String, Long> fieldCardinalities) {
    ExtractedFieldsDetector(String[] index, DataFrameAnalyticsConfig config, boolean isTaskRestarting, int docValueFieldsLimit,
                            FieldCapabilitiesResponse fieldCapabilitiesResponse, Map<String, Long> fieldCardinalities) {
        this.index = Objects.requireNonNull(index);
        this.config = Objects.requireNonNull(config);
        this.isTaskRestarting = isTaskRestarting;

@ -68,8 +68,30 @@ public class ExtractedFieldsDetector {
        this.fieldCardinalities = Objects.requireNonNull(fieldCardinalities);
    }

    public ExtractedFields detect() {
        Set<String> fields = getIncludedFields();
    public Tuple<ExtractedFields, List<FieldSelection>> detect() {
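        // A TreeSet ordered by field name, so the field selection explanation is
        // reported in alphabetical order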
        TreeSet<FieldSelection> fieldSelection = new TreeSet<>(Comparator.comparing(FieldSelection::getName));
        Set<String> fields = getIncludedFields(fieldSelection);
        checkFieldsHaveCompatibleTypes(fields);
        checkRequiredFields(fields);
        checkFieldsWithCardinalityLimit();
        ExtractedFields extractedFields = detectExtractedFields(fields, fieldSelection);
        addIncludedFields(extractedFields, fieldSelection);

        return Tuple.tuple(extractedFields, Collections.unmodifiableList(new ArrayList<>(fieldSelection)));
    }

    private Set<String> getIncludedFields(Set<FieldSelection> fieldSelection) {
        Set<String> fields = new TreeSet<>(fieldCapabilitiesResponse.get().keySet());
        fields.removeAll(IGNORE_FIELDS);
        checkResultsFieldIsNotPresent();
        removeFieldsUnderResultsField(fields);
        FetchSourceContext analyzedFields = config.getAnalyzedFields();

        // If the user has not explicitly included fields we'll include all compatible fields
        if (analyzedFields == null || analyzedFields.includes().length == 0) {
            removeFieldsWithIncompatibleTypes(fields, fieldSelection);
        }
        includeAndExcludeFields(fields, fieldSelection);

        if (fields.isEmpty()) {
            throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.",

@ -77,26 +99,19 @@ public class ExtractedFieldsDetector {
                getSupportedTypes());
        }

        checkNoIgnoredFields(fields);
        checkFieldsHaveCompatibleTypes(fields);
        checkRequiredFields(fields);
        checkFieldsWithCardinalityLimit();
        return detectExtractedFields(fields);
        return fields;
    }

    private Set<String> getIncludedFields() {
        Set<String> fields = new HashSet<>(fieldCapabilitiesResponse.get().keySet());
        checkResultsFieldIsNotPresent();
        removeFieldsUnderResultsField(fields);
        FetchSourceContext analyzedFields = config.getAnalyzedFields();

        // If the user has not explicitly included fields we'll include all compatible fields
        if (analyzedFields == null || analyzedFields.includes().length == 0) {
            fields.removeAll(IGNORE_FIELDS);
            removeFieldsWithIncompatibleTypes(fields);
    private void removeFieldsUnderResultsField(Set<String> fields) {
        String resultsField = config.getDest().getResultsField();
        Iterator<String> fieldsIterator = fields.iterator();
        while (fieldsIterator.hasNext()) {
            String field = fieldsIterator.next();
            if (field.startsWith(resultsField + ".")) {
                fieldsIterator.remove();
            }
        }
        includeAndExcludeFields(fields);
        return fields;
        fields.removeIf(field -> field.startsWith(resultsField + "."));
    }

    private void checkResultsFieldIsNotPresent() {

@ -117,16 +132,21 @@ public class ExtractedFieldsDetector {
        }
    }

    private void removeFieldsUnderResultsField(Set<String> fields) {
        // Ignore fields under the results object
        fields.removeIf(field -> field.startsWith(config.getDest().getResultsField() + "."));
    private void addExcludedField(String field, String reason, Set<FieldSelection> fieldSelection) {
        fieldSelection.add(FieldSelection.excluded(field, getMappingTypes(field), reason));
    }

    private void removeFieldsWithIncompatibleTypes(Set<String> fields) {
    private Set<String> getMappingTypes(String field) {
        Map<String, FieldCapabilities> fieldCaps = fieldCapabilitiesResponse.getField(field);
        return fieldCaps == null ? Collections.emptySet() : fieldCaps.keySet();
    }

    private void removeFieldsWithIncompatibleTypes(Set<String> fields, Set<FieldSelection> fieldSelection) {
        Iterator<String> fieldsIterator = fields.iterator();
        while (fieldsIterator.hasNext()) {
            String field = fieldsIterator.next();
            if (hasCompatibleType(field) == false) {
                addExcludedField(field, "unsupported type; supported types are " + getSupportedTypes(), fieldSelection);
                fieldsIterator.remove();
            }
        }

@ -163,7 +183,7 @@ public class ExtractedFieldsDetector {
        return supportedTypes;
    }

    private void includeAndExcludeFields(Set<String> fields) {
    private void includeAndExcludeFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
        FetchSourceContext analyzedFields = config.getAnalyzedFields();
        if (analyzedFields == null) {
            return;

@ -188,18 +208,30 @@ public class ExtractedFieldsDetector {
                    Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_BAD_FIELD_FILTER, ex)))
                .expand(excludes, true);

            fields.retainAll(includedSet);
            fields.removeAll(excludedSet);
            applyIncludesExcludes(fields, includedSet, excludedSet, fieldSelection);
        } catch (ResourceNotFoundException ex) {
            // Re-wrap our exception so that we throw the same exception type when there are no fields.
            throw ExceptionsHelper.badRequestException(ex.getMessage());
        }
    }

    private void checkNoIgnoredFields(Set<String> fields) {
        Optional<String> ignoreField = IGNORE_FIELDS.stream().filter(fields::contains).findFirst();
        if (ignoreField.isPresent()) {
            throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", ignoreField.get());
    private void applyIncludesExcludes(Set<String> fields, Set<String> includes, Set<String> excludes,
                                       Set<FieldSelection> fieldSelection) {
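        // Keep a field only if it is in the includes set and not in the excludes set,
        // recording the reason for every field that is dropped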
        Iterator<String> fieldsIterator = fields.iterator();
        while (fieldsIterator.hasNext()) {
            String field = fieldsIterator.next();
            if (includes.contains(field)) {
                if (IGNORE_FIELDS.contains(field)) {
                    throw ExceptionsHelper.badRequestException("field [{}] cannot be analyzed", field);
                }
            } else {
                fieldsIterator.remove();
                addExcludedField(field, "field not in includes list", fieldSelection);
            }
            if (excludes.contains(field)) {
                fieldsIterator.remove();
                addExcludedField(field, "field in excludes list", fieldSelection);
            }
        }
    }

@ -247,13 +279,10 @@ public class ExtractedFieldsDetector {
        }
    }

    private ExtractedFields detectExtractedFields(Set<String> fields) {
        List<String> sortedFields = new ArrayList<>(fields);
        // We sort the fields to ensure the checksum for each document is deterministic
        Collections.sort(sortedFields);
        ExtractedFields extractedFields = ExtractedFields.build(sortedFields, Collections.emptySet(), fieldCapabilitiesResponse);
    private ExtractedFields detectExtractedFields(Set<String> fields, Set<FieldSelection> fieldSelection) {
        ExtractedFields extractedFields = ExtractedFields.build(fields, Collections.emptySet(), fieldCapabilitiesResponse);
        boolean preferSource = extractedFields.getDocValueFields().size() > docValueFieldsLimit;
        extractedFields = deduplicateMultiFields(extractedFields, preferSource);
        extractedFields = deduplicateMultiFields(extractedFields, preferSource, fieldSelection);
        if (preferSource) {
            extractedFields = fetchFromSourceIfSupported(extractedFields);
            if (extractedFields.getDocValueFields().size() > docValueFieldsLimit) {

@ -266,7 +295,8 @@ public class ExtractedFieldsDetector {
        return extractedFields;
    }

    private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource) {
    private ExtractedFields deduplicateMultiFields(ExtractedFields extractedFields, boolean preferSource,
                                                   Set<FieldSelection> fieldSelection) {
        Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName)
            .collect(Collectors.toSet());
        Map<String, ExtractedField> nameOrParentToField = new LinkedHashMap<>();

@ -276,43 +306,53 @@ public class ExtractedFieldsDetector {
            if (existingField != null) {
                ExtractedField parent = currentField.isMultiField() ? existingField : currentField;
                ExtractedField multiField = currentField.isMultiField() ? currentField : existingField;
                nameOrParentToField.put(nameOrParent, chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField));
                nameOrParentToField.put(nameOrParent,
                    chooseMultiFieldOrParent(preferSource, requiredFields, parent, multiField, fieldSelection));
            }
        }
        return new ExtractedFields(new ArrayList<>(nameOrParentToField.values()));
    }

    private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set<String> requiredFields,
                                                    ExtractedField parent, ExtractedField multiField) {
    private ExtractedField chooseMultiFieldOrParent(boolean preferSource, Set<String> requiredFields, ExtractedField parent,
                                                    ExtractedField multiField, Set<FieldSelection> fieldSelection) {
        // Check requirements first
        if (requiredFields.contains(parent.getName())) {
            addExcludedField(multiField.getName(), "[" + parent.getName() + "] is required instead", fieldSelection);
            return parent;
        }
        if (requiredFields.contains(multiField.getName())) {
            addExcludedField(parent.getName(), "[" + multiField.getName() + "] is required instead", fieldSelection);
            return multiField;
        }

        // If both are multi-fields it means there are several. In this case parent is the previous multi-field
        // we selected. We'll just keep that.
        if (parent.isMultiField() && multiField.isMultiField()) {
            addExcludedField(multiField.getName(), "[" + parent.getName() + "] came first", fieldSelection);
            return parent;
        }

        // If we prefer source only the parent may support it. If it does we pick it immediately.
        if (preferSource && parent.supportsFromSource()) {
            addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it supports fetching from source",
                fieldSelection);
            return parent;
        }

        // If any of the two is a doc_value field let's prefer it as it'd support aggregations.
        // We check the parent first as it'd be a shorter field name.
        if (parent.getMethod() == ExtractedField.Method.DOC_VALUE) {
            addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because it is aggregatable", fieldSelection);
            return parent;
        }
        if (multiField.getMethod() == ExtractedField.Method.DOC_VALUE) {
            addExcludedField(parent.getName(), "[" + multiField.getName() + "] is preferred because it is aggregatable", fieldSelection);
            return multiField;
        }

        // Neither is aggregatable. Let's pick the parent for its shorter name.
        addExcludedField(multiField.getName(), "[" + parent.getName() + "] is preferred because none of the multi-fields are aggregatable",
            fieldSelection);
        return parent;
    }

@ -343,6 +383,26 @@ public class ExtractedFieldsDetector {
        return new ExtractedFields(adjusted);
    }

    private void addIncludedFields(ExtractedFields extractedFields, Set<FieldSelection> fieldSelection) {
        Set<String> requiredFields = config.getAnalysis().getRequiredFields().stream().map(RequiredField::getName)
            .collect(Collectors.toSet());
        Set<String> categoricalFields = getCategoricalFields(extractedFields);
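        // Report every field that survived selection as included, noting whether the
        // analysis requires it and whether it will be treated as categorical or numerical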
        for (ExtractedField includedField : extractedFields.getAllFields()) {
            FieldSelection.FeatureType featureType = categoricalFields.contains(includedField.getName()) ?
                FieldSelection.FeatureType.CATEGORICAL : FieldSelection.FeatureType.NUMERICAL;
            fieldSelection.add(FieldSelection.included(includedField.getName(), includedField.getTypes(),
                requiredFields.contains(includedField.getName()), featureType));
        }
    }

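    // A field is treated as categorical when all of its mapped types are among the
    // types the analysis allows to be categorical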
    private Set<String> getCategoricalFields(ExtractedFields extractedFields) {
        return extractedFields.getAllFields().stream()
            .filter(extractedField -> config.getAnalysis().getAllowedCategoricalTypes(extractedField.getName())
                .containsAll(extractedField.getTypes()))
            .map(ExtractedField::getName)
            .collect(Collectors.toSet());
    }

    private static boolean isBoolean(Set<String> types) {
        return types.size() == 1 && types.contains(BooleanFieldMapper.CONTENT_TYPE);
    }

@ -100,9 +100,9 @@ public class MemoryUsageEstimationProcessManager {
        } finally {
            process.consumeAndCloseOutputStream();
            try {
                LOGGER.info("[{}] Closing process", jobId);
                LOGGER.debug("[{}] Closing process", jobId);
                process.close();
                LOGGER.info("[{}] Closed process", jobId);
                LOGGER.debug("[{}] Closed process", jobId);
            } catch (Exception e) {
                String errorMsg =
                    new ParameterizedMessage(

@ -1,38 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.rest.dataframe;

import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.RestToXContentListener;
import org.elasticsearch.xpack.core.ml.action.EstimateMemoryUsageAction;
import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
import org.elasticsearch.xpack.ml.MachineLearning;

import java.io.IOException;

public class RestEstimateMemoryUsageAction extends BaseRestHandler {

    public RestEstimateMemoryUsageAction(RestController controller) {
        controller.registerHandler(
            RestRequest.Method.POST,
            MachineLearning.BASE_PATH + "data_frame/analytics/_estimate_memory_usage", this);
    }

    @Override
    public String getName() {
        return "ml_estimate_memory_usage_action";
    }

    @Override
    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
        PutDataFrameAnalyticsAction.Request request =
            PutDataFrameAnalyticsAction.Request.parseRequestForMemoryEstimation(restRequest.contentOrSourceParamParser());
        return channel -> client.execute(EstimateMemoryUsageAction.INSTANCE, request, new RestToXContentListener<>(channel));
    }
}

@ -0,0 +1,84 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.rest.dataframe;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.Strings;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.RestToXContentListener;
import org.elasticsearch.xpack.core.ml.action.ExplainDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.action.PutDataFrameAnalyticsAction;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.ml.MachineLearning;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

public class RestExplainDataFrameAnalyticsAction extends BaseRestHandler {

    public RestExplainDataFrameAnalyticsAction(RestController controller) {
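        // The endpoint accepts both GET and POST, with the job id as an optional path part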
        controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this);
        controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/_explain", this);
        controller.registerHandler(RestRequest.Method.GET, MachineLearning.BASE_PATH + "data_frame/analytics/{"
            + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this);
        controller.registerHandler(RestRequest.Method.POST, MachineLearning.BASE_PATH + "data_frame/analytics/{"
            + DataFrameAnalyticsConfig.ID.getPreferredName() + "}/_explain", this);
    }

    @Override
    public String getName() {
        return "ml_explain_data_frame_analytics_action";
    }

    @Override
    protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException {
        final String jobId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName());

        if (Strings.isNullOrEmpty(jobId) && restRequest.hasContentOrSourceParam() == false) {
            throw ExceptionsHelper.badRequestException("Please provide a job [{}] or the config object",
                DataFrameAnalyticsConfig.ID.getPreferredName());
        }

        if (Strings.isNullOrEmpty(jobId) == false && restRequest.hasContentOrSourceParam()) {
            throw ExceptionsHelper.badRequestException("Please provide either a job [{}] or the config object but not both",
                DataFrameAnalyticsConfig.ID.getPreferredName());
        }

        // We need to consume the body before returning
        PutDataFrameAnalyticsAction.Request explainRequestFromBody = Strings.isNullOrEmpty(jobId) ?
            PutDataFrameAnalyticsAction.Request.parseRequestForExplain(restRequest.contentOrSourceParamParser()) : null;

        return channel -> {
            RestToXContentListener<ExplainDataFrameAnalyticsAction.Response> listener = new RestToXContentListener<>(channel);

            if (explainRequestFromBody != null) {
                client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequestFromBody, listener);
            } else {
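                // A job id was provided, so fetch the stored config first and then request the explanation for it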
                GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(jobId);
                getRequest.setAllowNoResources(false);
                client.execute(GetDataFrameAnalyticsAction.INSTANCE, getRequest, ActionListener.wrap(
                    getResponse -> {
                        List<DataFrameAnalyticsConfig> jobs = getResponse.getResources().results();
                        if (jobs.size() > 1) {
                            listener.onFailure(ExceptionsHelper.badRequestException("expected only one config but matched {}",
                                jobs.stream().map(DataFrameAnalyticsConfig::getId).collect(Collectors.toList())));
                        } else {
                            PutDataFrameAnalyticsAction.Request explainRequest = new PutDataFrameAnalyticsAction.Request(jobs.get(0));
                            client.execute(ExplainDataFrameAnalyticsAction.INSTANCE, explainRequest, listener);
                        }
                    },
                    listener::onFailure
                ));
            }
        };
    }
}

@ -8,6 +8,7 @@ package org.elasticsearch.xpack.ml.dataframe.extractor;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.action.fieldcaps.FieldCapabilities;
import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.test.ESTestCase;

@ -17,6 +18,7 @@ import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
import org.elasticsearch.xpack.core.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.xpack.ml.extractor.ExtractedField;
import org.elasticsearch.xpack.ml.extractor.ExtractedFields;
import org.elasticsearch.xpack.ml.test.SearchHitBuilder;

@ -25,6 +27,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@ -48,12 +51,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<ExtractedField> allFields = extractedFields.getAllFields();
        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
        assertThat(allFields.size(), equalTo(1));
        assertThat(allFields.get(0).getName(), equalTo("some_float"));
        assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

        assertFieldSelectionContains(fieldExtraction.v2(),
            FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL));
    }

    public void testDetect_GivenNumericFieldWithMultipleTypes() {

@ -63,12 +69,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<ExtractedField> allFields = extractedFields.getAllFields();
        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
        assertThat(allFields.size(), equalTo(1));
        assertThat(allFields.get(0).getName(), equalTo("some_number"));
        assertThat(allFields.get(0).getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

        assertFieldSelectionContains(fieldExtraction.v2(), FieldSelection.included("some_number",
            new HashSet<>(Arrays.asList("long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float")), false,
            FieldSelection.FeatureType.NUMERICAL));
    }

    public void testDetect_GivenOutlierDetectionAndNonNumericField() {

@ -105,14 +115,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<ExtractedField> allFields = extractedFields.getAllFields();
        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
        assertThat(allFields.size(), equalTo(3));
        assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toSet()),
            containsInAnyOrder("some_float", "some_long", "some_boolean"));
        assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
            contains(equalTo(ExtractedField.Method.DOC_VALUE)));

        assertFieldSelectionContains(fieldExtraction.v2(),
            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.excluded("some_keyword", Collections.singleton("keyword"), "unsupported type; " +
                "supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]"),
            FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
        );
    }

    public void testDetect_GivenRegressionAndMultipleFields() {

@ -126,14 +144,22 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildRegressionConfig("foo"), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<ExtractedField> allFields = extractedFields.getAllFields();
        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
        assertThat(allFields.size(), equalTo(5));
        assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()),
            containsInAnyOrder("foo", "some_float", "some_keyword", "some_long", "some_boolean"));
        assertThat(allFields.stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
            contains(equalTo(ExtractedField.Method.DOC_VALUE)));

        assertFieldSelectionContains(fieldExtraction.v2(),
            FieldSelection.included("foo", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.included("some_float", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.included("some_keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
            FieldSelection.included("some_long", Collections.singleton("long"), false, FieldSelection.FeatureType.NUMERICAL)
        );
    }

    public void testDetect_GivenRegressionAndRequiredFieldMissing() {

@ -191,11 +217,16 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<ExtractedField> allFields = extractedFields.getAllFields();
        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
        assertThat(allFields.size(), equalTo(1));
        assertThat(allFields.stream().map(ExtractedField::getName).collect(Collectors.toList()), contains("bar"));

        assertFieldSelectionContains(fieldExtraction.v2(),
            FieldSelection.included("bar", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.excluded("foo", Collections.singleton("float"), "field in excludes list")
        );
    }

    public void testDetect_GivenRegressionAndRequiredFieldHasInvalidType() {

@ -258,14 +289,15 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

    public void testDetect_GivenIncludedIgnoredField() {
        FieldCapabilitiesResponse fieldCapabilities = new MockFieldCapsResponseBuilder()
            .addAggregatableField("_id", "float").build();
            .addAggregatableField("_id", "float")
            .build();
        FetchSourceContext analyzedFields = new FetchSourceContext(true, new String[]{"_id"}, new String[0]);

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
        ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> extractedFieldsDetector.detect());

        assertThat(e.getMessage(), equalTo("field [_id] cannot be analyzed"));
        assertThat(e.getMessage(), equalTo("No field [_id] could be detected"));
    }

    public void testDetect_ShouldSortFieldsAlphabetically() {

@ -285,9 +317,9 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
        assertThat(extractedFieldNames, equalTo(sortedFields));
    }

@ -333,11 +365,17 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(desiredFields), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
        assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2")));

        assertFieldSelectionContains(fieldExtraction.v2(),
            FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.excluded("my_field1_nope", Collections.singleton("float"), "field in excludes list"),
            FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
        );
    }

    public void testDetect_GivenIncludedFieldHasUnsupportedType() {

@ -384,11 +422,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), true, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
        assertThat(extractedFieldNames, equalTo(Arrays.asList("my_field1", "your_field2")));

        assertFieldSelectionContains(fieldExtraction.v2(),
            FieldSelection.included("my_field1", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.included("your_field2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL),
            FieldSelection.excluded("your_keyword", Collections.singleton("keyword"), "unsupported type; supported types " +
                "are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]")
        );
    }

    public void testDetect_GivenIncludedResultsField() {

@ -434,12 +479,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), true, 4, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
        assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
            contains(equalTo(ExtractedField.Method.DOC_VALUE)));
    }

@ -453,12 +498,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), true, 3, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
        assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
            contains(equalTo(ExtractedField.Method.DOC_VALUE)));
    }

@ -472,12 +517,12 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), true, 2, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
        List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
            .collect(Collectors.toList());
        assertThat(extractedFieldNames, equalTo(Arrays.asList("field_1", "field_2", "field_3")));
        assertThat(extractedFields.getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
        assertThat(fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getMethod).collect(Collectors.toSet()),
            contains(equalTo(ExtractedField.Method.SOURCE)));
    }

@ -488,14 +533,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {

        ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
            SOURCE_INDEX, buildOutlierDetectionConfig(), false, 100, fieldCapabilities, Collections.emptyMap());
        ExtractedFields extractedFields = extractedFieldsDetector.detect();
        Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

        List<ExtractedField> allFields = extractedFields.getAllFields();
        List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
        assertThat(allFields.size(), equalTo(1));
        ExtractedField booleanField = allFields.get(0);
        assertThat(booleanField.getTypes(), contains("boolean"));
        assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));

        assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("some_boolean", Collections.singleton("boolean"), false, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
|
||||
SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
|
||||
assertThat(booleanField.value(hit), arrayContaining(1));
|
||||
|
||||
|
@ -514,14 +563,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildClassificationConfig("some_boolean"), false, 100, fieldCapabilities,
|
||||
Collections.singletonMap("some_boolean", 2L));
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
List<ExtractedField> allFields = extractedFields.getAllFields();
|
||||
List<ExtractedField> allFields = fieldExtraction.v1().getAllFields();
|
||||
assertThat(allFields.size(), equalTo(1));
|
||||
ExtractedField booleanField = allFields.get(0);
|
||||
assertThat(booleanField.getTypes(), contains("boolean"));
|
||||
assertThat(booleanField.getMethod(), equalTo(ExtractedField.Method.DOC_VALUE));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("some_boolean", Collections.singleton("boolean"), true, FieldSelection.FeatureType.CATEGORICAL)
|
||||
);
|
||||
|
||||
SearchHit hit = new SearchHitBuilder(42).addField("some_boolean", true).build();
|
||||
assertThat(booleanField.value(hit), arrayContaining("true"));
|
||||
|
||||
|
@ -546,12 +599,26 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildRegressionConfig("a_float"), true, 100, fieldCapabilities, Collections.emptyMap());
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(5));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(5));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("a_float", "keyword_1", "text_1.keyword", "text_2.keyword", "text_without_keyword"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("a_float", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL),
|
||||
FieldSelection.included("keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("keyword_1.text", Collections.singleton("text"),
|
||||
"[keyword_1] is preferred because it is aggregatable"),
|
||||
FieldSelection.excluded("text_1", Collections.singleton("text"),
|
||||
"[text_1.keyword] is preferred because it is aggregatable"),
|
||||
FieldSelection.included("text_1.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("text_2", Collections.singleton("text"),
|
||||
"[text_2.keyword] is preferred because it is aggregatable"),
|
||||
FieldSelection.included("text_2.keyword", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.included("text_without_keyword", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL)
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenMultiFieldAndParentIsRequired() {
|
||||
|
@ -563,12 +630,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildClassificationConfig("field_1"), true, 100, fieldCapabilities, Collections.singletonMap("field_1", 2L));
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1", "field_2"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("field_1", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
|
||||
"[field_1] is required instead"),
|
||||
FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenMultiFieldAndMultiFieldIsRequired() {
|
||||
|
@ -581,12 +655,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildClassificationConfig("field_1.keyword"), true, 100, fieldCapabilities,
|
||||
Collections.singletonMap("field_1.keyword", 2L));
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1.keyword", "field_2"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.excluded("field_1", Collections.singleton("keyword"),
|
||||
"[field_1.keyword] is required instead"),
|
||||
FieldSelection.included("field_1.keyword", Collections.singleton("keyword"), true, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.included("field_2", Collections.singleton("float"), false, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenSeveralMultiFields_ShouldPickFirstSorted() {
|
||||
|
@ -600,12 +681,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1.keyword_1", "field_2"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.excluded("field_1", Collections.singleton("text"),
|
||||
"[field_1.keyword_1] is preferred because it is aggregatable"),
|
||||
FieldSelection.included("field_1.keyword_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("field_1.keyword_2", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
|
||||
FieldSelection.excluded("field_1.keyword_3", Collections.singleton("keyword"), "[field_1.keyword_1] came first"),
|
||||
FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenMultiFields_OverDocValueLimit() {
|
||||
|
@ -617,12 +707,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildRegressionConfig("field_2"), true, 0, fieldCapabilities, Collections.emptyMap());
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1", "field_2"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("field_1.keyword_1", Collections.singleton("keyword"),
|
||||
"[field_1] is preferred because it supports fetching from source"),
|
||||
FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenParentAndMultiFieldBothAggregatable() {
|
||||
|
@ -635,12 +732,20 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildRegressionConfig("field_2.double"), true, 100, fieldCapabilities, Collections.emptyMap());
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1", "field_2.double"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("field_1", Collections.singleton("keyword"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"),
|
||||
"[field_1] is preferred because it is aggregatable"),
|
||||
FieldSelection.included("field_2.double", Collections.singleton("double"), true, FieldSelection.FeatureType.NUMERICAL),
|
||||
FieldSelection.excluded("field_2.keyword", Collections.singleton("float"), "[field_2.double] is required instead")
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenParentAndMultiFieldNoneAggregatable() {
|
||||
|
@ -652,12 +757,19 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildRegressionConfig("field_2"), true, 100, fieldCapabilities, Collections.emptyMap());
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1", "field_2"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("field_1.text", Collections.singleton("text"),
|
||||
"[field_1] is preferred because none of the multi-fields are aggregatable"),
|
||||
FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
||||
public void testDetect_GivenMultiFields_AndExplicitlyIncludedFields() {
|
||||
|
@ -670,12 +782,18 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
|
||||
ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
|
||||
SOURCE_INDEX, buildRegressionConfig("field_2", analyzedFields), false, 100, fieldCapabilities, Collections.emptyMap());
|
||||
ExtractedFields extractedFields = extractedFieldsDetector.detect();
|
||||
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();
|
||||
|
||||
assertThat(extractedFields.getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = extractedFields.getAllFields().stream().map(ExtractedField::getName)
|
||||
assertThat(fieldExtraction.v1().getAllFields().size(), equalTo(2));
|
||||
List<String> extractedFieldNames = fieldExtraction.v1().getAllFields().stream().map(ExtractedField::getName)
|
||||
.collect(Collectors.toList());
|
||||
assertThat(extractedFieldNames, contains("field_1", "field_2"));
|
||||
|
||||
assertFieldSelectionContains(fieldExtraction.v2(),
|
||||
FieldSelection.included("field_1", Collections.singleton("text"), false, FieldSelection.FeatureType.CATEGORICAL),
|
||||
FieldSelection.excluded("field_1.keyword", Collections.singleton("keyword"), "field not in includes list"),
|
||||
FieldSelection.included("field_2", Collections.singleton("float"), true, FieldSelection.FeatureType.NUMERICAL)
|
||||
);
|
||||
}
|
||||
|
||||
private static DataFrameAnalyticsConfig buildOutlierDetectionConfig() {
|
||||
|
@ -715,6 +833,21 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* We assert each field individually to get useful error messages in case of failure
|
||||
*/
|
||||
private static void assertFieldSelectionContains(List<FieldSelection> actual, FieldSelection... expected) {
|
||||
assertThat(actual.size(), equalTo(expected.length));
|
||||
for (int i = 0; i < expected.length; i++) {
|
||||
assertThat("i = " + i, actual.get(i).getName(), equalTo(expected[i].getName()));
|
||||
assertThat("i = " + i, actual.get(i).getMappingTypes(), equalTo(expected[i].getMappingTypes()));
|
||||
assertThat("i = " + i, actual.get(i).isIncluded(), equalTo(expected[i].isIncluded()));
|
||||
assertThat("i = " + i, actual.get(i).isRequired(), equalTo(expected[i].isRequired()));
|
||||
assertThat("i = " + i, actual.get(i).getFeatureType(), equalTo(expected[i].getFeatureType()));
|
||||
assertThat("i = " + i, actual.get(i).getReason(), equalTo(expected[i].getReason()));
|
||||
}
|
||||
}
|
||||
|
||||
private static class MockFieldCapsResponseBuilder {
|
||||
|
||||
private final Map<String, Map<String, FieldCapabilities>> fieldCaps = new HashMap<>();
|
||||
|
|
|
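For readers skimming the test changes above: detect() now returns a Tuple that pairs the extracted fields with the per-field selection explanations. A minimal sketch of how a caller might consume the new return value, with the detector construction elided; the accessor names come from the diff above, and the printing is purely illustrative:

    // fieldExtraction comes from ExtractedFieldsDetector#detect(), as in the tests above.
    Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

    // v1() holds the fields that will actually be extracted for the analysis.
    for (ExtractedField field : fieldExtraction.v1().getAllFields()) {
        System.out.println("extracting: " + field.getName());
    }

    // v2() holds a FieldSelection for every candidate field; excluded ones carry a reason.
    for (FieldSelection selection : fieldExtraction.v2()) {
        if (selection.isIncluded() == false) {
            System.out.println(selection.getName() + " excluded: " + selection.getReason());
        }
    }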
@ -1,21 +0,0 @@
-{
-  "ml.estimate_memory_usage": {
-    "documentation": {
-      "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/estimate-memory-usage-dfanalytics.html"
-    },
-    "stability": "experimental",
-    "url": {
-      "paths" : [
-        {
-          "path" : "/_ml/data_frame/analytics/_estimate_memory_usage",
-          "methods": [ "POST" ],
-          "parts": {}
-        }
-      ]
-    },
-    "body": {
-      "description" : "Memory usage estimation definition",
-      "required" : true
-    }
-  }
-}
@ -0,0 +1,31 @@
+{
+  "ml.explain_data_frame_analytics": {
+    "documentation": {
+      "url": "http://www.elastic.co/guide/en/elasticsearch/reference/current/explain-dfanalytics.html"
+    },
+    "stability": "experimental",
+    "url": {
+      "paths" : [
+        {
+          "path" : "/_ml/data_frame/analytics/_explain",
+          "methods": [ "GET", "POST" ],
+          "parts": {}
+        },
+        {
+          "path" : "/_ml/data_frame/analytics/{id}/_explain",
+          "methods": [ "GET", "POST" ],
+          "parts":{
+            "id":{
+              "type":"string",
+              "description":"The ID of the data frame analytics to explain"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description" : "The data frame analytics config to explain",
+      "required" : false
+    }
+  }
+}
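Per the spec above, both URL forms accept GET and POST, and the body is optional when a job id is supplied in the path. A minimal sketch of calling the endpoint through the Java high-level REST client with the request/response classes this commit introduces; the constructor choice is assumed from the request converter above (which supports either an id or an inline config), the getter names are assumptions based on the new response fields, and "my-analytics-job" and the client setup are hypothetical:

    // Explain an existing job by id.
    ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest("my-analytics-job");
    ExplainDataFrameAnalyticsResponse response =
        client.machineLearning().explainDataFrameAnalytics(request, RequestOptions.DEFAULT);

    // Memory estimation, previously served by the _estimate_memory_usage API.
    System.out.println(response.getMemoryEstimation());

    // Field selection: one entry per candidate field, with a reason when excluded.
    response.getFieldSelection().forEach(fs -> System.out.println(fs.getName()));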
@ -1,84 +0,0 @@
----
-setup:
-  - do:
-      indices.create:
-        index: index-source
-        body:
-          mappings:
-            properties:
-              x:
-                type: float
-              y:
-                type: float
-
----
-"Test memory usage estimation for empty data frame":
-  - do:
-      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 1 }
-  - match: { result: "created" }
-
-  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
-  # Hence, the data frame is still considered empty.
-  - do:
-      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-
----
-"Test memory usage estimation for non-empty data frame":
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 1, y: 10 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "3kb" }
-  - match: { expected_memory_with_disk: "3kb" }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 2, y: 20 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "4kb" }
-  - match: { expected_memory_with_disk: "4kb" }
-
-  - do:
-      index:
-        index: index-source
-        refresh: true
-        body: { x: 3, y: 30 }
-  - match: { result: "created" }
-
-  - do:
-      ml.estimate_memory_usage:
-        body:
-          source: { index: "index-source" }
-          analysis: { outlier_detection: {} }
-  - match: { expected_memory_without_disk: "6kb" }
-  - match: { expected_memory_with_disk: "5kb" }
@ -0,0 +1,308 @@
+---
+"Test neither job id nor body":
+  - do:
+      catch: /Please provide a job \[id\] or the config object/
+      ml.explain_data_frame_analytics:
+        id: ""
+
+---
+"Test both job id and body":
+  - do:
+      catch: /Please provide either a job \[id\] or the config object but not both/
+      ml.explain_data_frame_analytics:
+        id: "foo"
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test missing job":
+  - do:
+      catch: missing
+      ml.explain_data_frame_analytics:
+        id: "no_such_job"
+
+---
+"Test id that matches multiple jobs":
+
+  - do:
+      indices.create:
+        index: index-source
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-1"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "foo-2"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"outlier_detection":{}}
+          }
+
+  - do:
+      catch: /expected only one config but matched \[foo-1, foo-2\]/
+      ml.explain_data_frame_analytics:
+        id: "foo-*"
+
+---
+"Test empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1 }
+  - match: { result: "created" }
+
+  # Note that value for "y" is missing and outlier detection analysis does not support missing values.
+  # Hence, the data frame is still considered empty.
+  - do:
+      catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+
+---
+"Test non-empty data frame given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              x:
+                type: float
+              y:
+                type: float
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 1, y: 10 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "3kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "3kb" }
+  - length: { field_selection: 2 }
+  - match: { field_selection.0.name: "x" }
+  - match: { field_selection.0.mapping_types: ["float"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: false }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "y" }
+  - match: { field_selection.1.mapping_types: ["float"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 2, y: 20 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "4kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "4kb" }
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { x: 3, y: 30 }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { outlier_detection: {} }
+  - match: { memory_estimation.expected_memory_without_disk: "6kb" }
+  - match: { memory_estimation.expected_memory_with_disk: "5kb" }
+
+---
+"Test field_selection given body":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        body:
+          source: { index: "index-source" }
+          analysis: { regression: { dependent_variable: "field_1" } }
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason
+
+---
+"Test field_selection given job":
+
+  - do:
+      indices.create:
+        index: index-source
+        body:
+          mappings:
+            properties:
+              field_1:
+                type: integer
+              field_2:
+                type: double
+              field_3:
+                type: date
+
+  - do:
+      index:
+        index: index-source
+        refresh: true
+        body: { field_1: 3, field_2: 3.14, field_3: "2019-11-11T00:00:00", field_4: "blah" }
+  - match: { result: "created" }
+
+  - do:
+      ml.put_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+        body: >
+          {
+            "source": {
+              "index": "index-source"
+            },
+            "dest": {
+              "index": "index-dest"
+            },
+            "analysis": {"regression":{ "dependent_variable": "field_1" }}
+          }
+
+  - do:
+      ml.explain_data_frame_analytics:
+        id: "got-a-job-for-this-one"
+  - is_true: memory_estimation.expected_memory_without_disk
+  - is_true: memory_estimation.expected_memory_with_disk
+  - length: { field_selection: 5 }
+  - match: { field_selection.0.name: "field_1" }
+  - match: { field_selection.0.mapping_types: ["integer"] }
+  - match: { field_selection.0.is_included: true }
+  - match: { field_selection.0.is_required: true }
+  - match: { field_selection.0.feature_type: "numerical" }
+  - is_false: field_selection.0.reason
+  - match: { field_selection.1.name: "field_2" }
+  - match: { field_selection.1.mapping_types: ["double"] }
+  - match: { field_selection.1.is_included: true }
+  - match: { field_selection.1.is_required: false }
+  - match: { field_selection.1.feature_type: "numerical" }
+  - is_false: field_selection.1.reason
+  - match: { field_selection.2.name: "field_3" }
+  - match: { field_selection.2.mapping_types: ["date"] }
+  - match: { field_selection.2.is_included: false }
+  - match: { field_selection.2.is_required: false }
+  - is_false: field_selection.2.feature_type
+  - match: { field_selection.2.reason: "unsupported type; supported types are [boolean, byte, double, float, half_float, integer, ip, keyword, long, scaled_float, short, text]" }
+  - match: { field_selection.3.name: "field_4" }
+  - match: { field_selection.3.mapping_types: ["text"] }
+  - match: { field_selection.3.is_included: false }
+  - match: { field_selection.3.is_required: false }
+  - is_false: field_selection.3.feature_type
+  - match: { field_selection.3.reason: "[field_4.keyword] is preferred because it is aggregatable" }
+  - match: { field_selection.4.name: "field_4.keyword" }
+  - match: { field_selection.4.mapping_types: ["keyword"] }
+  - match: { field_selection.4.is_included: true }
+  - match: { field_selection.4.is_required: false }
+  - match: { field_selection.4.feature_type: "categorical" }
+  - is_false: field_selection.4.reason