Adds the following parameters to `outlier_detection`: - `compute_feature_influence` (boolean): whether or not to compute feature influence scores - `outlier_fraction` (double): the proportion of the data set assumed to be outlying prior to running outlier detection - `standardization_enabled` (boolean): whether to apply standardization to the feature values Backport of #47600
This commit is contained in:
parent
833ed30f0d
commit
7667ea5f6f
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.elasticsearch.client.ml.dataframe;
|
||||
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
|
@ -48,6 +47,9 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
static final ParseField N_NEIGHBORS = new ParseField("n_neighbors");
|
||||
static final ParseField METHOD = new ParseField("method");
|
||||
public static final ParseField FEATURE_INFLUENCE_THRESHOLD = new ParseField("feature_influence_threshold");
|
||||
static final ParseField COMPUTE_FEATURE_INFLUENCE = new ParseField("compute_feature_influence");
|
||||
static final ParseField OUTLIER_FRACTION = new ParseField("outlier_fraction");
|
||||
static final ParseField STANDARDIZATION_ENABLED = new ParseField("standardization_enabled");
|
||||
|
||||
private static ObjectParser<Builder, Void> PARSER = new ObjectParser<>(NAME.getPreferredName(), true, Builder::new);
|
||||
|
||||
|
@ -60,22 +62,49 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
|
||||
}, METHOD, ObjectParser.ValueType.STRING);
|
||||
PARSER.declareDouble(Builder::setFeatureInfluenceThreshold, FEATURE_INFLUENCE_THRESHOLD);
|
||||
PARSER.declareBoolean(Builder::setComputeFeatureInfluence, COMPUTE_FEATURE_INFLUENCE);
|
||||
PARSER.declareDouble(Builder::setOutlierFraction, OUTLIER_FRACTION);
|
||||
PARSER.declareBoolean(Builder::setStandardizationEnabled, STANDARDIZATION_ENABLED);
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of neighbors. Leave unspecified for dynamic detection.
|
||||
*/
|
||||
private final Integer nNeighbors;
|
||||
|
||||
/**
|
||||
* The method. Leave unspecified for a dynamic mixture of methods.
|
||||
*/
|
||||
private final Method method;
|
||||
|
||||
/**
|
||||
* The min outlier score required to calculate feature influence. Defaults to 0.1.
|
||||
*/
|
||||
private final Double featureInfluenceThreshold;
|
||||
|
||||
/**
|
||||
* Constructs the outlier detection configuration
|
||||
* @param nNeighbors The number of neighbors. Leave unspecified for dynamic detection.
|
||||
* @param method The method. Leave unspecified for a dynamic mixture of methods.
|
||||
* @param featureInfluenceThreshold The min outlier score required to calculate feature influence. Defaults to 0.1.
|
||||
* Whether to compute feature influence or not. Defaults to true.
|
||||
*/
|
||||
private OutlierDetection(@Nullable Integer nNeighbors, @Nullable Method method, @Nullable Double featureInfluenceThreshold) {
|
||||
private final Boolean computeFeatureInfluence;
|
||||
|
||||
/**
|
||||
* The proportion of data assumed to be outlying prior to outlier detection. Defaults to 0.05.
|
||||
*/
|
||||
private final Double outlierFraction;
|
||||
|
||||
/**
|
||||
* Whether to perform standardization.
|
||||
*/
|
||||
private final Boolean standardizationEnabled;
|
||||
|
||||
private OutlierDetection(Integer nNeighbors, Method method, Double featureInfluenceThreshold, Boolean computeFeatureInfluence,
|
||||
Double outlierFraction, Boolean standardizationEnabled) {
|
||||
this.nNeighbors = nNeighbors;
|
||||
this.method = method;
|
||||
this.featureInfluenceThreshold = featureInfluenceThreshold;
|
||||
this.computeFeatureInfluence = computeFeatureInfluence;
|
||||
this.outlierFraction = outlierFraction;
|
||||
this.standardizationEnabled = standardizationEnabled;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -95,6 +124,18 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
return featureInfluenceThreshold;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code compute_feature_influence} setting held by this configuration.
 *
 * @return the boxed flag exactly as it was supplied to the builder; {@code null} when it was never set
 */
public Boolean getComputeFeatureInfluence() {
|
||||
return computeFeatureInfluence;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code outlier_fraction} setting held by this configuration.
 *
 * @return the boxed fraction exactly as it was supplied to the builder; {@code null} when it was never set
 */
public Double getOutlierFraction() {
|
||||
return outlierFraction;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code standardization_enabled} setting held by this configuration.
 *
 * @return the boxed flag exactly as it was supplied to the builder; {@code null} when it was never set
 */
public Boolean getStandardizationEnabled() {
|
||||
return standardizationEnabled;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject();
|
||||
|
@ -107,6 +148,15 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
if (featureInfluenceThreshold != null) {
|
||||
builder.field(FEATURE_INFLUENCE_THRESHOLD.getPreferredName(), featureInfluenceThreshold);
|
||||
}
|
||||
if (computeFeatureInfluence != null) {
|
||||
builder.field(COMPUTE_FEATURE_INFLUENCE.getPreferredName(), computeFeatureInfluence);
|
||||
}
|
||||
if (outlierFraction != null) {
|
||||
builder.field(OUTLIER_FRACTION.getPreferredName(), outlierFraction);
|
||||
}
|
||||
if (standardizationEnabled != null) {
|
||||
builder.field(STANDARDIZATION_ENABLED.getPreferredName(), standardizationEnabled);
|
||||
}
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
@ -119,12 +169,16 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
OutlierDetection other = (OutlierDetection) o;
|
||||
return Objects.equals(nNeighbors, other.nNeighbors)
|
||||
&& Objects.equals(method, other.method)
|
||||
&& Objects.equals(featureInfluenceThreshold, other.featureInfluenceThreshold);
|
||||
&& Objects.equals(featureInfluenceThreshold, other.featureInfluenceThreshold)
|
||||
&& Objects.equals(computeFeatureInfluence, other.computeFeatureInfluence)
|
||||
&& Objects.equals(outlierFraction, other.outlierFraction)
|
||||
&& Objects.equals(standardizationEnabled, other.standardizationEnabled);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(nNeighbors, method, featureInfluenceThreshold);
|
||||
return Objects.hash(nNeighbors, method, featureInfluenceThreshold, computeFeatureInfluence, outlierFraction,
|
||||
standardizationEnabled);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -150,6 +204,9 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
private Integer nNeighbors;
|
||||
private Method method;
|
||||
private Double featureInfluenceThreshold;
|
||||
private Boolean computeFeatureInfluence;
|
||||
private Double outlierFraction;
|
||||
private Boolean standardizationEnabled;
|
||||
|
||||
private Builder() {}
|
||||
|
||||
|
@ -168,8 +225,24 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code compute_feature_influence} value to carry into the built configuration.
 *
 * @param computeFeatureInfluence the flag to store; stored as given, no validation is done here
 * @return this builder, so calls can be chained
 */
public Builder setComputeFeatureInfluence(Boolean computeFeatureInfluence) {
|
||||
this.computeFeatureInfluence = computeFeatureInfluence;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code outlier_fraction} value to carry into the built configuration.
 *
 * @param outlierFraction the fraction to store; stored as given, no range check is done here
 * @return this builder, so calls can be chained
 */
public Builder setOutlierFraction(Double outlierFraction) {
|
||||
this.outlierFraction = outlierFraction;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code standardization_enabled} value to carry into the built configuration.
 *
 * @param standardizationEnabled the flag to store; stored as given, no validation is done here
 * @return this builder, so calls can be chained
 */
public Builder setStandardizationEnabled(Boolean standardizationEnabled) {
|
||||
this.standardizationEnabled = standardizationEnabled;
|
||||
return this;
|
||||
}
|
||||
|
||||
public OutlierDetection build() {
|
||||
return new OutlierDetection(nNeighbors, method, featureInfluenceThreshold);
|
||||
return new OutlierDetection(nNeighbors, method, featureInfluenceThreshold, computeFeatureInfluence, outlierFraction,
|
||||
standardizationEnabled);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1276,7 +1276,10 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
|
|||
assertThat(createdConfig.getSource().getQueryConfig(), equalTo(new QueryConfig(new MatchAllQueryBuilder()))); // default value
|
||||
assertThat(createdConfig.getDest().getIndex(), equalTo(config.getDest().getIndex()));
|
||||
assertThat(createdConfig.getDest().getResultsField(), equalTo("ml")); // default value
|
||||
assertThat(createdConfig.getAnalysis(), equalTo(config.getAnalysis()));
|
||||
assertThat(createdConfig.getAnalysis(), equalTo(OutlierDetection.builder()
|
||||
.setComputeFeatureInfluence(true)
|
||||
.setOutlierFraction(0.05)
|
||||
.setStandardizationEnabled(true).build()));
|
||||
assertThat(createdConfig.getAnalyzedFields(), equalTo(config.getAnalyzedFields()));
|
||||
assertThat(createdConfig.getModelMemoryLimit(), equalTo(ByteSizeValue.parseBytesSizeValue("1gb", ""))); // default value
|
||||
assertThat(createdConfig.getDescription(), equalTo("some description"));
|
||||
|
|
|
@ -2932,6 +2932,10 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
|
|||
DataFrameAnalysis outlierDetectionCustomized = OutlierDetection.builder() // <1>
|
||||
.setMethod(OutlierDetection.Method.DISTANCE_KNN) // <2>
|
||||
.setNNeighbors(5) // <3>
|
||||
.setFeatureInfluenceThreshold(0.1) // <4>
|
||||
.setComputeFeatureInfluence(true) // <5>
|
||||
.setOutlierFraction(0.05) // <6>
|
||||
.setStandardizationEnabled(true) // <7>
|
||||
.build();
|
||||
// end::put-data-frame-analytics-outlier-detection-customized
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.io.IOException;
|
|||
|
||||
import static org.hamcrest.Matchers.closeTo;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class OutlierDetectionTests extends AbstractXContentTestCase<OutlierDetection> {
|
||||
|
||||
|
@ -34,6 +35,9 @@ public class OutlierDetectionTests extends AbstractXContentTestCase<OutlierDetec
|
|||
.setNNeighbors(randomBoolean() ? null : randomIntBetween(1, 20))
|
||||
.setMethod(randomBoolean() ? null : randomFrom(OutlierDetection.Method.values()))
|
||||
.setFeatureInfluenceThreshold(randomBoolean() ? null : randomDoubleBetween(0.0, 1.0, true))
|
||||
.setComputeFeatureInfluence(randomBoolean() ? null : randomBoolean())
|
||||
.setOutlierFraction(randomBoolean() ? null : randomDoubleBetween(0.0, 1.0, true))
|
||||
.setStandardizationEnabled(randomBoolean() ? null : randomBoolean())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
@ -57,6 +61,9 @@ public class OutlierDetectionTests extends AbstractXContentTestCase<OutlierDetec
|
|||
assertNull(outlierDetection.getNNeighbors());
|
||||
assertNull(outlierDetection.getMethod());
|
||||
assertNull(outlierDetection.getFeatureInfluenceThreshold());
|
||||
assertNull(outlierDetection.getComputeFeatureInfluence());
|
||||
assertNull(outlierDetection.getOutlierFraction());
|
||||
assertNull(outlierDetection.getStandardizationEnabled());
|
||||
}
|
||||
|
||||
public void testGetParams_GivenExplicitValues() {
|
||||
|
@ -65,9 +72,15 @@ public class OutlierDetectionTests extends AbstractXContentTestCase<OutlierDetec
|
|||
.setNNeighbors(42)
|
||||
.setMethod(OutlierDetection.Method.LDOF)
|
||||
.setFeatureInfluenceThreshold(0.5)
|
||||
.setComputeFeatureInfluence(true)
|
||||
.setOutlierFraction(0.42)
|
||||
.setStandardizationEnabled(false)
|
||||
.build();
|
||||
assertThat(outlierDetection.getNNeighbors(), equalTo(42));
|
||||
assertThat(outlierDetection.getMethod(), equalTo(OutlierDetection.Method.LDOF));
|
||||
assertThat(outlierDetection.getFeatureInfluenceThreshold(), closeTo(0.5, 1E-9));
|
||||
assertThat(outlierDetection.getComputeFeatureInfluence(), is(true));
|
||||
assertThat(outlierDetection.getOutlierFraction(), closeTo(0.42, 1E-9));
|
||||
assertThat(outlierDetection.getStandardizationEnabled(), is(false));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -96,6 +96,10 @@ include-tagged::{doc-tests-file}[{api}-outlier-detection-customized]
|
|||
<1> Constructing a new OutlierDetection object
|
||||
<2> The method used to perform the analysis
|
||||
<3> Number of neighbors taken into account during analysis
|
||||
<4> The minimum `outlier_score` required to compute feature influence
|
||||
<5> Whether to compute feature influence
|
||||
<6> The proportion of the data set that is assumed to be outlying prior to outlier detection
|
||||
<7> Whether to apply standardization to feature values
|
||||
|
||||
===== Regression
|
||||
|
||||
|
|
|
@ -134,7 +134,7 @@ An `outlier_detection` configuration object has the following properties:
|
|||
{oldetection}. For example, 0.05 means it is assumed that 5% of values are real outliers
|
||||
and 95% are inliers.
|
||||
|
||||
`standardize_columns`::
|
||||
`standardization_enabled`::
|
||||
(boolean) If `true`, then the following operation is performed on the columns
|
||||
before computing outlier scores: (x_i - mean(x_i)) / sd(x_i). Defaults to
|
||||
`true`. For more information, see
|
||||
|
|
|
@ -140,6 +140,9 @@ PUT _ml/data_frame/analytics/loganalytics
|
|||
},
|
||||
"analysis": {
|
||||
"outlier_detection": {
|
||||
"compute_feature_influence": true,
|
||||
"outlier_fraction": 0.05,
|
||||
"standardization_enabled": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -167,7 +170,11 @@ The API returns the following result:
|
|||
"results_field" : "ml"
|
||||
},
|
||||
"analysis": {
|
||||
"outlier_detection" : { }
|
||||
"outlier_detection": {
|
||||
"compute_feature_influence": true,
|
||||
"outlier_fraction": 0.05,
|
||||
"standardization_enabled": true
|
||||
}
|
||||
},
|
||||
"model_memory_limit" : "1gb",
|
||||
"create_time" : 1562351429434,
|
||||
|
|
|
@ -5,11 +5,11 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.core.ml.dataframe.analyses;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
|
||||
import org.elasticsearch.common.xcontent.ObjectParser;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
@ -30,39 +30,68 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
public static final ParseField N_NEIGHBORS = new ParseField("n_neighbors");
|
||||
public static final ParseField METHOD = new ParseField("method");
|
||||
public static final ParseField FEATURE_INFLUENCE_THRESHOLD = new ParseField("feature_influence_threshold");
|
||||
public static final ParseField COMPUTE_FEATURE_INFLUENCE = new ParseField("compute_feature_influence");
|
||||
public static final ParseField OUTLIER_FRACTION = new ParseField("outlier_fraction");
|
||||
public static final ParseField STANDARDIZATION_ENABLED = new ParseField("standardization_enabled");
|
||||
|
||||
private static final ConstructingObjectParser<OutlierDetection, Void> LENIENT_PARSER = createParser(true);
|
||||
private static final ConstructingObjectParser<OutlierDetection, Void> STRICT_PARSER = createParser(false);
|
||||
private static final ObjectParser<Builder, Void> LENIENT_PARSER = createParser(true);
|
||||
private static final ObjectParser<Builder, Void> STRICT_PARSER = createParser(false);
|
||||
|
||||
private static ConstructingObjectParser<OutlierDetection, Void> createParser(boolean lenient) {
|
||||
ConstructingObjectParser<OutlierDetection, Void> parser = new ConstructingObjectParser<>(NAME.getPreferredName(), lenient,
|
||||
a -> new OutlierDetection((Integer) a[0], (Method) a[1], (Double) a[2]));
|
||||
parser.declareInt(ConstructingObjectParser.optionalConstructorArg(), N_NEIGHBORS);
|
||||
parser.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
|
||||
private static ObjectParser<Builder, Void> createParser(boolean lenient) {
|
||||
ObjectParser<Builder, Void> parser = new ObjectParser<>(NAME.getPreferredName(), lenient, Builder::new);
|
||||
parser.declareInt(Builder::setNNeighbors, N_NEIGHBORS);
|
||||
parser.declareField(Builder::setMethod, p -> {
|
||||
if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
|
||||
return Method.fromString(p.text());
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
|
||||
}, METHOD, ObjectParser.ValueType.STRING);
|
||||
parser.declareDouble(ConstructingObjectParser.optionalConstructorArg(), FEATURE_INFLUENCE_THRESHOLD);
|
||||
parser.declareDouble(Builder::setFeatureInfluenceThreshold, FEATURE_INFLUENCE_THRESHOLD);
|
||||
parser.declareBoolean(Builder::setComputeFeatureInfluence, COMPUTE_FEATURE_INFLUENCE);
|
||||
parser.declareDouble(Builder::setOutlierFraction, OUTLIER_FRACTION);
|
||||
parser.declareBoolean(Builder::setStandardizationEnabled, STANDARDIZATION_ENABLED);
|
||||
return parser;
|
||||
}
|
||||
|
||||
public static OutlierDetection fromXContent(XContentParser parser, boolean ignoreUnknownFields) {
|
||||
return ignoreUnknownFields ? LENIENT_PARSER.apply(parser, null) : STRICT_PARSER.apply(parser, null);
|
||||
return ignoreUnknownFields ? LENIENT_PARSER.apply(parser, null).build() : STRICT_PARSER.apply(parser, null).build();
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of neighbors. Leave unspecified for dynamic detection.
|
||||
*/
|
||||
@Nullable
|
||||
private final Integer nNeighbors;
|
||||
|
||||
/**
|
||||
* The method. Leave unspecified for a dynamic mixture of methods.
|
||||
*/
|
||||
@Nullable
|
||||
private final Method method;
|
||||
|
||||
/**
|
||||
* The min outlier score required to calculate feature influence. Defaults to 0.1.
|
||||
*/
|
||||
@Nullable
|
||||
private final Double featureInfluenceThreshold;
|
||||
|
||||
/**
|
||||
* Constructs the outlier detection configuration
|
||||
* @param nNeighbors The number of neighbors. Leave unspecified for dynamic detection.
|
||||
* @param method The method. Leave unspecified for a dynamic mixture of methods.
|
||||
* @param featureInfluenceThreshold The min outlier score required to calculate feature influence. Defaults to 0.1.
|
||||
* Whether to compute feature influence or not. Defaults to true.
|
||||
*/
|
||||
public OutlierDetection(@Nullable Integer nNeighbors, @Nullable Method method, @Nullable Double featureInfluenceThreshold) {
|
||||
private final boolean computeFeatureInfluence;
|
||||
|
||||
/**
|
||||
* The proportion of data assumed to be outlying prior to outlier detection. Defaults to 0.05.
|
||||
*/
|
||||
private final double outlierFraction;
|
||||
|
||||
/**
|
||||
* Whether to perform standardization.
|
||||
*/
|
||||
private final boolean standardizationEnabled;
|
||||
|
||||
private OutlierDetection(Integer nNeighbors, Method method, Double featureInfluenceThreshold, boolean computeFeatureInfluence,
|
||||
double outlierFraction, boolean standardizationEnabled) {
|
||||
if (nNeighbors != null && nNeighbors <= 0) {
|
||||
throw ExceptionsHelper.badRequestException("[{}] must be a positive integer", N_NEIGHBORS.getPreferredName());
|
||||
}
|
||||
|
@ -71,22 +100,31 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
throw ExceptionsHelper.badRequestException("[{}] must be in [0, 1]", FEATURE_INFLUENCE_THRESHOLD.getPreferredName());
|
||||
}
|
||||
|
||||
if (outlierFraction < 0.0 || outlierFraction > 1.0) {
|
||||
throw ExceptionsHelper.badRequestException("[{}] must be in [0, 1]", OUTLIER_FRACTION.getPreferredName());
|
||||
}
|
||||
|
||||
this.nNeighbors = nNeighbors;
|
||||
this.method = method;
|
||||
this.featureInfluenceThreshold = featureInfluenceThreshold;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the default outlier detection configuration
|
||||
*/
|
||||
public OutlierDetection() {
|
||||
this(null, null, null);
|
||||
this.computeFeatureInfluence = computeFeatureInfluence;
|
||||
this.outlierFraction = outlierFraction;
|
||||
this.standardizationEnabled = standardizationEnabled;
|
||||
}
|
||||
|
||||
/**
 * Reads an outlier detection configuration from a stream.
 *
 * The three optional fields are always read. {@code compute_feature_influence},
 * {@code outlier_fraction} and {@code standardization_enabled} are only read when the
 * stream version is on or after {@code Version.V_7_5_0}; otherwise fixed fallback values are used.
 *
 * @param in the stream to read from
 * @throws IOException if reading from the stream fails
 */
public OutlierDetection(StreamInput in) throws IOException {
|
||||
nNeighbors = in.readOptionalVInt();
|
||||
// A leading boolean marks whether a method enum follows.
method = in.readBoolean() ? in.readEnum(Method.class) : null;
|
||||
featureInfluenceThreshold = in.readOptionalDouble();
|
||||
// Streams older than 7.5.0 do not carry the three fields below.
if (in.getVersion().onOrAfter(Version.V_7_5_0)) {
|
||||
computeFeatureInfluence = in.readBoolean();
|
||||
outlierFraction = in.readDouble();
|
||||
standardizationEnabled = in.readBoolean();
|
||||
} else {
|
||||
// Fallbacks match the initial field values of Builder (true, 0.05, true).
computeFeatureInfluence = true;
|
||||
outlierFraction = 0.05;
|
||||
standardizationEnabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -106,6 +144,12 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
}
|
||||
|
||||
out.writeOptionalDouble(featureInfluenceThreshold);
|
||||
|
||||
if (out.getVersion().onOrAfter(Version.V_7_5_0)) {
|
||||
out.writeBoolean(computeFeatureInfluence);
|
||||
out.writeDouble(outlierFraction);
|
||||
out.writeBoolean(standardizationEnabled);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -120,6 +164,9 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
if (featureInfluenceThreshold != null) {
|
||||
builder.field(FEATURE_INFLUENCE_THRESHOLD.getPreferredName(), featureInfluenceThreshold);
|
||||
}
|
||||
builder.field(COMPUTE_FEATURE_INFLUENCE.getPreferredName(), computeFeatureInfluence);
|
||||
builder.field(OUTLIER_FRACTION.getPreferredName(), outlierFraction);
|
||||
builder.field(STANDARDIZATION_ENABLED.getPreferredName(), standardizationEnabled);
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
@ -131,12 +178,16 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
OutlierDetection that = (OutlierDetection) o;
|
||||
return Objects.equals(nNeighbors, that.nNeighbors)
|
||||
&& Objects.equals(method, that.method)
|
||||
&& Objects.equals(featureInfluenceThreshold, that.featureInfluenceThreshold);
|
||||
&& Objects.equals(featureInfluenceThreshold, that.featureInfluenceThreshold)
|
||||
&& computeFeatureInfluence == that.computeFeatureInfluence
|
||||
&& outlierFraction == that.outlierFraction
|
||||
&& standardizationEnabled == that.standardizationEnabled;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(nNeighbors, method, featureInfluenceThreshold);
|
||||
return Objects.hash(nNeighbors, method, featureInfluenceThreshold, computeFeatureInfluence, outlierFraction,
|
||||
standardizationEnabled);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -151,6 +202,9 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
if (featureInfluenceThreshold != null) {
|
||||
params.put(FEATURE_INFLUENCE_THRESHOLD.getPreferredName(), featureInfluenceThreshold);
|
||||
}
|
||||
params.put(COMPUTE_FEATURE_INFLUENCE.getPreferredName(), computeFeatureInfluence);
|
||||
params.put(OUTLIER_FRACTION.getPreferredName(), outlierFraction);
|
||||
params.put(STANDARDIZATION_ENABLED.getPreferredName(), standardizationEnabled);
|
||||
return params;
|
||||
}
|
||||
|
||||
|
@ -191,4 +245,49 @@ public class OutlierDetection implements DataFrameAnalysis {
|
|||
return name().toLowerCase(Locale.ROOT);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Mutable builder for {@link OutlierDetection}.
 *
 * The three reference-typed settings start out {@code null}. The three primitive settings start
 * at {@code true}, {@code 0.05} and {@code true}, so a freshly created builder always yields
 * concrete values for them.
 */
public static class Builder {
|
||||
|
||||
private Integer nNeighbors;
|
||||
private Method method;
|
||||
private Double featureInfluenceThreshold;
|
||||
private boolean computeFeatureInfluence = true;
|
||||
private double outlierFraction = 0.05;
|
||||
private boolean standardizationEnabled = true;
|
||||
|
||||
/**
 * Sets the number of neighbors.
 *
 * @param nNeighbors the value to store; may be {@code null}
 * @return this builder, so calls can be chained
 */
public Builder setNNeighbors(Integer nNeighbors) {
|
||||
this.nNeighbors = nNeighbors;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the outlier detection method.
 *
 * @param method the value to store; may be {@code null}
 * @return this builder, so calls can be chained
 */
public Builder setMethod(Method method) {
|
||||
this.method = method;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the feature influence threshold.
 *
 * @param featureInfluenceThreshold the value to store; may be {@code null}
 * @return this builder, so calls can be chained
 */
public Builder setFeatureInfluenceThreshold(Double featureInfluenceThreshold) {
|
||||
this.featureInfluenceThreshold = featureInfluenceThreshold;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets whether feature influence is computed.
 *
 * @param computeFeatureInfluence the flag to store
 * @return this builder, so calls can be chained
 */
public Builder setComputeFeatureInfluence(boolean computeFeatureInfluence) {
|
||||
this.computeFeatureInfluence = computeFeatureInfluence;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the outlier fraction. No range check is done here.
 *
 * NOTE(review): the range check in the OutlierDetection constructor is written as
 * {@code outlierFraction < 0.0 || outlierFraction > 1.0}, which a NaN value would pass;
 * confirm whether NaN should be rejected.
 *
 * @param outlierFraction the value to store
 * @return this builder, so calls can be chained
 */
public Builder setOutlierFraction(double outlierFraction) {
|
||||
this.outlierFraction = outlierFraction;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets whether standardization is enabled.
 *
 * @param standardizationEnabled the flag to store
 * @return this builder, so calls can be chained
 */
public Builder setStandardizationEnabled(boolean standardizationEnabled) {
|
||||
this.standardizationEnabled = standardizationEnabled;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Creates an {@link OutlierDetection} from the current builder state.
 *
 * All six values are passed to the OutlierDetection constructor, which performs the
 * argument checks and can reject out-of-range values.
 *
 * @return the new configuration
 */
public OutlierDetection build() {
|
||||
return new OutlierDetection(nNeighbors, method, featureInfluenceThreshold, computeFeatureInfluence, outlierFraction,
|
||||
standardizationEnabled);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,7 +32,14 @@ public class OutlierDetectionTests extends AbstractSerializingTestCase<OutlierDe
|
|||
Integer numberNeighbors = randomBoolean() ? null : randomIntBetween(1, 20);
|
||||
OutlierDetection.Method method = randomBoolean() ? null : randomFrom(OutlierDetection.Method.values());
|
||||
Double minScoreToWriteFeatureInfluence = randomBoolean() ? null : randomDoubleBetween(0.0, 1.0, true);
|
||||
return new OutlierDetection(numberNeighbors, method, minScoreToWriteFeatureInfluence);
|
||||
return new OutlierDetection.Builder()
|
||||
.setNNeighbors(numberNeighbors)
|
||||
.setMethod(method)
|
||||
.setFeatureInfluenceThreshold(minScoreToWriteFeatureInfluence)
|
||||
.setComputeFeatureInfluence(randomBoolean())
|
||||
.setOutlierFraction(randomDoubleBetween(0.0, 1.0, true))
|
||||
.setStandardizationEnabled(randomBoolean())
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -41,20 +48,38 @@ public class OutlierDetectionTests extends AbstractSerializingTestCase<OutlierDe
|
|||
}
|
||||
|
||||
public void testGetParams_GivenDefaults() {
|
||||
OutlierDetection outlierDetection = new OutlierDetection();
|
||||
assertThat(outlierDetection.getParams().isEmpty(), is(true));
|
||||
OutlierDetection outlierDetection = new OutlierDetection.Builder().build();
|
||||
Map<String, Object> params = outlierDetection.getParams();
|
||||
assertThat(params.size(), equalTo(3));
|
||||
assertThat(params.containsKey("compute_feature_influence"), is(true));
|
||||
assertThat(params.get("compute_feature_influence"), is(true));
|
||||
assertThat(params.containsKey("outlier_fraction"), is(true));
|
||||
assertThat((double) params.get("outlier_fraction"), closeTo(0.05, 0.0001));
|
||||
assertThat(params.containsKey("standardization_enabled"), is(true));
|
||||
assertThat(params.get("standardization_enabled"), is(true));
|
||||
}
|
||||
|
||||
public void testGetParams_GivenExplicitValues() {
|
||||
OutlierDetection outlierDetection = new OutlierDetection(42, OutlierDetection.Method.LDOF, 0.42);
|
||||
OutlierDetection outlierDetection = new OutlierDetection.Builder()
|
||||
.setNNeighbors(42)
|
||||
.setMethod(OutlierDetection.Method.LDOF)
|
||||
.setFeatureInfluenceThreshold(0.42)
|
||||
.setComputeFeatureInfluence(false)
|
||||
.setOutlierFraction(0.9)
|
||||
.setStandardizationEnabled(false)
|
||||
.build();
|
||||
|
||||
Map<String, Object> params = outlierDetection.getParams();
|
||||
|
||||
assertThat(params.size(), equalTo(3));
|
||||
assertThat(params.size(), equalTo(6));
|
||||
assertThat(params.get(OutlierDetection.N_NEIGHBORS.getPreferredName()), equalTo(42));
|
||||
assertThat(params.get(OutlierDetection.METHOD.getPreferredName()), equalTo(OutlierDetection.Method.LDOF));
|
||||
assertThat((Double) params.get(OutlierDetection.FEATURE_INFLUENCE_THRESHOLD.getPreferredName()),
|
||||
is(closeTo(0.42, 1E-9)));
|
||||
assertThat(params.get(OutlierDetection.COMPUTE_FEATURE_INFLUENCE.getPreferredName()), is(false));
|
||||
assertThat((Double) params.get(OutlierDetection.OUTLIER_FRACTION.getPreferredName()),
|
||||
is(closeTo(0.9, 1E-9)));
|
||||
assertThat(params.get(OutlierDetection.STANDARDIZATION_ENABLED.getPreferredName()), is(false));
|
||||
}
|
||||
|
||||
public void testGetStateDocId() {
|
||||
|
|
|
@ -62,6 +62,12 @@ integTest.runner {
|
|||
'ml/data_frame_analytics_crud/Test put config given missing analysis',
|
||||
'ml/data_frame_analytics_crud/Test put config given empty analysis',
|
||||
'ml/data_frame_analytics_crud/Test max model memory limit',
|
||||
'ml/data_frame_analytics_crud/Test put outlier_detection given n_neighbors is negative',
|
||||
'ml/data_frame_analytics_crud/Test put outlier_detection given n_neighbors is zero',
|
||||
'ml/data_frame_analytics_crud/Test put outlier_detection given feature_influence_threshold is negative',
|
||||
'ml/data_frame_analytics_crud/Test put outlier_detection given feature_influence_threshold is greater than one',
|
||||
'ml/data_frame_analytics_crud/Test put outlier_detection given outlier_fraction is negative',
|
||||
'ml/data_frame_analytics_crud/Test put outlier_detection given outlier_fraction is greater than one',
|
||||
'ml/data_frame_analytics_crud/Test put regression given dependent_variable is not defined',
|
||||
'ml/data_frame_analytics_crud/Test put regression given negative lambda',
|
||||
'ml/data_frame_analytics_crud/Test put regression given negative gamma',
|
||||
|
|
|
@ -69,7 +69,8 @@ public class OutlierDetectionWithMissingFieldsIT extends MlNativeDataFrameAnalyt
|
|||
}
|
||||
|
||||
String id = "test_outlier_detection_with_missing_fields";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null, new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null,
|
||||
new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
|
|
@ -72,7 +72,8 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
}
|
||||
|
||||
String id = "test_outlier_detection_with_few_docs";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null, new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null,
|
||||
new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
@ -147,7 +148,8 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
}
|
||||
|
||||
String id = "test_outlier_detection_with_enough_docs_to_scroll";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", "custom_ml", new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", "custom_ml",
|
||||
new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
@ -216,7 +218,8 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
}
|
||||
|
||||
String id = "test_outlier_detection_with_more_fields_than_docvalue_limit";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null, new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null,
|
||||
new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
@ -279,7 +282,8 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
}
|
||||
|
||||
String id = "test_stop_outlier_detection_with_enough_docs_to_scroll";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", "custom_ml", new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", "custom_ml",
|
||||
new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
@ -347,7 +351,7 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
.setId(id)
|
||||
.setSource(new DataFrameAnalyticsSource(sourceIndex, null))
|
||||
.setDest(new DataFrameAnalyticsDest(destIndex, null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
@ -405,7 +409,7 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
}
|
||||
|
||||
String id = "test_outlier_detection_with_pre_existing_dest_index";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, destIndex, null, new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, destIndex, null, new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
@ -461,7 +465,7 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
.setId(id)
|
||||
.setSource(new DataFrameAnalyticsSource(new String[] { sourceIndex }, null))
|
||||
.setDest(new DataFrameAnalyticsDest(sourceIndex + "-results", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.setModelMemoryLimit(modelMemoryLimit)
|
||||
.build();
|
||||
|
||||
|
@ -503,7 +507,8 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
}
|
||||
|
||||
String id = "test_outlier_detection_stop_and_restart";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", "custom_ml", new OutlierDetection());
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", "custom_ml",
|
||||
new OutlierDetection.Builder().build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
|
@ -545,4 +550,92 @@ public class RunDataFrameAnalyticsIT extends MlNativeDataFrameAnalyticsIntegTest
|
|||
assertProgress(id, 100, 100, 100, 100);
|
||||
assertThat(searchStoredProgress(id).getHits().getTotalHits().value, equalTo(1L));
|
||||
}
|
||||
|
||||
public void testOutlierDetectionWithCustomParams() throws Exception {
|
||||
String sourceIndex = "test-outlier-detection-with-custom-params";
|
||||
|
||||
client().admin().indices().prepareCreate(sourceIndex)
|
||||
.addMapping("_doc", "numeric_1", "type=double", "numeric_2", "type=float", "categorical_1", "type=keyword")
|
||||
.get();
|
||||
|
||||
BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
|
||||
bulkRequestBuilder.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
IndexRequest indexRequest = new IndexRequest(sourceIndex);
|
||||
|
||||
// We insert one odd value out of 5 for one feature
|
||||
String docId = i == 0 ? "outlier" : "normal" + i;
|
||||
indexRequest.id(docId);
|
||||
indexRequest.source("numeric_1", i == 0 ? 100.0 : 1.0, "numeric_2", 1.0, "categorical_1", "foo_" + i);
|
||||
bulkRequestBuilder.add(indexRequest);
|
||||
}
|
||||
BulkResponse bulkResponse = bulkRequestBuilder.get();
|
||||
if (bulkResponse.hasFailures()) {
|
||||
fail("Failed to index data: " + bulkResponse.buildFailureMessage());
|
||||
}
|
||||
|
||||
String id = "test_outlier_detection_with_custom_params";
|
||||
DataFrameAnalyticsConfig config = buildAnalytics(id, sourceIndex, sourceIndex + "-results", null,
|
||||
new OutlierDetection.Builder()
|
||||
.setNNeighbors(3)
|
||||
.setMethod(OutlierDetection.Method.DISTANCE_KNN)
|
||||
.setFeatureInfluenceThreshold(0.01)
|
||||
.setComputeFeatureInfluence(false)
|
||||
.setOutlierFraction(0.04)
|
||||
.setStandardizationEnabled(true)
|
||||
.build());
|
||||
registerAnalytics(config);
|
||||
putAnalytics(config);
|
||||
|
||||
assertState(id, DataFrameAnalyticsState.STOPPED);
|
||||
assertProgress(id, 0, 0, 0, 0);
|
||||
|
||||
startAnalytics(id);
|
||||
waitUntilAnalyticsIsStopped(id);
|
||||
|
||||
SearchResponse sourceData = client().prepareSearch(sourceIndex).get();
|
||||
double scoreOfOutlier = 0.0;
|
||||
double scoreOfNonOutlier = -1.0;
|
||||
for (SearchHit hit : sourceData.getHits()) {
|
||||
GetResponse destDocGetResponse = client().prepareGet().setIndex(config.getDest().getIndex()).setId(hit.getId()).get();
|
||||
assertThat(destDocGetResponse.isExists(), is(true));
|
||||
Map<String, Object> sourceDoc = hit.getSourceAsMap();
|
||||
Map<String, Object> destDoc = destDocGetResponse.getSource();
|
||||
for (String field : sourceDoc.keySet()) {
|
||||
assertThat(destDoc.containsKey(field), is(true));
|
||||
assertThat(destDoc.get(field), equalTo(sourceDoc.get(field)));
|
||||
}
|
||||
assertThat(destDoc.containsKey("ml"), is(true));
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> resultsObject = (Map<String, Object>) destDoc.get("ml");
|
||||
|
||||
assertThat(resultsObject.containsKey("outlier_score"), is(true));
|
||||
assertThat(resultsObject.containsKey("feature_influence"), is(false));
|
||||
|
||||
double outlierScore = (double) resultsObject.get("outlier_score");
|
||||
assertThat(outlierScore, allOf(greaterThanOrEqualTo(0.0), lessThanOrEqualTo(1.0)));
|
||||
if (hit.getId().equals("outlier")) {
|
||||
scoreOfOutlier = outlierScore;
|
||||
} else {
|
||||
if (scoreOfNonOutlier < 0) {
|
||||
scoreOfNonOutlier = outlierScore;
|
||||
} else {
|
||||
assertThat(outlierScore, equalTo(scoreOfNonOutlier));
|
||||
}
|
||||
}
|
||||
}
|
||||
assertThat(scoreOfOutlier, is(greaterThan(scoreOfNonOutlier)));
|
||||
|
||||
assertProgress(id, 100, 100, 100, 100);
|
||||
assertThat(searchStoredProgress(id).getHits().getTotalHits().value, equalTo(1L));
|
||||
assertThatAuditMessagesMatch(id,
|
||||
"Created analytics with analysis type [outlier_detection]",
|
||||
"Estimated memory usage for this analytics to be",
|
||||
"Started analytics",
|
||||
"Creating destination index [test-outlier-detection-with-custom-params-results]",
|
||||
"Finished reindexing to destination index [test-outlier-detection-with-custom-params-results]",
|
||||
"Finished analysis");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ public class DataFrameAnalyticsIndexTests extends ESTestCase {
|
|||
.setId(ANALYTICS_ID)
|
||||
.setSource(new DataFrameAnalyticsSource(SOURCE_INDEX, null))
|
||||
.setDest(new DataFrameAnalyticsDest(DEST_INDEX, null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
private static final int CURRENT_TIME_MILLIS = 123456789;
|
||||
private static final String CREATED_BY = "data-frame-analytics";
|
||||
|
|
|
@ -67,7 +67,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("source-1"))
|
||||
.setDest(new DataFrameAnalyticsDest("dest", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -79,7 +79,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("missing"))
|
||||
.setDest(new DataFrameAnalyticsDest("dest", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -94,7 +94,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("missing*"))
|
||||
.setDest(new DataFrameAnalyticsDest("dest", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -109,7 +109,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("source-1"))
|
||||
.setDest(new DataFrameAnalyticsDest("source-1", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -124,7 +124,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("source-*"))
|
||||
.setDest(new DataFrameAnalyticsDest(SOURCE_2, null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -139,7 +139,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("source-1,source-*"))
|
||||
.setDest(new DataFrameAnalyticsDest(SOURCE_2, null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -154,7 +154,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource(SOURCE_1))
|
||||
.setDest(new DataFrameAnalyticsDest("dest-alias", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
@ -170,7 +170,7 @@ public class SourceDestValidatorTests extends ESTestCase {
|
|||
.setId("test")
|
||||
.setSource(createSource("source-1"))
|
||||
.setDest(new DataFrameAnalyticsDest("source-1-alias", null))
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
|
||||
SourceDestValidator validator = new SourceDestValidator(CLUSTER_STATE, new IndexNameExpressionResolver());
|
||||
|
|
|
@ -508,7 +508,7 @@ public class ExtractedFieldsDetectorTests extends ESTestCase {
|
|||
.setSource(new DataFrameAnalyticsSource(SOURCE_INDEX, null))
|
||||
.setDest(new DataFrameAnalyticsDest(DEST_INDEX, RESULTS_FIELD))
|
||||
.setAnalyzedFields(analyzedFields)
|
||||
.setAnalysis(new OutlierDetection())
|
||||
.setAnalysis(new OutlierDetection.Builder().build())
|
||||
.build();
|
||||
}
|
||||
|
||||
|
|
|
@ -53,7 +53,13 @@ setup:
|
|||
- match: { source.index: ["index-source"] }
|
||||
- match: { source.query: {"term" : { "user" : "Kimchy"} } }
|
||||
- match: { dest.index: "index-dest" }
|
||||
- match: { analysis: {"outlier_detection":{}} }
|
||||
- match: { analysis: {
|
||||
"outlier_detection":{
|
||||
"compute_feature_influence": true,
|
||||
"outlier_fraction": 0.05,
|
||||
"standardization_enabled": true
|
||||
}
|
||||
}}
|
||||
- match: { analyzed_fields: {"includes" : ["obj1.*", "obj2.*" ], "excludes": [] } }
|
||||
- is_true: create_time
|
||||
- is_true: version
|
||||
|
@ -66,7 +72,13 @@ setup:
|
|||
- match: { data_frame_analytics.0.source.index: ["index-source"] }
|
||||
- match: { data_frame_analytics.0.source.query: {"term" : { "user" : "Kimchy"} } }
|
||||
- match: { data_frame_analytics.0.dest.index: "index-dest" }
|
||||
- match: { data_frame_analytics.0.analysis: {"outlier_detection":{}} }
|
||||
- match: { data_frame_analytics.0.analysis: {
|
||||
"outlier_detection":{
|
||||
"compute_feature_influence": true,
|
||||
"outlier_fraction": 0.05,
|
||||
"standardization_enabled": true
|
||||
}
|
||||
}}
|
||||
- match: { data_frame_analytics.0.analyzed_fields: {"includes" : ["obj1.*", "obj2.*" ], "excludes": [] } }
|
||||
- is_true: data_frame_analytics.0.create_time
|
||||
- is_true: data_frame_analytics.0.version
|
||||
|
@ -148,7 +160,13 @@ setup:
|
|||
- match: { source.index: ["index-source"] }
|
||||
- match: { source.query: {"match_all" : {} } }
|
||||
- match: { dest.index: "index-dest" }
|
||||
- match: { analysis: {"outlier_detection":{}} }
|
||||
- match: { analysis: {
|
||||
"outlier_detection":{
|
||||
"compute_feature_influence": true,
|
||||
"outlier_fraction": 0.05,
|
||||
"standardization_enabled": true
|
||||
}
|
||||
}}
|
||||
- is_true: create_time
|
||||
- is_true: version
|
||||
|
||||
|
@ -170,7 +188,10 @@ setup:
|
|||
"outlier_detection":{
|
||||
"n_neighbors": 5,
|
||||
"method": "lof",
|
||||
"feature_influence_threshold": 0.0
|
||||
"feature_influence_threshold": 0.0,
|
||||
"compute_feature_influence": false,
|
||||
"outlier_fraction": 0.95,
|
||||
"standardization_enabled": false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -178,9 +199,16 @@ setup:
|
|||
- match: { source.index: ["index-source"] }
|
||||
- match: { source.query: {"match_all" : {} } }
|
||||
- match: { dest.index: "index-dest" }
|
||||
- match: { analysis.outlier_detection.n_neighbors: 5 }
|
||||
- match: { analysis.outlier_detection.method: "lof" }
|
||||
- match: { analysis.outlier_detection.feature_influence_threshold: 0.0 }
|
||||
- match: { analysis: {
|
||||
"outlier_detection":{
|
||||
"n_neighbors": 5,
|
||||
"method": "lof",
|
||||
"feature_influence_threshold": 0.0,
|
||||
"compute_feature_influence": false,
|
||||
"outlier_fraction": 0.95,
|
||||
"standardization_enabled": false
|
||||
}
|
||||
}}
|
||||
- is_true: create_time
|
||||
- is_true: version
|
||||
|
||||
|
@ -924,7 +952,13 @@ setup:
|
|||
- match: { source.index: ["index-source"] }
|
||||
- match: { source.query: {"term" : { "user" : "Kimchy"} } }
|
||||
- match: { dest.index: "index-dest" }
|
||||
- match: { analysis: {"outlier_detection":{}} }
|
||||
- match: { analysis: {
|
||||
"outlier_detection":{
|
||||
"compute_feature_influence": true,
|
||||
"outlier_fraction": 0.05,
|
||||
"standardization_enabled": true
|
||||
}
|
||||
}}
|
||||
- match: { analyzed_fields: {"includes" : ["obj1.*", "obj2.*" ], "excludes": [] } }
|
||||
- match: { model_memory_limit: "20mb" }
|
||||
|
||||
|
@ -938,6 +972,138 @@ setup:
|
|||
xpack.ml.max_model_memory_limit: null
|
||||
- match: {transient: {}}
|
||||
|
||||
---
"Test put outlier_detection given n_neighbors is negative":

  # Putting a config with a negative n_neighbors must fail with the
  # "[n_neighbors] must be a positive integer" error.
  - do:
      catch: /\[n_neighbors\] must be a positive integer/
      ml.put_data_frame_analytics:
        id: "outlier_detection-with-negative-n_neighbors"
        body: >
          {
            "source": { "index": "index-source" },
            "dest": { "index": "index-dest" },
            "analysis": { "outlier_detection": { "n_neighbors": -1 } }
          }
|
||||
|
||||
---
"Test put outlier_detection given n_neighbors is zero":

  # Zero is not a positive integer either, so the put must fail with the
  # same "[n_neighbors] must be a positive integer" error.
  - do:
      catch: /\[n_neighbors\] must be a positive integer/
      ml.put_data_frame_analytics:
        id: "outlier_detection-with-zero-n_neighbors"
        body: >
          {
            "source": { "index": "index-source" },
            "dest": { "index": "index-dest" },
            "analysis": { "outlier_detection": { "n_neighbors": 0 } }
          }
|
||||
|
||||
---
"Test put outlier_detection given feature_influence_threshold is negative":

  # A value just below 0 must be rejected with the
  # "[feature_influence_threshold] must be in [0, 1]" error.
  - do:
      catch: /\[feature_influence_threshold\] must be in \[0, 1\]/
      ml.put_data_frame_analytics:
        id: "outlier_detection-with-negative-feature_influence_threshold"
        body: >
          {
            "source": { "index": "index-source" },
            "dest": { "index": "index-dest" },
            "analysis": { "outlier_detection": { "feature_influence_threshold": -0.001 } }
          }
|
||||
|
||||
---
"Test put outlier_detection given feature_influence_threshold is greater than one":

  # A value just above 1 must be rejected with the
  # "[feature_influence_threshold] must be in [0, 1]" error.
  # The job id names the too-high case; it previously reused the id of the
  # negative-value test, which misdescribed the scenario.
  - do:
      catch: /\[feature_influence_threshold\] must be in \[0, 1\]/
      ml.put_data_frame_analytics:
        id: "outlier_detection-with-too-high-feature_influence_threshold"
        body: >
          {
            "source": {
              "index": "index-source"
            },
            "dest": {
              "index": "index-dest"
            },
            "analysis": {
              "outlier_detection": {
                "feature_influence_threshold": 1.001
              }
            }
          }
|
||||
|
||||
---
"Test put outlier_detection given outlier_fraction is negative":

  # A value just below 0 must be rejected with the
  # "[outlier_fraction] must be in [0, 1]" error.
  - do:
      catch: /\[outlier_fraction\] must be in \[0, 1\]/
      ml.put_data_frame_analytics:
        id: "outlier_detection-with-negative-outlier_fraction"
        body: >
          {
            "source": { "index": "index-source" },
            "dest": { "index": "index-dest" },
            "analysis": { "outlier_detection": { "outlier_fraction": -0.001 } }
          }
|
||||
|
||||
---
"Test put outlier_detection given outlier_fraction is greater than one":

  # A value just above 1 must be rejected with the
  # "[outlier_fraction] must be in [0, 1]" error.
  # The job id names the too-high case; it previously reused the id of the
  # negative-value test, which misdescribed the scenario.
  - do:
      catch: /\[outlier_fraction\] must be in \[0, 1\]/
      ml.put_data_frame_analytics:
        id: "outlier_detection-with-too-high-outlier_fraction"
        body: >
          {
            "source": {
              "index": "index-source"
            },
            "dest": {
              "index": "index-dest"
            },
            "analysis": {
              "outlier_detection": {
                "outlier_fraction": 1.001
              }
            }
          }
|
||||
|
||||
---
|
||||
"Test put regression given dependent_variable is not defined":
|
||||
|
||||
|
|
Loading…
Reference in New Issue