Rename binary_soft_classification evaluation to outlier_detection (#59951) (#59970)

This commit is contained in:
Przemysław Witek 2020-07-21 15:15:04 +02:00 committed by GitHub
parent c6d3af35b9
commit 283a1f605c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
38 changed files with 301 additions and 308 deletions

View File

@ -21,16 +21,16 @@ package org.elasticsearch.client.ml.dataframe.evaluation;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.Classification;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicErrorMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.plugins.spi.NamedXContentProvider;
@ -57,25 +57,25 @@ public class MlEvaluationNamedXContentProvider implements NamedXContentProvider
return Arrays.asList(
// Evaluations
new NamedXContentRegistry.Entry(
Evaluation.class, new ParseField(BinarySoftClassification.NAME), BinarySoftClassification::fromXContent),
Evaluation.class, new ParseField(OutlierDetection.NAME), OutlierDetection::fromXContent),
new NamedXContentRegistry.Entry(Evaluation.class, new ParseField(Classification.NAME), Classification::fromXContent),
new NamedXContentRegistry.Entry(Evaluation.class, new ParseField(Regression.NAME), Regression::fromXContent),
// Evaluation metrics
new NamedXContentRegistry.Entry(
EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME)),
AucRocMetric::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME)),
PrecisionMetric::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME)),
RecallMetric::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME)),
ConfusionMatrixMetric::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.class,
@ -114,19 +114,19 @@ public class MlEvaluationNamedXContentProvider implements NamedXContentProvider
// Evaluation metrics results
new NamedXContentRegistry.Entry(
EvaluationMetric.Result.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME)),
AucRocMetric.Result::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.Result.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME)),
PrecisionMetric.Result::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.Result.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME)),
RecallMetric.Result::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.Result.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME)),
ConfusionMatrixMetric.Result::fromXContent),
new NamedXContentRegistry.Entry(
EvaluationMetric.Result.class,

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
import org.elasticsearch.common.ParseField;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
import org.elasticsearch.common.Nullable;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
import org.elasticsearch.common.ParseField;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
@ -38,24 +38,22 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constru
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
/**
* Evaluation of binary soft classification methods, e.g. outlier detection.
* This is useful to evaluate problems where a model outputs a probability of whether
* a data frame row belongs to one of two groups.
* Evaluation of outlier detection results.
*/
public class BinarySoftClassification implements Evaluation {
public class OutlierDetection implements Evaluation {
public static final String NAME = "binary_soft_classification";
public static final String NAME = "outlier_detection";
private static final ParseField ACTUAL_FIELD = new ParseField("actual_field");
private static final ParseField PREDICTED_PROBABILITY_FIELD = new ParseField("predicted_probability_field");
private static final ParseField METRICS = new ParseField("metrics");
@SuppressWarnings("unchecked")
public static final ConstructingObjectParser<BinarySoftClassification, Void> PARSER =
public static final ConstructingObjectParser<OutlierDetection, Void> PARSER =
new ConstructingObjectParser<>(
NAME,
true,
args -> new BinarySoftClassification((String) args[0], (String) args[1], (List<EvaluationMetric>) args[2]));
args -> new OutlierDetection((String) args[0], (String) args[1], (List<EvaluationMetric>) args[2]));
static {
PARSER.declareString(constructorArg(), ACTUAL_FIELD);
@ -64,7 +62,7 @@ public class BinarySoftClassification implements Evaluation {
optionalConstructorArg(), (p, c, n) -> p.namedObject(EvaluationMetric.class, registeredMetricName(NAME, n), null), METRICS);
}
public static BinarySoftClassification fromXContent(XContentParser parser) {
public static OutlierDetection fromXContent(XContentParser parser) {
return PARSER.apply(parser, null);
}
@ -84,16 +82,16 @@ public class BinarySoftClassification implements Evaluation {
*/
private final List<EvaluationMetric> metrics;
public BinarySoftClassification(String actualField, String predictedField) {
public OutlierDetection(String actualField, String predictedField) {
this(actualField, predictedField, (List<EvaluationMetric>)null);
}
public BinarySoftClassification(String actualField, String predictedProbabilityField, EvaluationMetric... metric) {
public OutlierDetection(String actualField, String predictedProbabilityField, EvaluationMetric... metric) {
this(actualField, predictedProbabilityField, Arrays.asList(metric));
}
public BinarySoftClassification(String actualField, String predictedProbabilityField,
@Nullable List<EvaluationMetric> metrics) {
public OutlierDetection(String actualField, String predictedProbabilityField,
@Nullable List<EvaluationMetric> metrics) {
this.actualField = Objects.requireNonNull(actualField);
this.predictedProbabilityField = Objects.requireNonNull(predictedProbabilityField);
if (metrics != null) {
@ -129,7 +127,7 @@ public class BinarySoftClassification implements Evaluation {
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
BinarySoftClassification that = (BinarySoftClassification) o;
OutlierDetection that = (OutlierDetection) o;
return Objects.equals(actualField, that.actualField)
&& Objects.equals(predictedProbabilityField, that.predictedProbabilityField)
&& Objects.equals(metrics, that.metrics);

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
import org.elasticsearch.common.Strings;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
import org.elasticsearch.common.Strings;

View File

@ -135,22 +135,21 @@ import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsState;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsStats;
import org.elasticsearch.client.ml.dataframe.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.PhaseProgress;
import org.elasticsearch.client.ml.dataframe.QueryConfig;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.Classification;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicErrorMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
import org.elasticsearch.client.ml.dataframe.stats.common.DataCounts;
@ -1316,7 +1315,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex("put-test-dest-index")
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.setDescription("some description")
.build();
@ -1331,7 +1330,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
assertThat(createdConfig.getSource().getQueryConfig(), equalTo(new QueryConfig(new MatchAllQueryBuilder()))); // default value
assertThat(createdConfig.getDest().getIndex(), equalTo(config.getDest().getIndex()));
assertThat(createdConfig.getDest().getResultsField(), equalTo("ml")); // default value
assertThat(createdConfig.getAnalysis(), equalTo(OutlierDetection.builder()
assertThat(createdConfig.getAnalysis(), equalTo(org.elasticsearch.client.ml.dataframe.OutlierDetection.builder()
.setComputeFeatureInfluence(true)
.setOutlierFraction(0.05)
.setStandardizationEnabled(true).build()));
@ -1468,7 +1467,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex("get-test-dest-index")
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
createIndex("get-test-source-index", defaultMappingForTest());
@ -1502,7 +1501,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex("get-test-dest-index")
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
PutDataFrameAnalyticsResponse putDataFrameAnalyticsResponse = execute(
@ -1572,7 +1571,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex(destIndex)
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
execute(
@ -1625,7 +1624,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex(destIndex)
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
execute(
@ -1666,7 +1665,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex(destIndex)
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
execute(
@ -1708,7 +1707,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex("delete-test-dest-index")
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
createIndex("delete-test-source-index", defaultMappingForTest());
@ -1750,21 +1749,21 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
assertThat(exception.status().getStatus(), equalTo(404));
}
public void testEvaluateDataFrame_BinarySoftClassification() throws IOException {
public void testEvaluateDataFrame_OutlierDetection() throws IOException {
String indexName = "evaluate-test-index";
createIndex(indexName, mappingForSoftClassification());
createIndex(indexName, mappingForOutlierDetection());
BulkRequest bulk = new BulkRequest()
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.add(docForSoftClassification(indexName, "blue", false, 0.1)) // #0
.add(docForSoftClassification(indexName, "blue", false, 0.2)) // #1
.add(docForSoftClassification(indexName, "blue", false, 0.3)) // #2
.add(docForSoftClassification(indexName, "blue", false, 0.4)) // #3
.add(docForSoftClassification(indexName, "blue", false, 0.7)) // #4
.add(docForSoftClassification(indexName, "blue", true, 0.2)) // #5
.add(docForSoftClassification(indexName, "green", true, 0.3)) // #6
.add(docForSoftClassification(indexName, "green", true, 0.4)) // #7
.add(docForSoftClassification(indexName, "green", true, 0.8)) // #8
.add(docForSoftClassification(indexName, "green", true, 0.9)); // #9
.add(docForOutlierDetection(indexName, "blue", false, 0.1)) // #0
.add(docForOutlierDetection(indexName, "blue", false, 0.2)) // #1
.add(docForOutlierDetection(indexName, "blue", false, 0.3)) // #2
.add(docForOutlierDetection(indexName, "blue", false, 0.4)) // #3
.add(docForOutlierDetection(indexName, "blue", false, 0.7)) // #4
.add(docForOutlierDetection(indexName, "blue", true, 0.2)) // #5
.add(docForOutlierDetection(indexName, "green", true, 0.3)) // #6
.add(docForOutlierDetection(indexName, "green", true, 0.4)) // #7
.add(docForOutlierDetection(indexName, "green", true, 0.8)) // #8
.add(docForOutlierDetection(indexName, "green", true, 0.9)); // #9
highLevelClient().bulk(bulk, RequestOptions.DEFAULT);
MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
@ -1772,14 +1771,14 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
new EvaluateDataFrameRequest(
indexName,
null,
new BinarySoftClassification(
new OutlierDetection(
actualField,
probabilityField,
PrecisionMetric.at(0.4, 0.5, 0.6), RecallMetric.at(0.5, 0.7), ConfusionMatrixMetric.at(0.5), AucRocMetric.withCurve()));
EvaluateDataFrameResponse evaluateDataFrameResponse =
execute(evaluateDataFrameRequest, machineLearningClient::evaluateDataFrame, machineLearningClient::evaluateDataFrameAsync);
assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(BinarySoftClassification.NAME));
assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(OutlierDetection.NAME));
assertThat(evaluateDataFrameResponse.getMetrics().size(), equalTo(4));
PrecisionMetric.Result precisionResult = evaluateDataFrameResponse.getMetricByName(PrecisionMetric.NAME);
@ -1824,21 +1823,21 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
assertThat(curvePointAtThreshold1.getThreshold(), equalTo(1.0));
}
public void testEvaluateDataFrame_BinarySoftClassification_WithQuery() throws IOException {
public void testEvaluateDataFrame_OutlierDetection_WithQuery() throws IOException {
String indexName = "evaluate-with-query-test-index";
createIndex(indexName, mappingForSoftClassification());
createIndex(indexName, mappingForOutlierDetection());
BulkRequest bulk = new BulkRequest()
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.add(docForSoftClassification(indexName, "blue", true, 1.0)) // #0
.add(docForSoftClassification(indexName, "blue", true, 1.0)) // #1
.add(docForSoftClassification(indexName, "blue", true, 1.0)) // #2
.add(docForSoftClassification(indexName, "blue", true, 1.0)) // #3
.add(docForSoftClassification(indexName, "blue", true, 0.0)) // #4
.add(docForSoftClassification(indexName, "blue", true, 0.0)) // #5
.add(docForSoftClassification(indexName, "green", true, 0.0)) // #6
.add(docForSoftClassification(indexName, "green", true, 0.0)) // #7
.add(docForSoftClassification(indexName, "green", true, 0.0)) // #8
.add(docForSoftClassification(indexName, "green", true, 1.0)); // #9
.add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #0
.add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #1
.add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #2
.add(docForOutlierDetection(indexName, "blue", true, 1.0)) // #3
.add(docForOutlierDetection(indexName, "blue", true, 0.0)) // #4
.add(docForOutlierDetection(indexName, "blue", true, 0.0)) // #5
.add(docForOutlierDetection(indexName, "green", true, 0.0)) // #6
.add(docForOutlierDetection(indexName, "green", true, 0.0)) // #7
.add(docForOutlierDetection(indexName, "green", true, 0.0)) // #8
.add(docForOutlierDetection(indexName, "green", true, 1.0)); // #9
highLevelClient().bulk(bulk, RequestOptions.DEFAULT);
MachineLearningClient machineLearningClient = highLevelClient().machineLearning();
@ -1847,11 +1846,11 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
indexName,
// Request that only the "blue" subset be evaluated
new QueryConfig(QueryBuilders.termQuery(datasetField, "blue")),
new BinarySoftClassification(actualField, probabilityField, ConfusionMatrixMetric.at(0.5)));
new OutlierDetection(actualField, probabilityField, ConfusionMatrixMetric.at(0.5)));
EvaluateDataFrameResponse evaluateDataFrameResponse =
execute(evaluateDataFrameRequest, machineLearningClient::evaluateDataFrame, machineLearningClient::evaluateDataFrameAsync);
assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(BinarySoftClassification.NAME));
assertThat(evaluateDataFrameResponse.getEvaluationName(), equalTo(OutlierDetection.NAME));
assertThat(evaluateDataFrameResponse.getMetrics().size(), equalTo(1));
ConfusionMatrixMetric.Result confusionMatrixResult = evaluateDataFrameResponse.getMetricByName(ConfusionMatrixMetric.NAME);
@ -2123,7 +2122,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
private static final String actualField = "label";
private static final String probabilityField = "p";
private static XContentBuilder mappingForSoftClassification() throws IOException {
private static XContentBuilder mappingForOutlierDetection() throws IOException {
return XContentFactory.jsonBuilder().startObject()
.startObject("properties")
.startObject(datasetField)
@ -2139,7 +2138,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
.endObject();
}
private static IndexRequest docForSoftClassification(String indexName, String dataset, boolean isTrue, double p) {
private static IndexRequest docForOutlierDetection(String indexName, String dataset, boolean isTrue, double p) {
return new IndexRequest()
.index(indexName)
.source(XContentType.JSON, datasetField, dataset, actualField, Boolean.toString(isTrue), probabilityField, p);
@ -2195,11 +2194,11 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
public void testExplainDataFrameAnalytics() throws IOException {
String indexName = "explain-df-test-index";
createIndex(indexName, mappingForSoftClassification());
createIndex(indexName, mappingForOutlierDetection());
BulkRequest bulk1 = new BulkRequest()
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
for (int i = 0; i < 10; ++i) {
bulk1.add(docForSoftClassification(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true)));
bulk1.add(docForOutlierDetection(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true)));
}
highLevelClient().bulk(bulk1, RequestOptions.DEFAULT);
@ -2208,7 +2207,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
new ExplainDataFrameAnalyticsRequest(
DataFrameAnalyticsConfig.builder()
.setSource(DataFrameAnalyticsSource.builder().setIndex(indexName).build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build());
// We are deliberately lenient here: this test does not aim to verify concrete numbers, only the end-to-end user workflow.
@ -2230,7 +2229,7 @@ public class MachineLearningIT extends ESRestHighLevelClientTestCase {
BulkRequest bulk2 = new BulkRequest()
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
for (int i = 10; i < 100; ++i) {
bulk2.add(docForSoftClassification(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true)));
bulk2.add(docForOutlierDetection(indexName, randomAlphaOfLength(10), randomBoolean(), randomDoubleBetween(0.0, 1.0, true)));
}
highLevelClient().bulk(bulk2, RequestOptions.DEFAULT);

View File

@ -56,7 +56,6 @@ import org.elasticsearch.client.indexlifecycle.SetPriorityAction;
import org.elasticsearch.client.indexlifecycle.ShrinkAction;
import org.elasticsearch.client.indexlifecycle.UnfollowAction;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalysis;
import org.elasticsearch.client.ml.dataframe.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.Classification;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric;
@ -65,11 +64,11 @@ import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLo
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric;
import org.elasticsearch.client.ml.dataframe.stats.classification.ClassificationStats;
import org.elasticsearch.client.ml.dataframe.stats.outlierdetection.OutlierDetectionStats;
import org.elasticsearch.client.ml.dataframe.stats.regression.RegressionStats;
@ -742,7 +741,7 @@ public class RestHighLevelClientTests extends ESTestCase {
assertTrue(names.contains(FreezeAction.NAME));
assertTrue(names.contains(SetPriorityAction.NAME));
assertEquals(Integer.valueOf(3), categories.get(DataFrameAnalysis.class));
assertTrue(names.contains(OutlierDetection.NAME.getPreferredName()));
assertTrue(names.contains(org.elasticsearch.client.ml.dataframe.OutlierDetection.NAME.getPreferredName()));
assertTrue(names.contains(org.elasticsearch.client.ml.dataframe.Regression.NAME.getPreferredName()));
assertTrue(names.contains(org.elasticsearch.client.ml.dataframe.Classification.NAME.getPreferredName()));
assertTrue(names.contains(OutlierDetectionStats.NAME.getPreferredName()));
@ -751,14 +750,14 @@ public class RestHighLevelClientTests extends ESTestCase {
assertEquals(Integer.valueOf(1), categories.get(SyncConfig.class));
assertTrue(names.contains(TimeSyncConfig.NAME));
assertEquals(Integer.valueOf(3), categories.get(org.elasticsearch.client.ml.dataframe.evaluation.Evaluation.class));
assertThat(names, hasItems(BinarySoftClassification.NAME, Classification.NAME, Regression.NAME));
assertThat(names, hasItems(OutlierDetection.NAME, Classification.NAME, Regression.NAME));
assertEquals(Integer.valueOf(12), categories.get(org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric.class));
assertThat(names,
hasItems(
registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME),
registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME),
registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME),
registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME),
registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME),
registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME),
registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME),
registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME),
registeredMetricName(Classification.NAME, AccuracyMetric.NAME),
registeredMetricName(
Classification.NAME, org.elasticsearch.client.ml.dataframe.evaluation.classification.PrecisionMetric.NAME),
@ -772,10 +771,10 @@ public class RestHighLevelClientTests extends ESTestCase {
assertEquals(Integer.valueOf(12), categories.get(org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric.Result.class));
assertThat(names,
hasItems(
registeredMetricName(BinarySoftClassification.NAME, AucRocMetric.NAME),
registeredMetricName(BinarySoftClassification.NAME, PrecisionMetric.NAME),
registeredMetricName(BinarySoftClassification.NAME, RecallMetric.NAME),
registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrixMetric.NAME),
registeredMetricName(OutlierDetection.NAME, AucRocMetric.NAME),
registeredMetricName(OutlierDetection.NAME, PrecisionMetric.NAME),
registeredMetricName(OutlierDetection.NAME, RecallMetric.NAME),
registeredMetricName(OutlierDetection.NAME, ConfusionMatrixMetric.NAME),
registeredMetricName(Classification.NAME, AccuracyMetric.NAME),
registeredMetricName(
Classification.NAME, org.elasticsearch.client.ml.dataframe.evaluation.classification.PrecisionMetric.NAME),

View File

@ -151,7 +151,6 @@ import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsState;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsStats;
import org.elasticsearch.client.ml.dataframe.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.QueryConfig;
import org.elasticsearch.client.ml.dataframe.Regression;
import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation;
@ -160,16 +159,16 @@ import org.elasticsearch.client.ml.dataframe.evaluation.classification.AccuracyM
import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric.ActualClass;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.MulticlassConfusionMatrixMetric.PredictedClass;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetric.ConfusionMatrix;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicErrorMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.HuberMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetric.ConfusionMatrix;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetric;
import org.elasticsearch.client.ml.dataframe.explain.FieldSelection;
import org.elasticsearch.client.ml.dataframe.explain.MemoryEstimation;
import org.elasticsearch.client.ml.filestructurefinder.FileStructure;
@ -2977,12 +2976,12 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
// end::put-data-frame-analytics-dest-config
// tag::put-data-frame-analytics-outlier-detection-default
DataFrameAnalysis outlierDetection = OutlierDetection.createDefault(); // <1>
DataFrameAnalysis outlierDetection = org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault(); // <1>
// end::put-data-frame-analytics-outlier-detection-default
// tag::put-data-frame-analytics-outlier-detection-customized
DataFrameAnalysis outlierDetectionCustomized = OutlierDetection.builder() // <1>
.setMethod(OutlierDetection.Method.DISTANCE_KNN) // <2>
DataFrameAnalysis outlierDetectionCustomized = org.elasticsearch.client.ml.dataframe.OutlierDetection.builder() // <1>
.setMethod(org.elasticsearch.client.ml.dataframe.OutlierDetection.Method.DISTANCE_KNN) // <2>
.setNNeighbors(5) // <3>
.setFeatureInfluenceThreshold(0.1) // <4>
.setComputeFeatureInfluence(true) // <5>
@ -3351,9 +3350,9 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
client.bulk(bulkRequest, RequestOptions.DEFAULT);
{
// tag::evaluate-data-frame-evaluation-softclassification
// tag::evaluate-data-frame-evaluation-outlierdetection
Evaluation evaluation =
new BinarySoftClassification( // <1>
new OutlierDetection( // <1>
"label", // <2>
"p", // <3>
// Evaluation metrics // <4>
@ -3361,7 +3360,7 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
RecallMetric.at(0.5, 0.7), // <6>
ConfusionMatrixMetric.at(0.5), // <7>
AucRocMetric.withCurve()); // <8>
// end::evaluate-data-frame-evaluation-softclassification
// end::evaluate-data-frame-evaluation-outlierdetection
// tag::evaluate-data-frame-request
EvaluateDataFrameRequest request =
@ -3379,13 +3378,13 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
List<EvaluationMetric.Result> metrics = response.getMetrics(); // <1>
// end::evaluate-data-frame-response
// tag::evaluate-data-frame-results-softclassification
// tag::evaluate-data-frame-results-outlierdetection
PrecisionMetric.Result precisionResult = response.getMetricByName(PrecisionMetric.NAME); // <1>
double precision = precisionResult.getScoreByThreshold("0.4"); // <2>
ConfusionMatrixMetric.Result confusionMatrixResult = response.getMetricByName(ConfusionMatrixMetric.NAME); // <3>
ConfusionMatrix confusionMatrix = confusionMatrixResult.getScoreByThreshold("0.5"); // <4>
// end::evaluate-data-frame-results-softclassification
// end::evaluate-data-frame-results-outlierdetection
assertThat(
metrics.stream().map(EvaluationMetric.Result::getMetricName).collect(Collectors.toList()),
@ -3400,7 +3399,7 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
EvaluateDataFrameRequest request = new EvaluateDataFrameRequest(
indexName,
new QueryConfig(QueryBuilders.termQuery("dataset", "blue")),
new BinarySoftClassification(
new OutlierDetection(
"label",
"p",
PrecisionMetric.at(0.4, 0.5, 0.6),
@ -3622,7 +3621,7 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
// tag::explain-data-frame-analytics-config-request
DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
.setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
request = new ExplainDataFrameAnalyticsRequest(config); // <1>
// end::explain-data-frame-analytics-config-request
@ -3652,7 +3651,7 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
{
DataFrameAnalyticsConfig config = DataFrameAnalyticsConfig.builder()
.setSource(DataFrameAnalyticsSource.builder().setIndex("explain-df-test-source-index").build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
ExplainDataFrameAnalyticsRequest request = new ExplainDataFrameAnalyticsRequest(config);
// tag::explain-data-frame-analytics-execute-listener
@ -4357,6 +4356,6 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
.setDest(DataFrameAnalyticsDest.builder()
.setIndex("put-test-dest-index")
.build())
.setAnalysis(OutlierDetection.createDefault())
.setAnalysis(org.elasticsearch.client.ml.dataframe.OutlierDetection.createDefault())
.build();
}

View File

@ -21,8 +21,9 @@ package org.elasticsearch.client.ml;
import org.elasticsearch.client.ml.dataframe.QueryConfig;
import org.elasticsearch.client.ml.dataframe.evaluation.Evaluation;
import org.elasticsearch.client.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider;
import org.elasticsearch.client.ml.dataframe.evaluation.classification.ClassificationTests;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetectionTests;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.RegressionTests;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassificationTests;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
@ -47,7 +48,8 @@ public class EvaluateDataFrameRequestTests extends AbstractXContentTestCase<Eval
QueryConfig queryConfig = randomBoolean()
? new QueryConfig(QueryBuilders.termQuery(randomAlphaOfLength(10), randomAlphaOfLength(10)))
: null;
Evaluation evaluation = randomBoolean() ? BinarySoftClassificationTests.createRandom() : RegressionTests.createRandom();
Evaluation evaluation =
randomFrom(OutlierDetectionTests.createRandom(), ClassificationTests.createRandom(), RegressionTests.createRandom());
return new EvaluateDataFrameRequest(indices, queryConfig, evaluation);
}

View File

@ -26,11 +26,11 @@ import org.elasticsearch.client.ml.dataframe.evaluation.classification.Multiclas
import org.elasticsearch.client.ml.dataframe.evaluation.regression.MeanSquaredErrorMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.RSquaredMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.regression.Regression;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.PrecisionMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.softclassification.RecallMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.AucRocMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.OutlierDetection;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.PrecisionMetricResultTests;
import org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.RecallMetricResultTests;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
@ -43,10 +43,10 @@ import java.util.function.Predicate;
public class EvaluateDataFrameResponseTests extends AbstractXContentTestCase<EvaluateDataFrameResponse> {
public static EvaluateDataFrameResponse randomResponse() {
String evaluationName = randomFrom(BinarySoftClassification.NAME, Classification.NAME, Regression.NAME);
String evaluationName = randomFrom(OutlierDetection.NAME, Classification.NAME, Regression.NAME);
List<EvaluationMetric.Result> metrics;
switch (evaluationName) {
case BinarySoftClassification.NAME:
case OutlierDetection.NAME:
metrics = randomSubsetOf(
Arrays.asList(
AucRocMetricResultTests.randomResult(),

View File

@ -36,7 +36,7 @@ public class ClassificationTests extends AbstractXContentTestCase<Classification
return new NamedXContentRegistry(new MlEvaluationNamedXContentProvider().getNamedXContentParsers());
}
static Classification createRandom() {
public static Classification createRandom() {
List<EvaluationMetric> metrics =
randomSubsetOf(
Arrays.asList(

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
@ -26,15 +26,13 @@ import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.elasticsearch.client.ml.dataframe.evaluation.softclassification.AucRocMetricAucRocPointTests.randomPoint;
public class AucRocMetricResultTests extends AbstractXContentTestCase<AucRocMetric.Result> {
public static AucRocMetric.Result randomResult() {
return new AucRocMetric.Result(
randomDouble(),
Stream
.generate(() -> randomPoint())
.generate(AucRocMetricAucRocPointTests::randomPoint)
.limit(randomIntBetween(1, 10))
.collect(Collectors.toList()));
}

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
@ -26,7 +26,7 @@ import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.elasticsearch.client.ml.dataframe.evaluation.softclassification.ConfusionMatrixMetricConfusionMatrixTests.randomConfusionMatrix;
import static org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection.ConfusionMatrixMetricConfusionMatrixTests.randomConfusionMatrix;
public class ConfusionMatrixMetricResultTests extends AbstractXContentTestCase<ConfusionMatrixMetric.Result> {

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.client.ml.dataframe.evaluation.EvaluationMetric;
import org.elasticsearch.client.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider;
@ -30,14 +30,14 @@ import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;
public class BinarySoftClassificationTests extends AbstractXContentTestCase<BinarySoftClassification> {
public class OutlierDetectionTests extends AbstractXContentTestCase<OutlierDetection> {
@Override
protected NamedXContentRegistry xContentRegistry() {
return new NamedXContentRegistry(new MlEvaluationNamedXContentProvider().getNamedXContentParsers());
}
public static BinarySoftClassification createRandom() {
public static OutlierDetection createRandom() {
List<EvaluationMetric> metrics = new ArrayList<>();
if (randomBoolean()) {
metrics.add(new AucRocMetric(randomBoolean()));
@ -46,33 +46,33 @@ public class BinarySoftClassificationTests extends AbstractXContentTestCase<Bina
metrics.add(new PrecisionMetric(Arrays.asList(randomArray(1,
4,
Double[]::new,
BinarySoftClassificationTests::randomDouble))));
OutlierDetectionTests::randomDouble))));
}
if (randomBoolean()) {
metrics.add(new RecallMetric(Arrays.asList(randomArray(1,
4,
Double[]::new,
BinarySoftClassificationTests::randomDouble))));
OutlierDetectionTests::randomDouble))));
}
if (randomBoolean()) {
metrics.add(new ConfusionMatrixMetric(Arrays.asList(randomArray(1,
4,
Double[]::new,
BinarySoftClassificationTests::randomDouble))));
OutlierDetectionTests::randomDouble))));
}
return randomBoolean() ?
new BinarySoftClassification(randomAlphaOfLength(10), randomAlphaOfLength(10)) :
new BinarySoftClassification(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics.isEmpty() ? null : metrics);
new OutlierDetection(randomAlphaOfLength(10), randomAlphaOfLength(10)) :
new OutlierDetection(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics.isEmpty() ? null : metrics);
}
@Override
protected BinarySoftClassification createTestInstance() {
protected OutlierDetection createTestInstance() {
return createRandom();
}
@Override
protected BinarySoftClassification doParseInstance(XContentParser parser) throws IOException {
return BinarySoftClassification.fromXContent(parser);
protected OutlierDetection doParseInstance(XContentParser parser) throws IOException {
return OutlierDetection.fromXContent(parser);
}
@Override

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.client.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;

View File

@ -25,13 +25,13 @@ include-tagged::{doc-tests-file}[{api}-request]
==== Evaluation
Evaluation to be performed.
Currently, supported evaluations include: +BinarySoftClassification+, +Classification+, +Regression+.
The currently supported evaluations are: +OutlierDetection+, +Classification+, and +Regression+.
===== Binary soft classification
===== Outlier detection
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-evaluation-softclassification]
include-tagged::{doc-tests-file}[{api}-evaluation-outlierdetection]
--------------------------------------------------
<1> Constructing a new evaluation
<2> Name of the field in the index. Its value denotes the actual (i.e. ground truth) label for an example. Must be either true or false.
@ -87,11 +87,11 @@ include-tagged::{doc-tests-file}[{api}-response]
==== Results
===== Binary soft classification
===== Outlier detection
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-results-softclassification]
include-tagged::{doc-tests-file}[{api}-results-outlierdetection]
--------------------------------------------------
<1> Fetching precision metric by name

View File

@ -48,7 +48,7 @@ See <<ml-evaluate-dfanalytics-resources>>.
--
Available evaluation types:
* `binary_soft_classification`
* `outlier_detection`
* `regression`
* `classification`
@ -65,13 +65,11 @@ source index. See <<query-dsl>>.
[[ml-evaluate-dfanalytics-resources]]
== {dfanalytics-cap} evaluation resources
[[binary-sc-resources]]
=== Binary soft classification evaluation objects
[[oldetection-resources]]
=== {oldetection-cap} evaluation objects
Binary soft classification evaluates the results of an analysis which outputs
the probability that each document belongs to a certain class. For example, in
the context of {oldetection}, the analysis outputs the probability whether each
document is an outlier.
{oldetection-cap} evaluates the results of an {oldetection} analysis which outputs
the probability that each document is an outlier.
`actual_field`::
(Required, string) The field of the `index` which contains the `ground truth`.
@ -179,8 +177,8 @@ belongs.
[[ml-evaluate-dfanalytics-results]]
== {api-response-body-title}
`binary_soft_classification`::
(object) If you chose to do binary soft classification, the API returns the
`outlier_detection`::
(object) If you chose to do outlier detection, the API returns the
following evaluation metrics:
`auc_roc`::: TBD
@ -197,8 +195,8 @@ belongs.
== {api-examples-title}
[[ml-evaluate-binary-soft-class-example]]
=== Binary soft classification
[[ml-evaluate-oldetection-example]]
=== {oldetection-cap}
[source,console]
--------------------------------------------------
@ -206,7 +204,7 @@ POST _ml/data_frame/_evaluate
{
"index": "my_analytics_dest_index",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "ml.outlier_score"
}
@ -220,7 +218,7 @@ The API returns the following results:
[source,console-result]
----
{
"binary_soft_classification": {
"outlier_detection": {
"auc_roc": {
"score": 0.92584757746414444
},

View File

@ -12,17 +12,17 @@ import org.elasticsearch.plugins.spi.NamedXContentProvider;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.Accuracy;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.Classification;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.MulticlassConfusionMatrix;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.AucRoc;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.ConfusionMatrix;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.Precision;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.Recall;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.ScoreByThresholdResult;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.Huber;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.MeanSquaredError;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.MeanSquaredLogarithmicError;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.RSquared;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.Regression;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.AucRoc;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassification;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ConfusionMatrix;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Precision;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.Recall;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.ScoreByThresholdResult;
import java.util.Arrays;
import java.util.List;
@ -57,22 +57,22 @@ public class MlEvaluationNamedXContentProvider implements NamedXContentProvider
public List<NamedXContentRegistry.Entry> getNamedXContentParsers() {
return Arrays.asList(
// Evaluations
new NamedXContentRegistry.Entry(Evaluation.class, BinarySoftClassification.NAME, BinarySoftClassification::fromXContent),
new NamedXContentRegistry.Entry(Evaluation.class, OutlierDetection.NAME, OutlierDetection::fromXContent),
new NamedXContentRegistry.Entry(Evaluation.class, Classification.NAME, Classification::fromXContent),
new NamedXContentRegistry.Entry(Evaluation.class, Regression.NAME, Regression::fromXContent),
// Soft classification metrics
// Outlier detection metrics
new NamedXContentRegistry.Entry(EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, AucRoc.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, AucRoc.NAME)),
AucRoc::fromXContent),
new NamedXContentRegistry.Entry(EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, Precision.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, Precision.NAME)),
Precision::fromXContent),
new NamedXContentRegistry.Entry(EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, Recall.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, Recall.NAME)),
Recall::fromXContent),
new NamedXContentRegistry.Entry(EvaluationMetric.class,
new ParseField(registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrix.NAME)),
new ParseField(registeredMetricName(OutlierDetection.NAME, ConfusionMatrix.NAME)),
ConfusionMatrix::fromXContent),
// Classification metrics
@ -113,8 +113,8 @@ public class MlEvaluationNamedXContentProvider implements NamedXContentProvider
return Arrays.asList(
// Evaluations
new NamedWriteableRegistry.Entry(Evaluation.class,
BinarySoftClassification.NAME.getPreferredName(),
BinarySoftClassification::new),
OutlierDetection.NAME.getPreferredName(),
OutlierDetection::new),
new NamedWriteableRegistry.Entry(Evaluation.class,
Classification.NAME.getPreferredName(),
Classification::new),
@ -124,16 +124,16 @@ public class MlEvaluationNamedXContentProvider implements NamedXContentProvider
// Evaluation metrics
new NamedWriteableRegistry.Entry(EvaluationMetric.class,
registeredMetricName(BinarySoftClassification.NAME, AucRoc.NAME),
registeredMetricName(OutlierDetection.NAME, AucRoc.NAME),
AucRoc::new),
new NamedWriteableRegistry.Entry(EvaluationMetric.class,
registeredMetricName(BinarySoftClassification.NAME, Precision.NAME),
registeredMetricName(OutlierDetection.NAME, Precision.NAME),
Precision::new),
new NamedWriteableRegistry.Entry(EvaluationMetric.class,
registeredMetricName(BinarySoftClassification.NAME, Recall.NAME),
registeredMetricName(OutlierDetection.NAME, Recall.NAME),
Recall::new),
new NamedWriteableRegistry.Entry(EvaluationMetric.class,
registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrix.NAME),
registeredMetricName(OutlierDetection.NAME, ConfusionMatrix.NAME),
ConfusionMatrix::new),
new NamedWriteableRegistry.Entry(EvaluationMetric.class,
registeredMetricName(Classification.NAME, MulticlassConfusionMatrix.NAME),
@ -164,13 +164,13 @@ public class MlEvaluationNamedXContentProvider implements NamedXContentProvider
// Evaluation metrics results
new NamedWriteableRegistry.Entry(EvaluationMetricResult.class,
registeredMetricName(BinarySoftClassification.NAME, AucRoc.NAME),
registeredMetricName(OutlierDetection.NAME, AucRoc.NAME),
AucRoc.Result::new),
new NamedWriteableRegistry.Entry(EvaluationMetricResult.class,
registeredMetricName(BinarySoftClassification.NAME, ScoreByThresholdResult.NAME),
registeredMetricName(OutlierDetection.NAME, ScoreByThresholdResult.NAME),
ScoreByThresholdResult::new),
new NamedWriteableRegistry.Entry(EvaluationMetricResult.class,
registeredMetricName(BinarySoftClassification.NAME, ConfusionMatrix.NAME),
registeredMetricName(OutlierDetection.NAME, ConfusionMatrix.NAME),
ConfusionMatrix.Result::new),
new NamedWriteableRegistry.Entry(EvaluationMetricResult.class,
registeredMetricName(Classification.NAME, MulticlassConfusionMatrix.NAME),

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.collect.Tuple;
@ -27,7 +27,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Optional;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassification.actualIsTrueQuery;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection.actualIsTrueQuery;
abstract class AbstractConfusionMatrixMetric implements EvaluationMetric {

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
@ -38,7 +38,7 @@ import java.util.Optional;
import java.util.stream.IntStream;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider.registeredMetricName;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassification.actualIsTrueQuery;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetection.actualIsTrueQuery;
/**
* Area under the curve (AUC) of the receiver operating characteristic (ROC).
@ -93,7 +93,7 @@ public class AucRoc implements EvaluationMetric {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override
@ -339,7 +339,7 @@ public class AucRoc implements EvaluationMetric {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
@ -48,7 +48,7 @@ public class ConfusionMatrix extends AbstractConfusionMatrixMetric {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override
@ -131,7 +131,7 @@ public class ConfusionMatrix extends AbstractConfusionMatrixMetric {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
@ -26,20 +26,18 @@ import java.util.Objects;
import static org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider.registeredMetricName;
/**
* Evaluation of binary soft classification methods, e.g. outlier detection.
* This is useful to evaluate problems where a model outputs a probability of whether
* a data frame row belongs to one of two groups.
* Evaluation of outlier detection results.
*/
public class BinarySoftClassification implements Evaluation {
public class OutlierDetection implements Evaluation {
public static final ParseField NAME = new ParseField("binary_soft_classification");
public static final ParseField NAME = new ParseField("outlier_detection", "binary_soft_classification");
private static final ParseField ACTUAL_FIELD = new ParseField("actual_field");
private static final ParseField PREDICTED_PROBABILITY_FIELD = new ParseField("predicted_probability_field");
private static final ParseField METRICS = new ParseField("metrics");
public static final ConstructingObjectParser<BinarySoftClassification, Void> PARSER = new ConstructingObjectParser<>(
NAME.getPreferredName(), a -> new BinarySoftClassification((String) a[0], (String) a[1], (List<EvaluationMetric>) a[2]));
public static final ConstructingObjectParser<OutlierDetection, Void> PARSER = new ConstructingObjectParser<>(
NAME.getPreferredName(), a -> new OutlierDetection((String) a[0], (String) a[1], (List<EvaluationMetric>) a[2]));
static {
PARSER.declareString(ConstructingObjectParser.constructorArg(), ACTUAL_FIELD);
@ -48,7 +46,7 @@ public class BinarySoftClassification implements Evaluation {
(p, c, n) -> p.namedObject(EvaluationMetric.class, registeredMetricName(NAME.getPreferredName(), n), c), METRICS);
}
public static BinarySoftClassification fromXContent(XContentParser parser) {
public static OutlierDetection fromXContent(XContentParser parser) {
return PARSER.apply(parser, null);
}
@ -72,11 +70,11 @@ public class BinarySoftClassification implements Evaluation {
*/
private final List<EvaluationMetric> metrics;
public BinarySoftClassification(String actualField, String predictedProbabilityField,
@Nullable List<EvaluationMetric> metrics) {
public OutlierDetection(String actualField, String predictedProbabilityField,
@Nullable List<EvaluationMetric> metrics) {
this.actualField = ExceptionsHelper.requireNonNull(actualField, ACTUAL_FIELD);
this.predictedProbabilityField = ExceptionsHelper.requireNonNull(predictedProbabilityField, PREDICTED_PROBABILITY_FIELD);
this.metrics = initMetrics(metrics, BinarySoftClassification::defaultMetrics);
this.metrics = initMetrics(metrics, OutlierDetection::defaultMetrics);
}
private static List<EvaluationMetric> defaultMetrics() {
@ -87,7 +85,7 @@ public class BinarySoftClassification implements Evaluation {
new ConfusionMatrix(Arrays.asList(0.25, 0.5, 0.75)));
}
public BinarySoftClassification(StreamInput in) throws IOException {
public OutlierDetection(StreamInput in) throws IOException {
this.actualField = in.readString();
this.predictedProbabilityField = in.readString();
this.metrics = in.readNamedWriteableList(EvaluationMetric.class);
@ -145,7 +143,7 @@ public class BinarySoftClassification implements Evaluation {
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
BinarySoftClassification that = (BinarySoftClassification) o;
OutlierDetection that = (OutlierDetection) o;
return Objects.equals(actualField, that.actualField)
&& Objects.equals(predictedProbabilityField, that.predictedProbabilityField)
&& Objects.equals(metrics, that.metrics);

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
@ -46,7 +46,7 @@ public class Precision extends AbstractConfusionMatrixMetric {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
@ -46,7 +46,7 @@ public class Recall extends AbstractConfusionMatrixMetric {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
@ -39,7 +39,7 @@ public class ScoreByThresholdResult implements EvaluationMetricResult {
@Override
public String getWriteableName() {
return registeredMetricName(BinarySoftClassification.NAME, NAME);
return registeredMetricName(OutlierDetection.NAME, NAME);
}
@Override

View File

@ -16,8 +16,9 @@ import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction.Request;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.Evaluation;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.MlEvaluationNamedXContentProvider;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.ClassificationTests;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.regression.RegressionTests;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification.BinarySoftClassificationTests;
import org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection.OutlierDetectionTests;
import org.elasticsearch.xpack.core.ml.utils.QueryProvider;
import java.io.IOException;
@ -60,7 +61,8 @@ public class EvaluateDataFrameActionRequestTests extends AbstractSerializingTest
throw new UncheckedIOException(e);
}
}
Evaluation evaluation = randomBoolean() ? BinarySoftClassificationTests.createRandom() : RegressionTests.createRandom();
Evaluation evaluation =
randomFrom(OutlierDetectionTests.createRandom(), ClassificationTests.createRandom(), RegressionTests.createRandom());
return new Request()
.setIndices(indices)
.setQueryProvider(queryProvider)

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.XContentParser;

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.Writeable;

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
@ -27,7 +27,7 @@ import java.util.List;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
public class BinarySoftClassificationTests extends AbstractSerializingTestCase<BinarySoftClassification> {
public class OutlierDetectionTests extends AbstractSerializingTestCase<OutlierDetection> {
private static final EvaluationParameters EVALUATION_PARAMETERS = new EvaluationParameters(100);
@ -41,7 +41,7 @@ public class BinarySoftClassificationTests extends AbstractSerializingTestCase<B
return new NamedXContentRegistry(new MlEvaluationNamedXContentProvider().getNamedXContentParsers());
}
public static BinarySoftClassification createRandom() {
public static OutlierDetection createRandom() {
List<EvaluationMetric> metrics = new ArrayList<>();
if (randomBoolean()) {
metrics.add(AucRocTests.createRandom());
@ -62,28 +62,28 @@ public class BinarySoftClassificationTests extends AbstractSerializingTestCase<B
metrics.add(RecallTests.createRandom());
metrics.add(ConfusionMatrixTests.createRandom());
}
return new BinarySoftClassification(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics);
return new OutlierDetection(randomAlphaOfLength(10), randomAlphaOfLength(10), metrics);
}
@Override
protected BinarySoftClassification doParseInstance(XContentParser parser) throws IOException {
return BinarySoftClassification.fromXContent(parser);
protected OutlierDetection doParseInstance(XContentParser parser) throws IOException {
return OutlierDetection.fromXContent(parser);
}
@Override
protected BinarySoftClassification createTestInstance() {
protected OutlierDetection createTestInstance() {
return createRandom();
}
@Override
protected Writeable.Reader<BinarySoftClassification> instanceReader() {
return BinarySoftClassification::new;
protected Writeable.Reader<OutlierDetection> instanceReader() {
return OutlierDetection::new;
}
public void testConstructor_GivenEmptyMetrics() {
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
() -> new BinarySoftClassification("foo", "bar", Collections.emptyList()));
assertThat(e.getMessage(), equalTo("[binary_soft_classification] must have one or more metrics"));
() -> new OutlierDetection("foo", "bar", Collections.emptyList()));
assertThat(e.getMessage(), equalTo("[outlier_detection] must have one or more metrics"));
}
public void testBuildSearch() {
@ -99,7 +99,7 @@ public class BinarySoftClassificationTests extends AbstractSerializingTestCase<B
.filter(QueryBuilders.termQuery("field_A", "some-value"))
.filter(QueryBuilders.termQuery("field_B", "some-other-value")));
BinarySoftClassification evaluation = new BinarySoftClassification("act", "prob", Arrays.asList(new Precision(Arrays.asList(0.7))));
OutlierDetection evaluation = new OutlierDetection("act", "prob", Arrays.asList(new Precision(Arrays.asList(0.7))));
SearchSourceBuilder searchSourceBuilder = evaluation.buildSearch(EVALUATION_PARAMETERS, userProvidedQuery);
assertThat(searchSourceBuilder.query(), equalTo(expectedSearchQuery));

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.Writeable;

View File

@ -3,7 +3,7 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.softclassification;
package org.elasticsearch.xpack.core.ml.dataframe.evaluation.outlierdetection;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.Writeable;

View File

@ -104,17 +104,17 @@ integTest.runner {
'ml/evaluate_data_frame/Test given missing index',
'ml/evaluate_data_frame/Test given index does not exist',
'ml/evaluate_data_frame/Test given missing evaluation',
'ml/evaluate_data_frame/Test binary_soft_classification auc_roc given actual_field is always true',
'ml/evaluate_data_frame/Test binary_soft_classification auc_roc given actual_field is always false',
'ml/evaluate_data_frame/Test binary_soft_classification given evaluation with empty metrics',
'ml/evaluate_data_frame/Test binary_soft_classification given missing actual_field',
'ml/evaluate_data_frame/Test binary_soft_classification given missing predicted_probability_field',
'ml/evaluate_data_frame/Test binary_soft_classification given precision with threshold less than zero',
'ml/evaluate_data_frame/Test binary_soft_classification given recall with threshold less than zero',
'ml/evaluate_data_frame/Test binary_soft_classification given confusion_matrix with threshold less than zero',
'ml/evaluate_data_frame/Test binary_soft_classification given precision with empty thresholds',
'ml/evaluate_data_frame/Test binary_soft_classification given recall with empty thresholds',
'ml/evaluate_data_frame/Test binary_soft_classification given confusion_matrix with empty thresholds',
'ml/evaluate_data_frame/Test outlier_detection auc_roc given actual_field is always true',
'ml/evaluate_data_frame/Test outlier_detection auc_roc given actual_field is always false',
'ml/evaluate_data_frame/Test outlier_detection given evaluation with empty metrics',
'ml/evaluate_data_frame/Test outlier_detection given missing actual_field',
'ml/evaluate_data_frame/Test outlier_detection given missing predicted_probability_field',
'ml/evaluate_data_frame/Test outlier_detection given precision with threshold less than zero',
'ml/evaluate_data_frame/Test outlier_detection given recall with threshold less than zero',
'ml/evaluate_data_frame/Test outlier_detection given confusion_matrix with threshold less than zero',
'ml/evaluate_data_frame/Test outlier_detection given precision with empty thresholds',
'ml/evaluate_data_frame/Test outlier_detection given recall with empty thresholds',
'ml/evaluate_data_frame/Test outlier_detection given confusion_matrix with empty thresholds',
'ml/evaluate_data_frame/Test classification given evaluation with empty metrics',
'ml/evaluate_data_frame/Test classification given missing actual_field',
'ml/evaluate_data_frame/Test classification given missing predicted_field',

View File

@ -149,14 +149,14 @@ setup:
indices.refresh: {}
---
"Test binary_soft_classification auc_roc":
"Test outlier_detection auc_roc":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -165,18 +165,18 @@ setup:
}
}
}
- match: { binary_soft_classification.auc_roc.score: 0.9899 }
- is_false: binary_soft_classification.auc_roc.curve
- match: { outlier_detection.auc_roc.score: 0.9899 }
- is_false: outlier_detection.auc_roc.curve
---
"Test binary_soft_classification auc_roc given actual_field is int":
"Test outlier_detection auc_roc given actual_field is int":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier_int",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -185,18 +185,18 @@ setup:
}
}
}
- match: { binary_soft_classification.auc_roc.score: 0.9899 }
- is_false: binary_soft_classification.auc_roc.curve
- match: { outlier_detection.auc_roc.score: 0.9899 }
- is_false: outlier_detection.auc_roc.curve
---
"Test binary_soft_classification auc_roc include curve":
"Test outlier_detection auc_roc include curve":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -205,11 +205,11 @@ setup:
}
}
}
- match: { binary_soft_classification.auc_roc.score: 0.9899 }
- is_true: binary_soft_classification.auc_roc.curve
- match: { outlier_detection.auc_roc.score: 0.9899 }
- is_true: outlier_detection.auc_roc.curve
---
"Test binary_soft_classification auc_roc given actual_field is always true":
"Test outlier_detection auc_roc given actual_field is always true":
- do:
catch: /\[auc_roc\] requires at least one actual_field to have a different value than \[true\]/
ml.evaluate_data_frame:
@ -217,7 +217,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "all_true_field",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -228,7 +228,7 @@ setup:
}
---
"Test binary_soft_classification auc_roc given actual_field is always false":
"Test outlier_detection auc_roc given actual_field is always false":
- do:
catch: /\[auc_roc\] requires at least one actual_field to have the value \[true\]/
ml.evaluate_data_frame:
@ -236,7 +236,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "all_false_field",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -247,14 +247,14 @@ setup:
}
---
"Test binary_soft_classification precision":
"Test outlier_detection precision":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -264,20 +264,20 @@ setup:
}
}
- match:
binary_soft_classification:
outlier_detection:
precision:
'0.0': 0.625
'0.5': 1.0
---
"Test binary_soft_classification recall":
"Test outlier_detection recall":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -287,21 +287,21 @@ setup:
}
}
- match:
binary_soft_classification:
outlier_detection:
recall:
'0.0': 1.0
'0.4': 0.8
'0.5': 0.6
---
"Test binary_soft_classification confusion_matrix":
"Test outlier_detection confusion_matrix":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -311,7 +311,7 @@ setup:
}
}
- match:
binary_soft_classification:
outlier_detection:
confusion_matrix:
'0.0':
tp: 5
@ -330,7 +330,7 @@ setup:
fn: 2
---
"Test binary_soft_classification with query":
"Test outlier_detection with query":
- do:
ml.evaluate_data_frame:
body: >
@ -338,7 +338,7 @@ setup:
"index": "utopia",
"query": { "bool": { "filter": { "term": { "dataset": "blue" } } } },
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -348,7 +348,7 @@ setup:
}
}
- match:
binary_soft_classification:
outlier_detection:
confusion_matrix:
'0.5':
tp: 0
@ -357,29 +357,29 @@ setup:
fn: 1
---
"Test binary_soft_classification default metrics":
"Test outlier_detection default metrics":
- do:
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score"
}
}
}
- is_true: binary_soft_classification.auc_roc.score
- is_true: binary_soft_classification.precision.0\.25
- is_true: binary_soft_classification.precision.0\.5
- is_true: binary_soft_classification.precision.0\.75
- is_true: binary_soft_classification.recall.0\.25
- is_true: binary_soft_classification.recall.0\.5
- is_true: binary_soft_classification.recall.0\.75
- is_true: binary_soft_classification.confusion_matrix.0\.25
- is_true: binary_soft_classification.confusion_matrix.0\.5
- is_true: binary_soft_classification.confusion_matrix.0\.75
- is_true: outlier_detection.auc_roc.score
- is_true: outlier_detection.precision.0\.25
- is_true: outlier_detection.precision.0\.5
- is_true: outlier_detection.precision.0\.75
- is_true: outlier_detection.recall.0\.25
- is_true: outlier_detection.recall.0\.5
- is_true: outlier_detection.recall.0\.75
- is_true: outlier_detection.confusion_matrix.0\.25
- is_true: outlier_detection.confusion_matrix.0\.5
- is_true: outlier_detection.confusion_matrix.0\.75
---
"Test given missing index":
@ -389,7 +389,7 @@ setup:
body: >
{
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score"
}
@ -405,7 +405,7 @@ setup:
{
"index": "missing_index",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score"
}
@ -423,15 +423,15 @@ setup:
}
---
"Test binary_soft_classification given evaluation with empty metrics":
"Test outlier_detection given evaluation with empty metrics":
- do:
catch: /\[binary_soft_classification\] must have one or more metrics/
catch: /\[outlier_detection\] must have one or more metrics/
ml.evaluate_data_frame:
body: >
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -441,7 +441,7 @@ setup:
}
---
"Test binary_soft_classification given missing actual_field":
"Test outlier_detection given missing actual_field":
- do:
catch: /No documents found containing both \[missing, outlier_score\] fields/
ml.evaluate_data_frame:
@ -449,7 +449,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "missing",
"predicted_probability_field": "outlier_score"
}
@ -457,7 +457,7 @@ setup:
}
---
"Test binary_soft_classification given missing predicted_probability_field":
"Test outlier_detection given missing predicted_probability_field":
- do:
catch: /No documents found containing both \[is_outlier, missing\] fields/
ml.evaluate_data_frame:
@ -465,7 +465,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "missing"
}
@ -473,7 +473,7 @@ setup:
}
---
"Test binary_soft_classification given precision with threshold less than zero":
"Test outlier_detection given precision with threshold less than zero":
- do:
catch: /\[precision.at\] values must be in \[0.0, 1.0\]/
ml.evaluate_data_frame:
@ -481,7 +481,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -492,7 +492,7 @@ setup:
}
---
"Test binary_soft_classification given recall with threshold less than zero":
"Test outlier_detection given recall with threshold less than zero":
- do:
catch: /\[recall.at\] values must be in \[0.0, 1.0\]/
ml.evaluate_data_frame:
@ -500,7 +500,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -511,7 +511,7 @@ setup:
}
---
"Test binary_soft_classification given confusion_matrix with threshold less than zero":
"Test outlier_detection given confusion_matrix with threshold less than zero":
- do:
catch: /\[confusion_matrix.at\] values must be in \[0.0, 1.0\]/
ml.evaluate_data_frame:
@ -519,7 +519,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -530,7 +530,7 @@ setup:
}
---
"Test binary_soft_classification given precision with empty thresholds":
"Test outlier_detection given precision with empty thresholds":
- do:
catch: /\[precision.at\] must have at least one value/
ml.evaluate_data_frame:
@ -538,7 +538,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -549,7 +549,7 @@ setup:
}
---
"Test binary_soft_classification given recall with empty thresholds":
"Test outlier_detection given recall with empty thresholds":
- do:
catch: /\[recall.at\] must have at least one value/
ml.evaluate_data_frame:
@ -557,7 +557,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {
@ -568,7 +568,7 @@ setup:
}
---
"Test binary_soft_classification given confusion_matrix with empty thresholds":
"Test outlier_detection given confusion_matrix with empty thresholds":
- do:
catch: /\[confusion_matrix.at\] must have at least one value/
ml.evaluate_data_frame:
@ -576,7 +576,7 @@ setup:
{
"index": "utopia",
"evaluation": {
"binary_soft_classification": {
"outlier_detection": {
"actual_field": "is_outlier",
"predicted_probability_field": "outlier_score",
"metrics": {