From a1cb22836c7eb0f74bb4fce1f61762a78086ebee Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Sun, 9 Apr 2017 18:18:17 +0100 Subject: [PATCH] [ML] Add more categorization validations (elastic/x-pack-elasticsearch#1019) - validates that when mlcategory is used, categorization_field_name is set - validates that when categorization_field_name is set, mlcategory is used relates elastic/x-pack-elasticsearch#986 Original commit: elastic/x-pack-elasticsearch@e861a3ed58b476058e1b90cd92ee62e3b6449b6e --- .../xpack/ml/job/config/AnalysisConfig.java | 53 +++++---- .../xpack/ml/job/config/Detector.java | 17 +++ .../ml/job/config/AnalysisConfigTests.java | 105 ++++++++++++++---- .../xpack/ml/job/config/JobUpdateTests.java | 4 +- .../writer/FieldConfigWriterTests.java | 8 +- .../rest-api-spec/test/ml/jobs_crud.yaml | 3 +- 6 files changed, 143 insertions(+), 47 deletions(-) diff --git a/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfig.java b/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfig.java index 1295f0e8038..dbf22665a43 100644 --- a/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfig.java +++ b/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfig.java @@ -263,11 +263,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable { public Set termFields() { Set termFields = new TreeSet<>(); - for (Detector d : getDetectors()) { - addIfNotNull(termFields, d.getByFieldName()); - addIfNotNull(termFields, d.getOverFieldName()); - addIfNotNull(termFields, d.getPartitionFieldName()); - } + getDetectors().stream().forEach(d -> termFields.addAll(d.getByOverPartitionTerms())); for (String i : getInfluencers()) { addIfNotNull(termFields, i); @@ -561,11 +557,12 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable { } checkFieldIsNotNegativeIfSpecified(PERIOD.getPreferredName(), period); - verifyDetectorAreDefined(detectors); + verifyDetectorAreDefined(); verifyFieldName(summaryCountFieldName); verifyFieldName(categorizationFieldName); - verifyCategorizationFilters(categorizationFilters, categorizationFieldName); + verifyMlCategoryIsUsedWhenCategorizationFieldNameIsSet(); + verifyCategorizationFilters(); checkFieldIsNotNegativeIfSpecified(RESULT_FINALIZATION_WINDOW.getPreferredName(), resultFinalizationWindow); verifyMultipleBucketSpans(); @@ -588,44 +585,58 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable { } } - private static void verifyDetectorAreDefined(List detectors) { + private void verifyDetectorAreDefined() { if (detectors == null || detectors.isEmpty()) { throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_NO_DETECTORS)); } } - private static void verifyCategorizationFilters(List filters, String categorizationFieldName) { - if (filters == null || filters.isEmpty()) { + private void verifyMlCategoryIsUsedWhenCategorizationFieldNameIsSet() { + Set byOverPartitionFields = new TreeSet<>(); + detectors.stream().forEach(d -> byOverPartitionFields.addAll(d.getByOverPartitionTerms())); + boolean isMlCategoryUsed = byOverPartitionFields.contains(ML_CATEGORY_FIELD); + if (isMlCategoryUsed && categorizationFieldName == null) { + throw new IllegalArgumentException(CATEGORIZATION_FIELD_NAME.getPreferredName() + + " must be set for " + ML_CATEGORY_FIELD + " to be available"); + } + if (categorizationFieldName != null && isMlCategoryUsed == false) { + throw new IllegalArgumentException(CATEGORIZATION_FIELD_NAME.getPreferredName() + + " is set but " + ML_CATEGORY_FIELD + " is not used in any detector by/over/partition field"); + } + } + + private void verifyCategorizationFilters() { + if (categorizationFilters == null || categorizationFilters.isEmpty()) { return; } - verifyCategorizationFieldNameSetIfFiltersAreSet(categorizationFieldName); - verifyCategorizationFiltersAreDistinct(filters); - verifyCategorizationFiltersContainNoneEmpty(filters); - verifyCategorizationFiltersAreValidRegex(filters); + verifyCategorizationFieldNameSetIfFiltersAreSet(); + verifyCategorizationFiltersAreDistinct(); + verifyCategorizationFiltersContainNoneEmpty(); + verifyCategorizationFiltersAreValidRegex(); } - private static void verifyCategorizationFieldNameSetIfFiltersAreSet(String categorizationFieldName) { + private void verifyCategorizationFieldNameSetIfFiltersAreSet() { if (categorizationFieldName == null) { throw new IllegalArgumentException(Messages.getMessage( Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_REQUIRE_CATEGORIZATION_FIELD_NAME)); } } - private static void verifyCategorizationFiltersAreDistinct(List filters) { - if (filters.stream().distinct().count() != filters.size()) { + private void verifyCategorizationFiltersAreDistinct() { + if (categorizationFilters.stream().distinct().count() != categorizationFilters.size()) { throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_DUPLICATES)); } } - private static void verifyCategorizationFiltersContainNoneEmpty(List filters) { - if (filters.stream().anyMatch(f -> f.isEmpty())) { + private void verifyCategorizationFiltersContainNoneEmpty() { + if (categorizationFilters.stream().anyMatch(String::isEmpty)) { throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_EMPTY)); } } - private static void verifyCategorizationFiltersAreValidRegex(List filters) { - for (String filter : filters) { + private void verifyCategorizationFiltersAreValidRegex() { + for (String filter : categorizationFilters) { if (!isValidRegex(filter)) { throw new IllegalArgumentException( Messages.getMessage(Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_INVALID_REGEX, filter)); diff --git a/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/Detector.java b/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/Detector.java index 12289296d97..2c989b8fee6 100644 --- a/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/Detector.java +++ b/plugin/src/main/java/org/elasticsearch/xpack/ml/job/config/Detector.java @@ -475,6 +475,23 @@ public class Detector extends ToXContentToBytes implements Writeable { .flatMap(Set::stream).collect(Collectors.toSet()); } + /** + * Returns the set of by/over/partition terms + */ + public Set getByOverPartitionTerms() { + Set terms = new HashSet<>(); + if (byFieldName != null) { + terms.add(byFieldName); + } + if (overFieldName != null) { + terms.add(overFieldName); + } + if (partitionFieldName != null) { + terms.add(partitionFieldName); + } + return terms; + } + @Override public boolean equals(Object other) { if (this == other) { diff --git a/plugin/src/test/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfigTests.java b/plugin/src/test/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfigTests.java index 568ec7dea2f..c741e6197c5 100644 --- a/plugin/src/test/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfigTests.java +++ b/plugin/src/test/java/org/elasticsearch/xpack/ml/job/config/AnalysisConfigTests.java @@ -20,6 +20,8 @@ import java.util.List; import java.util.Set; import java.util.TreeSet; +import static org.hamcrest.Matchers.equalTo; + public class AnalysisConfigTests extends AbstractSerializingTestCase { @Override @@ -28,11 +30,12 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase detectors = new ArrayList<>(); int numDetectors = randomIntBetween(1, 10); for (int i = 0; i < numDetectors; i++) { Detector.Builder builder = new Detector.Builder("count", null); - builder.setPartitionFieldName("part"); + builder.setPartitionFieldName(isCategorization ? "mlcategory" : "part"); detectors.add(builder.build()); } AnalysisConfig.Builder builder = new AnalysisConfig.Builder(detectors); @@ -45,7 +48,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase config.build()); @@ -678,8 +736,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase config.build()); @@ -722,9 +779,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase config.build()); @@ -743,4 +798,16 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase { public void testMergeWithJob() { List detectorUpdates = new ArrayList<>(); - List detectionRules1 = Collections.singletonList(new DetectionRule("client", null, Connective.OR, + List detectionRules1 = Collections.singletonList(new DetectionRule("mlcategory", null, Connective.OR, Collections.singletonList( new RuleCondition(RuleConditionType.NUMERICAL_ACTUAL, null, null, new Condition(Operator.GT, "5"), null)))); detectorUpdates.add(new JobUpdate.DetectorUpdate(0, "description-1", detectionRules1)); @@ -120,7 +120,7 @@ public class JobUpdateTests extends AbstractSerializingTestCase { Job.Builder jobBuilder = new Job.Builder("foo"); Detector.Builder d1 = new Detector.Builder("info_content", "domain"); - d1.setOverFieldName("client"); + d1.setOverFieldName("mlcategory"); Detector.Builder d2 = new Detector.Builder("min", "field"); d2.setOverFieldName("host"); AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(d1.build(), d2.build())); diff --git a/plugin/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/FieldConfigWriterTests.java b/plugin/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/FieldConfigWriterTests.java index 29ac535d97e..22a13e88d51 100644 --- a/plugin/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/FieldConfigWriterTests.java +++ b/plugin/src/test/java/org/elasticsearch/xpack/ml/job/process/autodetect/writer/FieldConfigWriterTests.java @@ -119,7 +119,7 @@ public class FieldConfigWriterTests extends ESTestCase { public void testWrite_GivenConfigHasCategorizationField() throws IOException { Detector.Builder d = new Detector.Builder("metric", "Integer_Value"); - d.setByFieldName("ts_hash"); + d.setByFieldName("mlcategory"); AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(d.build())); builder.setCategorizationFieldName("foo"); @@ -128,7 +128,7 @@ public class FieldConfigWriterTests extends ESTestCase { createFieldConfigWriter().write(); - verify(writer).write("detector.0.clause = metric(Integer_Value) by ts_hash categorizationfield=foo\n"); + verify(writer).write("detector.0.clause = metric(Integer_Value) by mlcategory categorizationfield=foo\n"); verifyNoMoreInteractions(writer); } @@ -153,7 +153,7 @@ public class FieldConfigWriterTests extends ESTestCase { public void testWrite_GivenConfigHasCategorizationFieldAndFiltersAndInfluencer() throws IOException { Detector.Builder d = new Detector.Builder("metric", "Integer_Value"); - d.setByFieldName("ts_hash"); + d.setByFieldName("mlcategory"); AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(d.build())); builder.setInfluencers(Arrays.asList("sun")); @@ -166,7 +166,7 @@ public class FieldConfigWriterTests extends ESTestCase { createFieldConfigWriter().write(); verify(writer).write( - "detector.0.clause = metric(Integer_Value) by ts_hash categorizationfield=myCategory\n" + + "detector.0.clause = metric(Integer_Value) by mlcategory categorizationfield=myCategory\n" + "categorizationfilter.0 = foo\n" + "categorizationfilter.1 = \" \"\n" + "categorizationfilter.2 = \"abc,def\"\n" + diff --git a/plugin/src/test/resources/rest-api-spec/test/ml/jobs_crud.yaml b/plugin/src/test/resources/rest-api-spec/test/ml/jobs_crud.yaml index cf976e82bc8..62972ad604f 100644 --- a/plugin/src/test/resources/rest-api-spec/test/ml/jobs_crud.yaml +++ b/plugin/src/test/resources/rest-api-spec/test/ml/jobs_crud.yaml @@ -182,7 +182,8 @@ { "description":"Pre update description", "analysis_config" : { - "detectors" :[{"function":"mean","field_name":"responsetime","by_field_name":"airline"}, {"function":"count"}], + "detectors" :[{"function":"mean","field_name":"responsetime","by_field_name":"airline"}, + {"function":"count","by_field_name":"mlcategory"}], "categorization_field_name": "some_category", "categorization_filters" : ["cat1.*", "cat2.*"] },