[ML] Add more categorization validations (elastic/x-pack-elasticsearch#1019)
- Validates that when mlcategory is used, categorization_field_name is set. - Validates that when categorization_field_name is set, mlcategory is used. Relates elastic/x-pack-elasticsearch#986. Original commit: elastic/x-pack-elasticsearch@e861a3ed58
This commit is contained in:
parent
0df726f6dd
commit
a1cb22836c
|
@ -263,11 +263,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
|
|||
public Set<String> termFields() {
|
||||
Set<String> termFields = new TreeSet<>();
|
||||
|
||||
for (Detector d : getDetectors()) {
|
||||
addIfNotNull(termFields, d.getByFieldName());
|
||||
addIfNotNull(termFields, d.getOverFieldName());
|
||||
addIfNotNull(termFields, d.getPartitionFieldName());
|
||||
}
|
||||
getDetectors().stream().forEach(d -> termFields.addAll(d.getByOverPartitionTerms()));
|
||||
|
||||
for (String i : getInfluencers()) {
|
||||
addIfNotNull(termFields, i);
|
||||
|
@ -561,11 +557,12 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
|
|||
}
|
||||
checkFieldIsNotNegativeIfSpecified(PERIOD.getPreferredName(), period);
|
||||
|
||||
verifyDetectorAreDefined(detectors);
|
||||
verifyDetectorAreDefined();
|
||||
verifyFieldName(summaryCountFieldName);
|
||||
verifyFieldName(categorizationFieldName);
|
||||
|
||||
verifyCategorizationFilters(categorizationFilters, categorizationFieldName);
|
||||
verifyMlCategoryIsUsedWhenCategorizationFieldNameIsSet();
|
||||
verifyCategorizationFilters();
|
||||
checkFieldIsNotNegativeIfSpecified(RESULT_FINALIZATION_WINDOW.getPreferredName(), resultFinalizationWindow);
|
||||
verifyMultipleBucketSpans();
|
||||
|
||||
|
@ -588,44 +585,58 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
|
|||
}
|
||||
}
|
||||
|
||||
private static void verifyDetectorAreDefined(List<Detector> detectors) {
|
||||
private void verifyDetectorAreDefined() {
|
||||
if (detectors == null || detectors.isEmpty()) {
|
||||
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_NO_DETECTORS));
|
||||
}
|
||||
}
|
||||
|
||||
private static void verifyCategorizationFilters(List<String> filters, String categorizationFieldName) {
|
||||
if (filters == null || filters.isEmpty()) {
|
||||
private void verifyMlCategoryIsUsedWhenCategorizationFieldNameIsSet() {
|
||||
Set<String> byOverPartitionFields = new TreeSet<>();
|
||||
detectors.stream().forEach(d -> byOverPartitionFields.addAll(d.getByOverPartitionTerms()));
|
||||
boolean isMlCategoryUsed = byOverPartitionFields.contains(ML_CATEGORY_FIELD);
|
||||
if (isMlCategoryUsed && categorizationFieldName == null) {
|
||||
throw new IllegalArgumentException(CATEGORIZATION_FIELD_NAME.getPreferredName()
|
||||
+ " must be set for " + ML_CATEGORY_FIELD + " to be available");
|
||||
}
|
||||
if (categorizationFieldName != null && isMlCategoryUsed == false) {
|
||||
throw new IllegalArgumentException(CATEGORIZATION_FIELD_NAME.getPreferredName()
|
||||
+ " is set but " + ML_CATEGORY_FIELD + " is not used in any detector by/over/partition field");
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyCategorizationFilters() {
|
||||
if (categorizationFilters == null || categorizationFilters.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
verifyCategorizationFieldNameSetIfFiltersAreSet(categorizationFieldName);
|
||||
verifyCategorizationFiltersAreDistinct(filters);
|
||||
verifyCategorizationFiltersContainNoneEmpty(filters);
|
||||
verifyCategorizationFiltersAreValidRegex(filters);
|
||||
verifyCategorizationFieldNameSetIfFiltersAreSet();
|
||||
verifyCategorizationFiltersAreDistinct();
|
||||
verifyCategorizationFiltersContainNoneEmpty();
|
||||
verifyCategorizationFiltersAreValidRegex();
|
||||
}
|
||||
|
||||
private static void verifyCategorizationFieldNameSetIfFiltersAreSet(String categorizationFieldName) {
|
||||
private void verifyCategorizationFieldNameSetIfFiltersAreSet() {
|
||||
if (categorizationFieldName == null) {
|
||||
throw new IllegalArgumentException(Messages.getMessage(
|
||||
Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_REQUIRE_CATEGORIZATION_FIELD_NAME));
|
||||
}
|
||||
}
|
||||
|
||||
private static void verifyCategorizationFiltersAreDistinct(List<String> filters) {
|
||||
if (filters.stream().distinct().count() != filters.size()) {
|
||||
private void verifyCategorizationFiltersAreDistinct() {
|
||||
if (categorizationFilters.stream().distinct().count() != categorizationFilters.size()) {
|
||||
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_DUPLICATES));
|
||||
}
|
||||
}
|
||||
|
||||
private static void verifyCategorizationFiltersContainNoneEmpty(List<String> filters) {
|
||||
if (filters.stream().anyMatch(f -> f.isEmpty())) {
|
||||
private void verifyCategorizationFiltersContainNoneEmpty() {
|
||||
if (categorizationFilters.stream().anyMatch(String::isEmpty)) {
|
||||
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_EMPTY));
|
||||
}
|
||||
}
|
||||
|
||||
private static void verifyCategorizationFiltersAreValidRegex(List<String> filters) {
|
||||
for (String filter : filters) {
|
||||
private void verifyCategorizationFiltersAreValidRegex() {
|
||||
for (String filter : categorizationFilters) {
|
||||
if (!isValidRegex(filter)) {
|
||||
throw new IllegalArgumentException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_INVALID_REGEX, filter));
|
||||
|
|
|
@ -475,6 +475,23 @@ public class Detector extends ToXContentToBytes implements Writeable {
|
|||
.flatMap(Set::stream).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of by/over/partition terms
|
||||
*/
|
||||
public Set<String> getByOverPartitionTerms() {
|
||||
Set<String> terms = new HashSet<>();
|
||||
if (byFieldName != null) {
|
||||
terms.add(byFieldName);
|
||||
}
|
||||
if (overFieldName != null) {
|
||||
terms.add(overFieldName);
|
||||
}
|
||||
if (partitionFieldName != null) {
|
||||
terms.add(partitionFieldName);
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) {
|
||||
|
|
|
@ -20,6 +20,8 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisConfig> {
|
||||
|
||||
@Override
|
||||
|
@ -28,11 +30,12 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public static AnalysisConfig.Builder createRandomized() {
|
||||
boolean isCategorization = randomBoolean();
|
||||
List<Detector> detectors = new ArrayList<>();
|
||||
int numDetectors = randomIntBetween(1, 10);
|
||||
for (int i = 0; i < numDetectors; i++) {
|
||||
Detector.Builder builder = new Detector.Builder("count", null);
|
||||
builder.setPartitionFieldName("part");
|
||||
builder.setPartitionFieldName(isCategorization ? "mlcategory" : "part");
|
||||
detectors.add(builder.build());
|
||||
}
|
||||
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(detectors);
|
||||
|
@ -45,7 +48,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
bucketSpan = TimeValue.timeValueSeconds(randomIntBetween(1, 1_000_000));
|
||||
builder.setBucketSpan(bucketSpan);
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
if (isCategorization) {
|
||||
builder.setCategorizationFieldName(randomAlphaOfLength(10));
|
||||
builder.setCategorizationFilters(Arrays.asList(generateRandomStringArray(10, 10, false)));
|
||||
}
|
||||
|
@ -234,6 +237,60 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
assertTrue(ac.getMultipleBucketSpans().contains(TimeValue.timeValueSeconds(24000)));
|
||||
}
|
||||
|
||||
public void testBuild_GivenMlCategoryUsedAsByFieldButNoCategorizationFieldName() {
|
||||
Detector.Builder detector = new Detector.Builder();
|
||||
detector.setFunction("count");
|
||||
detector.setByFieldName("mlcategory");
|
||||
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
|
||||
ac.setCategorizationFieldName(null);
|
||||
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, ac::build);
|
||||
assertThat(e.getMessage(), equalTo("categorization_field_name must be set for mlcategory to be available"));
|
||||
}
|
||||
|
||||
public void testBuild_GivenMlCategoryUsedAsOverFieldButNoCategorizationFieldName() {
|
||||
Detector.Builder detector = new Detector.Builder();
|
||||
detector.setFunction("count");
|
||||
detector.setOverFieldName("mlcategory");
|
||||
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
|
||||
ac.setCategorizationFieldName(null);
|
||||
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, ac::build);
|
||||
assertThat(e.getMessage(), equalTo("categorization_field_name must be set for mlcategory to be available"));
|
||||
}
|
||||
|
||||
public void testBuild_GivenMlCategoryUsedAsPartitionFieldButNoCategorizationFieldName() {
|
||||
Detector.Builder detector = new Detector.Builder();
|
||||
detector.setFunction("count");
|
||||
detector.setPartitionFieldName("mlcategory");
|
||||
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
|
||||
ac.setCategorizationFieldName(null);
|
||||
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, ac::build);
|
||||
assertThat(e.getMessage(), equalTo("categorization_field_name must be set for mlcategory to be available"));
|
||||
}
|
||||
|
||||
public void testBuild_GivenCategorizationFieldNameButNoUseOfMlCategory() {
|
||||
Detector.Builder detector = new Detector.Builder();
|
||||
detector.setFunction("count");
|
||||
detector.setOverFieldName("foo");
|
||||
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
|
||||
ac.setCategorizationFieldName("msg");
|
||||
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, ac::build);
|
||||
assertThat(e.getMessage(), equalTo("categorization_field_name is set but mlcategory is " +
|
||||
"not used in any detector by/over/partition field"));
|
||||
}
|
||||
|
||||
public void testBuild_GivenMlCategoryUsedAsByFieldAndCategorizationFieldName() {
|
||||
Detector.Builder detector = new Detector.Builder();
|
||||
detector.setFunction("count");
|
||||
detector.setOverFieldName("mlcategory");
|
||||
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(detector.build()));
|
||||
ac.setCategorizationFieldName("msg");
|
||||
ac.build();
|
||||
}
|
||||
|
||||
public void testEquals_GivenSameReference() {
|
||||
AnalysisConfig config = createFullyPopulatedConfig();
|
||||
assertTrue(config.equals(config));
|
||||
|
@ -283,11 +340,11 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public void testEquals_GivenCategorizationField() {
|
||||
AnalysisConfig.Builder builder = createConfigBuilder();
|
||||
AnalysisConfig.Builder builder = createValidCategorizationConfig();
|
||||
builder.setCategorizationFieldName("foo");
|
||||
AnalysisConfig config1 = builder.build();
|
||||
|
||||
builder = createConfigBuilder();
|
||||
builder = createValidCategorizationConfig();
|
||||
builder.setCategorizationFieldName("bar");
|
||||
AnalysisConfig config2 = builder.build();
|
||||
|
||||
|
@ -370,11 +427,12 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public void testEquals_GivenDifferentCategorizationFilters() {
|
||||
AnalysisConfig config1 = createFullyPopulatedConfig();
|
||||
AnalysisConfig.Builder builder = createConfigBuilder();
|
||||
builder.setCategorizationFilters(Arrays.asList("foo", "bar"));
|
||||
builder.setCategorizationFieldName("cat");
|
||||
AnalysisConfig config2 = builder.build();
|
||||
AnalysisConfig.Builder configBuilder1 = createValidCategorizationConfig();
|
||||
AnalysisConfig.Builder configBuilder2 = createValidCategorizationConfig();
|
||||
configBuilder1.setCategorizationFilters(Arrays.asList("foo", "bar"));
|
||||
configBuilder2.setCategorizationFilters(Arrays.asList("foo", "foobar"));
|
||||
AnalysisConfig config1 = configBuilder1.build();
|
||||
AnalysisConfig config2 = configBuilder2.build();
|
||||
|
||||
assertFalse(config1.equals(config2));
|
||||
assertFalse(config2.equals(config1));
|
||||
|
@ -398,8 +456,10 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
private static AnalysisConfig createFullyPopulatedConfig() {
|
||||
Detector.Builder detector = new Detector.Builder("min", "count");
|
||||
detector.setOverFieldName("mlcategory");
|
||||
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(
|
||||
Collections.singletonList(new Detector.Builder("min", "count").build()));
|
||||
Collections.singletonList(detector.build()));
|
||||
builder.setBucketSpan(TimeValue.timeValueHours(1));
|
||||
builder.setBatchSpan(TimeValue.timeValueHours(24));
|
||||
builder.setCategorizationFieldName("cat");
|
||||
|
@ -508,8 +568,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public void testVerify_GivenValidConfigWithCategorizationFieldNameAndCategorizationFilters() {
|
||||
AnalysisConfig.Builder analysisConfig = createValidConfig();
|
||||
analysisConfig.setCategorizationFieldName("myCategory");
|
||||
AnalysisConfig.Builder analysisConfig = createValidCategorizationConfig();
|
||||
analysisConfig.setCategorizationFilters(Arrays.asList("foo", "bar"));
|
||||
|
||||
analysisConfig.build();
|
||||
|
@ -668,8 +727,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public void testVerify_GivenDuplicateCategorizationFilters() {
|
||||
AnalysisConfig.Builder config = createValidConfig();
|
||||
config.setCategorizationFieldName("myCategory");
|
||||
AnalysisConfig.Builder config = createValidCategorizationConfig();
|
||||
config.setCategorizationFilters(Arrays.asList("foo", "bar", "foo"));
|
||||
|
||||
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, () -> config.build());
|
||||
|
@ -678,8 +736,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public void testVerify_GivenEmptyCategorizationFilter() {
|
||||
AnalysisConfig.Builder config = createValidConfig();
|
||||
config.setCategorizationFieldName("myCategory");
|
||||
AnalysisConfig.Builder config = createValidCategorizationConfig();
|
||||
config.setCategorizationFilters(Arrays.asList("foo", ""));
|
||||
|
||||
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, () -> config.build());
|
||||
|
@ -722,9 +779,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
}
|
||||
|
||||
public void testVerify_GivenCategorizationFiltersContainInvalidRegex() {
|
||||
|
||||
AnalysisConfig.Builder config = createValidConfig();
|
||||
config.setCategorizationFieldName("myCategory");
|
||||
AnalysisConfig.Builder config = createValidCategorizationConfig();
|
||||
config.setCategorizationFilters(Arrays.asList("foo", "("));
|
||||
|
||||
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, () -> config.build());
|
||||
|
@ -743,4 +798,16 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
analysisConfig.setPeriod(0L);
|
||||
return analysisConfig;
|
||||
}
|
||||
|
||||
private static AnalysisConfig.Builder createValidCategorizationConfig() {
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setByFieldName("mlcategory");
|
||||
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(detector.build()));
|
||||
analysisConfig.setBucketSpan(TimeValue.timeValueHours(1));
|
||||
analysisConfig.setBatchSpan(TimeValue.timeValueHours(2));
|
||||
analysisConfig.setLatency(TimeValue.ZERO);
|
||||
analysisConfig.setPeriod(0L);
|
||||
analysisConfig.setCategorizationFieldName("msg");
|
||||
return analysisConfig;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -90,7 +90,7 @@ public class JobUpdateTests extends AbstractSerializingTestCase<JobUpdate> {
|
|||
|
||||
public void testMergeWithJob() {
|
||||
List<JobUpdate.DetectorUpdate> detectorUpdates = new ArrayList<>();
|
||||
List<DetectionRule> detectionRules1 = Collections.singletonList(new DetectionRule("client", null, Connective.OR,
|
||||
List<DetectionRule> detectionRules1 = Collections.singletonList(new DetectionRule("mlcategory", null, Connective.OR,
|
||||
Collections.singletonList(
|
||||
new RuleCondition(RuleConditionType.NUMERICAL_ACTUAL, null, null, new Condition(Operator.GT, "5"), null))));
|
||||
detectorUpdates.add(new JobUpdate.DetectorUpdate(0, "description-1", detectionRules1));
|
||||
|
@ -120,7 +120,7 @@ public class JobUpdateTests extends AbstractSerializingTestCase<JobUpdate> {
|
|||
|
||||
Job.Builder jobBuilder = new Job.Builder("foo");
|
||||
Detector.Builder d1 = new Detector.Builder("info_content", "domain");
|
||||
d1.setOverFieldName("client");
|
||||
d1.setOverFieldName("mlcategory");
|
||||
Detector.Builder d2 = new Detector.Builder("min", "field");
|
||||
d2.setOverFieldName("host");
|
||||
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Arrays.asList(d1.build(), d2.build()));
|
||||
|
|
|
@ -119,7 +119,7 @@ public class FieldConfigWriterTests extends ESTestCase {
|
|||
|
||||
public void testWrite_GivenConfigHasCategorizationField() throws IOException {
|
||||
Detector.Builder d = new Detector.Builder("metric", "Integer_Value");
|
||||
d.setByFieldName("ts_hash");
|
||||
d.setByFieldName("mlcategory");
|
||||
|
||||
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(d.build()));
|
||||
builder.setCategorizationFieldName("foo");
|
||||
|
@ -128,7 +128,7 @@ public class FieldConfigWriterTests extends ESTestCase {
|
|||
|
||||
createFieldConfigWriter().write();
|
||||
|
||||
verify(writer).write("detector.0.clause = metric(Integer_Value) by ts_hash categorizationfield=foo\n");
|
||||
verify(writer).write("detector.0.clause = metric(Integer_Value) by mlcategory categorizationfield=foo\n");
|
||||
verifyNoMoreInteractions(writer);
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,7 @@ public class FieldConfigWriterTests extends ESTestCase {
|
|||
|
||||
public void testWrite_GivenConfigHasCategorizationFieldAndFiltersAndInfluencer() throws IOException {
|
||||
Detector.Builder d = new Detector.Builder("metric", "Integer_Value");
|
||||
d.setByFieldName("ts_hash");
|
||||
d.setByFieldName("mlcategory");
|
||||
|
||||
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(Arrays.asList(d.build()));
|
||||
builder.setInfluencers(Arrays.asList("sun"));
|
||||
|
@ -166,7 +166,7 @@ public class FieldConfigWriterTests extends ESTestCase {
|
|||
createFieldConfigWriter().write();
|
||||
|
||||
verify(writer).write(
|
||||
"detector.0.clause = metric(Integer_Value) by ts_hash categorizationfield=myCategory\n" +
|
||||
"detector.0.clause = metric(Integer_Value) by mlcategory categorizationfield=myCategory\n" +
|
||||
"categorizationfilter.0 = foo\n" +
|
||||
"categorizationfilter.1 = \" \"\n" +
|
||||
"categorizationfilter.2 = \"abc,def\"\n" +
|
||||
|
|
|
@ -182,7 +182,8 @@
|
|||
{
|
||||
"description":"Pre update description",
|
||||
"analysis_config" : {
|
||||
"detectors" :[{"function":"mean","field_name":"responsetime","by_field_name":"airline"}, {"function":"count"}],
|
||||
"detectors" :[{"function":"mean","field_name":"responsetime","by_field_name":"airline"},
|
||||
{"function":"count","by_field_name":"mlcategory"}],
|
||||
"categorization_field_name": "some_category",
|
||||
"categorization_filters" : ["cat1.*", "cat2.*"]
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue