[ML] Add extra Java-side detector validation (elastic/x-pack-elasticsearch#894)

Detector configs are validated both by our C++ and by our Java code.
If the C++ is stricter than the Java then error reporting is poor.
This commit adds two extra validation checks to the Java code that
were already present in the C++ validation.

relates elastic/x-pack-elasticsearch#856

Original commit: elastic/x-pack-elasticsearch@bd4ce2377c
This commit is contained in:
David Roberts 2017-03-31 11:06:30 +01:00 committed by GitHub
parent 21bac70d87
commit 23123b9219
3 changed files with 79 additions and 0 deletions

View File

@ -596,6 +596,7 @@ public class Detector extends ToXContentToBytes implements Writeable {
boolean emptyField = Strings.isEmpty(fieldName);
boolean emptyByField = Strings.isEmpty(byFieldName);
boolean emptyOverField = Strings.isEmpty(overFieldName);
boolean emptyPartitionField = Strings.isEmpty(partitionFieldName);
if (Detector.ANALYSIS_FUNCTIONS.contains(function) == false) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_UNKNOWN_FUNCTION, function));
}
@ -654,6 +655,37 @@ public class Detector extends ToXContentToBytes implements Writeable {
}
}
// partition, by and over field names cannot be duplicates
if (!emptyPartitionField) {
if (partitionFieldName.equals(byFieldName)) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_DUPLICATE_FIELD_NAME,
PARTITION_FIELD_NAME_FIELD.getPreferredName(), BY_FIELD_NAME_FIELD.getPreferredName(),
partitionFieldName));
}
if (partitionFieldName.equals(overFieldName)) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_DUPLICATE_FIELD_NAME,
PARTITION_FIELD_NAME_FIELD.getPreferredName(), OVER_FIELD_NAME_FIELD.getPreferredName(),
partitionFieldName));
}
}
if (!emptyByField && byFieldName.equals(overFieldName)) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_DUPLICATE_FIELD_NAME,
BY_FIELD_NAME_FIELD.getPreferredName(), OVER_FIELD_NAME_FIELD.getPreferredName(),
byFieldName));
}
// by/over field names cannot be "count" - this requirement dates back to the early
// days of the Splunk app and could be removed now BUT ONLY IF THE C++ CODE IS CHANGED
// FIRST - see https://github.com/elastic/x-pack-elasticsearch/issues/858
if (COUNT.equals(byFieldName)) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_COUNT_DISALLOWED,
BY_FIELD_NAME_FIELD.getPreferredName()));
}
if (COUNT.equals(overFieldName)) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_COUNT_DISALLOWED,
OVER_FIELD_NAME_FIELD.getPreferredName()));
}
return new Detector(detectorDescription, function, fieldName, byFieldName, overFieldName, partitionFieldName,
useNull, excludeFrequent, detectorRules);
}

View File

@ -113,6 +113,10 @@ public final class Messages {
public static final String JOB_CONFIG_UNKNOWN_FUNCTION = "Unknown function ''{0}''";
public static final String JOB_CONFIG_UPDATE_ANALYSIS_LIMITS_MODEL_MEMORY_LIMIT_CANNOT_BE_DECREASED =
"Invalid update value for analysis_limits: model_memory_limit cannot be decreased; existing is {0}, update had {1}";
public static final String JOB_CONFIG_DETECTOR_DUPLICATE_FIELD_NAME =
"{0} and {1} cannot be the same: ''{2}''";
public static final String JOB_CONFIG_DETECTOR_COUNT_DISALLOWED =
"''count'' is not a permitted value for {0}";
public static final String JOB_DATA_CONCURRENT_USE_CLOSE = "Cannot close job {0} while the job is processing another request";
public static final String JOB_DATA_CONCURRENT_USE_FLUSH = "Cannot flush job {0} while the job is processing another request";

View File

@ -623,6 +623,49 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
detector.build();
}
public void testVerify_GivenSameByAndPartition() {
Detector.Builder detector = new Detector.Builder("count", "");
detector.setByFieldName("x");
detector.setPartitionFieldName("x");
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, detector::build);
assertEquals("partition_field_name and by_field_name cannot be the same: 'x'", e.getMessage());
}
public void testVerify_GivenSameByAndOver() {
Detector.Builder detector = new Detector.Builder("count", "");
detector.setByFieldName("x");
detector.setOverFieldName("x");
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, detector::build);
assertEquals("by_field_name and over_field_name cannot be the same: 'x'", e.getMessage());
}
public void testVerify_GivenSameOverAndPartition() {
Detector.Builder detector = new Detector.Builder("count", "");
detector.setOverFieldName("x");
detector.setPartitionFieldName("x");
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, detector::build);
assertEquals("partition_field_name and over_field_name cannot be the same: 'x'", e.getMessage());
}
public void testVerify_GivenByIsCount() {
Detector.Builder detector = new Detector.Builder("count", "");
detector.setByFieldName("count");
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, detector::build);
assertEquals("'count' is not a permitted value for by_field_name", e.getMessage());
}
public void testVerify_GivenOverIsCount() {
Detector.Builder detector = new Detector.Builder("count", "");
detector.setOverFieldName("count");
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, detector::build);
assertEquals("'count' is not a permitted value for over_field_name", e.getMessage());
}
public void testExcludeFrequentForString() {
assertEquals(Detector.ExcludeFrequent.ALL, Detector.ExcludeFrequent.forString("all"));
assertEquals(Detector.ExcludeFrequent.ALL, Detector.ExcludeFrequent.forString("ALL"));