[ML] Add validation that rejects duplicate detectors in PutJobAction (#40967) (#41072)

* [ML] Add validation that rejects duplicate detectors in PutJobAction

Closes #39704

* Add YML integration test for duplicate detectors fix.

* Use "== false" comparison rather than "!" operator.

* Refine error message to sound more natural.

* Put job description in square brackets in the error message.

* Use the new validation in ValidateJobConfigAction.

* Exclude YML tests for new validation from permission tests.
This commit is contained in:
Przemysław Witek 2019-04-10 15:43:35 +02:00 committed by GitHub
parent 799541e068
commit f5014ace64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 142 additions and 14 deletions

View File

@ -59,6 +59,10 @@ public class PutJobAction extends Action<PutJobAction.Response> {
public Request(Job.Builder jobBuilder) {
// Validate the jobBuilder immediately so that errors can be detected prior to transportation.
jobBuilder.validateInputFields();
// Validate that detector configs are unique.
// This validation logically belongs to validateInputFields call but we perform it only for PUT action to avoid BWC issues which
// would occur when parsing an old job config that already had duplicate detectors.
jobBuilder.validateDetectorsAreUnique();
// Some fields cannot be set at create time
List<String> invalidJobCreationSettings = jobBuilder.invalidCreateTimeSettings();

View File

@ -49,19 +49,24 @@ public class ValidateJobConfigAction extends Action<AcknowledgedResponse> {
private Job job;
// NOTE(review): this span is a rendered diff hunk with the +/- markers stripped —
// each pre-change line (using the local name "job") is immediately followed by its
// post-change replacement (renamed to "jobBuilder"); the interleaved pairs are not
// valid Java together. Comments below mark the superseded lines.
public static Request parseRequest(XContentParser parser) {
// pre-change line, superseded by the "jobBuilder" line that follows:
Job.Builder job = Job.STRICT_PARSER.apply(parser, null);
Job.Builder jobBuilder = Job.STRICT_PARSER.apply(parser, null);
// When jobs are PUT their ID must be supplied in the URL - assume this will
// be valid unless an invalid job ID is specified in the JSON to be validated
// pre-change line, superseded by the "jobBuilder" line that follows:
job.setId(job.getId() != null ? job.getId() : "ok");
jobBuilder.setId(jobBuilder.getId() != null ? jobBuilder.getId() : "ok");
// Validate that detector configs are unique.
// This validation logically belongs to validateInputFields call but we perform it only for PUT action to avoid BWC issues which
// would occur when parsing an old job config that already had duplicate detectors.
jobBuilder.validateDetectorsAreUnique();
// Some fields cannot be set at create time
// pre-change line, superseded by the "jobBuilder" line that follows:
List<String> invalidJobCreationSettings = job.invalidCreateTimeSettings();
List<String> invalidJobCreationSettings = jobBuilder.invalidCreateTimeSettings();
if (invalidJobCreationSettings.isEmpty() == false) {
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_INVALID_CREATE_SETTINGS,
String.join(",", invalidJobCreationSettings)));
}
// pre-change line, superseded by the "jobBuilder" line that follows:
return new Request(job.build(new Date()));
return new Request(jobBuilder.build(new Date()));
}
public Request() {

View File

@ -489,44 +489,54 @@ public class Detector implements ToXContentObject, Writeable {
this.fieldName = fieldName;
}
// NOTE(review): rendered diff hunk (markers stripped) — each "public void setX"
// line is the pre-change signature, immediately followed by its post-change
// replacement "public Builder setX" which adds "return this;" so setter calls
// can be chained fluently. The interleaved pairs are not valid Java together.
public void setDetectorDescription(String detectorDescription) {
public Builder setDetectorDescription(String detectorDescription) {
this.detectorDescription = detectorDescription;
return this;
}
public void setFunction(String function) {
public Builder setFunction(String function) {
this.function = DetectorFunction.fromString(function);
return this;
}
public void setFieldName(String fieldName) {
public Builder setFieldName(String fieldName) {
this.fieldName = fieldName;
return this;
}
public void setByFieldName(String byFieldName) {
public Builder setByFieldName(String byFieldName) {
this.byFieldName = byFieldName;
return this;
}
public void setOverFieldName(String overFieldName) {
public Builder setOverFieldName(String overFieldName) {
this.overFieldName = overFieldName;
return this;
}
public void setPartitionFieldName(String partitionFieldName) {
public Builder setPartitionFieldName(String partitionFieldName) {
this.partitionFieldName = partitionFieldName;
return this;
}
public void setUseNull(boolean useNull) {
public Builder setUseNull(boolean useNull) {
this.useNull = useNull;
return this;
}
public void setExcludeFrequent(ExcludeFrequent excludeFrequent) {
public Builder setExcludeFrequent(ExcludeFrequent excludeFrequent) {
this.excludeFrequent = excludeFrequent;
return this;
}
public void setRules(List<DetectionRule> rules) {
public Builder setRules(List<DetectionRule> rules) {
this.rules = rules;
return this;
}
public void setDetectorIndex(int detectorIndex) {
public Builder setDetectorIndex(int detectorIndex) {
this.detectorIndex = detectorIndex;
return this;
}
public Detector build() {

View File

@ -1067,6 +1067,21 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
}
}
/**
 * Ensures that no two detectors in the analysis config are identical once the
 * auto-generated {@code detectorIndex} field is disregarded.
 *
 * @throws IllegalArgumentException if a duplicate detector configuration is found;
 *         the message includes the offending detector's description
 */
public void validateDetectorsAreUnique() {
Set<Detector> seen = new HashSet<>();
for (Detector current : this.analysisConfig.getDetectors()) {
// Normalize detectorIndex to a fixed value so equality ignores it — it is auto-assigned.
Detector normalized = new Detector.Builder(current).setDetectorIndex(0).build();
boolean isNew = seen.add(normalized);
if (isNew == false) {
throw new IllegalArgumentException(
Messages.getMessage(Messages.JOB_CONFIG_DUPLICATE_DETECTORS_DISALLOWED, current.getDetectorDescription()));
}
}
}
/**
* Builds a job with the given {@code createTime} and the current version.
* This should be used when a new job is created as opposed to {@link #build()}.

View File

@ -147,6 +147,8 @@ public final class Messages {
public static final String JOB_CONFIG_UPDATE_ANALYSIS_LIMITS_MODEL_MEMORY_LIMIT_CANNOT_BE_DECREASED =
"Invalid update value for analysis_limits: model_memory_limit cannot be decreased below current usage; " +
"current usage [{0}], update had [{1}]";
// New in this change: raised when two detectors in a job's analysis_config are
// identical once the auto-generated detector_index is ignored; {0} is the
// detector description (e.g. "mean(responsetime)").
public static final String JOB_CONFIG_DUPLICATE_DETECTORS_DISALLOWED =
"Duplicate detectors are not allowed: [{0}]";
public static final String JOB_CONFIG_DETECTOR_DUPLICATE_FIELD_NAME =
"{0} and {1} cannot be the same: ''{2}''";
public static final String JOB_CONFIG_DETECTOR_COUNT_DISALLOWED =

View File

@ -513,6 +513,20 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
assertEquals(e.getMessage(), "job and group names must be unique but job [foo] and group [foo] have the same name");
}
// Verifies that validateDetectorsAreUnique() rejects a job whose analysis config
// contains two detectors with the same function and field name.
public void testInvalidAnalysisConfig_duplicateDetectors() throws Exception {
Detector firstDetector = new Detector.Builder("mean", "responsetime").build();
Detector secondDetector = new Detector.Builder("mean", "responsetime").build();
Job.Builder jobBuilder = new Job.Builder("job_with_duplicate_detectors");
jobBuilder.setCreateTime(new Date());
jobBuilder.setDataDescription(new DataDescription.Builder());
jobBuilder.setAnalysisConfig(new AnalysisConfig.Builder(Arrays.asList(firstDetector, secondDetector)));
IllegalArgumentException e =
expectThrows(IllegalArgumentException.class, jobBuilder::validateDetectorsAreUnique);
assertThat(e.getMessage(), containsString("Duplicate detectors are not allowed: [mean(responsetime)]"));
}
public void testEarliestValidTimestamp_GivenEmptyDataCounts() {
assertThat(createRandomizedJob().earliestValidTimestamp(new DataCounts("foo")), equalTo(0L));
}

View File

@ -57,6 +57,7 @@ integTestRunner {
'ml/jobs_crud/Test put job after closing state index',
'ml/jobs_crud/Test put job with inconsistent body/param ids',
'ml/jobs_crud/Test put job with time field in analysis_config',
'ml/jobs_crud/Test put job with duplicate detector configurations',
'ml/jobs_crud/Test job with categorization_analyzer and categorization_filters',
'ml/jobs_get/Test get job given missing job_id',
'ml/jobs_get_result_buckets/Test mutually-exclusive params',
@ -91,6 +92,7 @@ integTestRunner {
'ml/validate/Test invalid job config',
'ml/validate/Test job config is invalid because model snapshot id set',
'ml/validate/Test job config that is invalid only because of the job ID',
'ml/validate/Test job config with duplicate detector configurations',
'ml/validate_detector/Test invalid detector',
'ml/delete_forecast/Test delete on _all forecasts not allow no forecasts',
'ml/delete_forecast/Test delete forecast on missing forecast',

View File

@ -1023,6 +1023,45 @@
"data_description" : {}
}
---
"Test put job with duplicate detector configurations":
- do:
catch: /illegal_argument_exception.*Duplicate detectors are not allowed/
ml.put_job:
job_id: jobs-crud-duplicate-detectors
body: >
{
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{"function":"max", "field_name":"responsetime"},
{"function":"max", "field_name":"responsetime"}
]
},
"data_description": {
"time_field": "@timestamp"
}
}
- do:
catch: /illegal_argument_exception.*Duplicate detectors are not allowed/
ml.put_job:
job_id: jobs-crud-duplicate-detectors-with-explicit-indices
body: >
{
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{"function":"max", "field_name":"responsetime", "detector_index": 0},
{"function":"max", "field_name":"responsetime", "detector_index": 1}
]
},
"data_description": {
"time_field": "@timestamp"
}
}
---
"Test put job after closing results index":

View File

@ -106,3 +106,40 @@
"data_description" : {
}
}
---
"Test job config with duplicate detector configurations":
- do:
catch: /illegal_argument_exception.*Duplicate detectors are not allowed/
ml.validate:
body: >
{
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{"function":"max", "field_name":"responsetime"},
{"function":"max", "field_name":"responsetime"}
]
},
"data_description": {
"time_field": "@timestamp"
}
}
- do:
catch: /illegal_argument_exception.*Duplicate detectors are not allowed/
ml.validate:
body: >
{
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{"function":"max", "field_name":"responsetime", "detector_index": 0},
{"function":"max", "field_name":"responsetime", "detector_index": 1}
]
},
"data_description": {
"time_field": "@timestamp"
}
}