[ML] Missing validations in analysis config (elastic/x-pack-elasticsearch#2103)
* Don’t set detector field names to empty strings * Check summary count field and categorisation field names are not empty strings * Check a detector has a by field when using multivariate by fields Original commit: elastic/x-pack-elasticsearch@461be12f9f
This commit is contained in:
parent
8487e1e0fe
commit
466d421abe
|
@ -530,6 +530,8 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
|
|||
* <li>Check that MULTIPLE_BUCKETSPANS are set appropriately</li>
|
||||
* <li>If Per Partition normalization is configured at least one detector
|
||||
* must have a partition field and no influences can be used</li>
|
||||
* <li>Field names cannot be empty strings</li>
|
||||
* <li>If multivariate by fields are enabled a detector must have a by field</li>
|
||||
* </ol>
|
||||
*/
|
||||
public AnalysisConfig build() {
|
||||
|
@ -559,6 +561,12 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
|
|||
verifyNoInconsistentNestedFieldNames();
|
||||
verifyInfluencerNames();
|
||||
|
||||
if (multivariateByFields != null && multivariateByFields) {
|
||||
if (aDetectorHasAByField(detectors) == false) {
|
||||
throw ExceptionsHelper.badRequestException(Messages.JOB_CONFIG_MULTIVARIATE_REQUIRES_BY_FIELD);
|
||||
}
|
||||
}
|
||||
|
||||
return new AnalysisConfig(bucketSpan, categorizationFieldName, categorizationFilters,
|
||||
latency, summaryCountFieldName, detectors, influencers, overlappingBuckets,
|
||||
resultFinalizationWindow, multivariateByFields, multipleBucketSpans, usePerPartitionNormalization);
|
||||
|
@ -677,15 +685,19 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
|
|||
|
||||
private void verifyInfluencerNames() {
|
||||
for (String influencer : influencers) {
|
||||
if (influencer == null || influencer.isEmpty()) {
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_INFLUENCER_CANNOT_BE_EMPTY));
|
||||
}
|
||||
|
||||
Detector.Builder.verifyFieldName(influencer);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean aDetectorHasAByField(List<Detector> detectors) {
|
||||
for (Detector detector : detectors) {
|
||||
if (Strings.hasLength(detector.getByFieldName())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
private static void checkDetectorsHavePartitionFields(List<Detector> detectors) {
|
||||
for (Detector detector : detectors) {
|
||||
if (!Strings.isNullOrEmpty(detector.getPartitionFieldName())) {
|
||||
|
|
|
@ -779,13 +779,19 @@ public class Detector implements ToXContentObject, Writeable {
|
|||
* @param field The field name to be validated
|
||||
*/
|
||||
public static void verifyFieldName(String field) throws ElasticsearchParseException {
|
||||
if (field != null && containsInvalidChar(field)) {
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME_CHARS, field, Detector.PROHIBITED));
|
||||
}
|
||||
if (RecordWriter.CONTROL_FIELD_NAME.equals(field)) {
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME, field, RecordWriter.CONTROL_FIELD_NAME));
|
||||
if (field != null) {
|
||||
if (field.isEmpty()) {
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_INFLUENCER_FIELD_NAME_CANNOT_BE_EMPTY));
|
||||
}
|
||||
if (containsInvalidChar(field)) {
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME_CHARS, field, Detector.PROHIBITED));
|
||||
}
|
||||
if (RecordWriter.CONTROL_FIELD_NAME.equals(field)) {
|
||||
throw ExceptionsHelper.badRequestException(
|
||||
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME, field, RecordWriter.CONTROL_FIELD_NAME));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -108,7 +108,7 @@ public final class Messages {
|
|||
public static final String JOB_CONFIG_FUNCTION_REQUIRES_OVERFIELD = "over_field_name must be set when the ''{0}'' function is used";
|
||||
public static final String JOB_CONFIG_ID_ALREADY_TAKEN = "The job cannot be created with the Id ''{0}''. The Id is already used.";
|
||||
public static final String JOB_CONFIG_ID_TOO_LONG = "The job id cannot contain more than {0,number,integer} characters.";
|
||||
public static final String JOB_CONFIG_INFLUENCER_CANNOT_BE_EMPTY = "Influencer names cannot be empty strings";
|
||||
public static final String JOB_CONFIG_INFLUENCER_FIELD_NAME_CANNOT_BE_EMPTY = "Influencers and field names cannot be empty strings";
|
||||
public static final String JOB_CONFIG_INVALID_CREATE_SETTINGS =
|
||||
"The job is configured with fields [{0}] that are illegal to set at job creation";
|
||||
public static final String JOB_CONFIG_INVALID_FIELDNAME_CHARS =
|
||||
|
@ -120,6 +120,8 @@ public final class Messages {
|
|||
public static final String JOB_CONFIG_MISSING_DATA_DESCRIPTION = "A data_description must be set";
|
||||
public static final String JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE =
|
||||
"Multiple bucket_span ''{0}'' must be a multiple of the main bucket_span ''{1}''";
|
||||
public static final String JOB_CONFIG_MULTIVARIATE_REQUIRES_BY_FIELD =
|
||||
"The multivariate_by_fields setting requires a detector with a by field";
|
||||
public static final String JOB_CONFIG_NO_ANALYSIS_FIELD_NOT_COUNT =
|
||||
"Unless the function is 'count' one of field_name, by_field_name or over_field_name must be set";
|
||||
public static final String JOB_CONFIG_NO_DETECTORS = "No detectors configured";
|
||||
|
|
|
@ -86,7 +86,6 @@ public class DatafeedJobValidatorTests extends ESTestCase {
|
|||
DatafeedConfig.DOC_COUNT);
|
||||
Job.Builder builder = buildJobBuilder("foo");
|
||||
AnalysisConfig.Builder ac = createAnalysisConfig();
|
||||
ac.setSummaryCountFieldName("");
|
||||
ac.setBucketSpan(TimeValue.timeValueSeconds(1800));
|
||||
builder.setAnalysisConfig(ac);
|
||||
Job job = builder.build(new Date());
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
package org.elasticsearch.xpack.ml.job.config;
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.ElasticsearchStatusException;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
|
@ -39,6 +40,9 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
Detector.Builder builder = new Detector.Builder("count", null);
|
||||
builder.setPartitionFieldName(isCategorization ? "mlcategory" : "part");
|
||||
detectors.add(builder.build());
|
||||
builder = new Detector.Builder("avg", "foo");
|
||||
builder.setByFieldName("by_field");
|
||||
detectors.add(builder.build());
|
||||
}
|
||||
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(detectors);
|
||||
|
||||
|
@ -428,19 +432,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
assertFalse(config2.equals(config1));
|
||||
}
|
||||
|
||||
public void testEquals_GivenMultivariateByField() {
|
||||
AnalysisConfig.Builder builder = createConfigBuilder();
|
||||
builder.setMultivariateByFields(true);
|
||||
AnalysisConfig config1 = builder.build();
|
||||
|
||||
builder = createConfigBuilder();
|
||||
builder.setMultivariateByFields(false);
|
||||
AnalysisConfig config2 = builder.build();
|
||||
|
||||
assertFalse(config1.equals(config2));
|
||||
assertFalse(config2.equals(config1));
|
||||
}
|
||||
|
||||
public void testEquals_GivenDifferentCategorizationFilters() {
|
||||
AnalysisConfig.Builder configBuilder1 = createValidCategorizationConfig();
|
||||
AnalysisConfig.Builder configBuilder2 = createValidCategorizationConfig();
|
||||
|
@ -797,13 +788,38 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
|
|||
|
||||
config.setInfluencers(Arrays.asList("inf1", ""));
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
|
||||
assertEquals("Influencer names cannot be empty strings", e.getMessage());
|
||||
assertEquals("Influencers and field names cannot be empty strings", e.getMessage());
|
||||
|
||||
config.setInfluencers(Arrays.asList("invalid\\backslash"));
|
||||
e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
|
||||
assertEquals("Invalid field name 'invalid\\backslash'. Field names cannot contain any of these characters: \",\\", e.getMessage());
|
||||
}
|
||||
|
||||
public void testFieldsCannotBeEmptyStrings() {
|
||||
AnalysisConfig.Builder config = createValidConfig();
|
||||
|
||||
config.setSummaryCountFieldName("");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
|
||||
assertEquals("Influencers and field names cannot be empty strings", e.getMessage());
|
||||
|
||||
config.setSummaryCountFieldName(null);
|
||||
config.setCategorizationFieldName("");
|
||||
e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
|
||||
assertEquals("Influencers and field names cannot be empty strings", e.getMessage());
|
||||
}
|
||||
|
||||
public void testMultiVariateByFieldsSetting() {
|
||||
Detector detector = new Detector.Builder("count", null).build();
|
||||
AnalysisConfig.Builder config = new AnalysisConfig.Builder(Collections.singletonList(detector));
|
||||
config.setMultivariateByFields(true);
|
||||
expectThrows(ElasticsearchStatusException.class, config::build);
|
||||
|
||||
Detector.Builder detectorWithByField = new Detector.Builder("avg", "foo");
|
||||
detectorWithByField.setByFieldName("bar");
|
||||
config.setDetectors(Collections.singletonList(detectorWithByField.build()));
|
||||
config.build();
|
||||
}
|
||||
|
||||
public void testVerify_GivenCategorizationFiltersContainInvalidRegex() {
|
||||
AnalysisConfig.Builder config = createValidCategorizationConfig();
|
||||
config.setCategorizationFilters(Arrays.asList("foo", "("));
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
package org.elasticsearch.xpack.ml.job.config;
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.ElasticsearchStatusException;
|
||||
import org.elasticsearch.common.io.stream.Writeable.Reader;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.test.AbstractSerializingTestCase;
|
||||
|
@ -218,6 +219,28 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
}
|
||||
|
||||
public void testEmptyFieldValuesAreNotSet() {
|
||||
Detector.Builder builder = new Detector.Builder("avg", "");
|
||||
expectThrows(ElasticsearchStatusException.class, builder::build);
|
||||
builder.setFieldName("fname");
|
||||
builder.build();
|
||||
|
||||
builder.setByFieldName("");
|
||||
expectThrows(ElasticsearchStatusException.class, builder::build);
|
||||
builder.setByFieldName("bname");
|
||||
builder.build();
|
||||
|
||||
builder.setOverFieldName("");
|
||||
expectThrows(ElasticsearchStatusException.class, builder::build);
|
||||
builder.setOverFieldName("oname");
|
||||
builder.build();
|
||||
|
||||
builder.setPartitionFieldName("");
|
||||
expectThrows(ElasticsearchStatusException.class, builder::build);
|
||||
builder.setPartitionFieldName("pname");
|
||||
builder.build();
|
||||
}
|
||||
|
||||
public void testVerifyFunctionForPreSummariedInput() {
|
||||
Collection<Object[]> testCaseArguments = getCharactersAndValidity();
|
||||
for (Object [] args : testCaseArguments) {
|
||||
|
@ -536,7 +559,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenSameByAndPartition() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setByFieldName("x");
|
||||
detector.setPartitionFieldName("x");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
@ -545,7 +568,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenSameByAndOver() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setByFieldName("x");
|
||||
detector.setOverFieldName("x");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
@ -554,7 +577,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenSameOverAndPartition() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setOverFieldName("x");
|
||||
detector.setPartitionFieldName("x");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
@ -563,7 +586,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenByIsCount() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setByFieldName("count");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
||||
|
@ -571,7 +594,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenOverIsCount() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setOverFieldName("count");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
||||
|
@ -579,7 +602,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenByIsBy() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setByFieldName("by");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
||||
|
@ -587,7 +610,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenOverIsBy() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setOverFieldName("by");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
||||
|
@ -595,7 +618,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenByIsOver() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setByFieldName("over");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
||||
|
@ -603,7 +626,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
|
|||
}
|
||||
|
||||
public void testVerify_GivenOverIsOver() {
|
||||
Detector.Builder detector = new Detector.Builder("count", "");
|
||||
Detector.Builder detector = new Detector.Builder("count", null);
|
||||
detector.setOverFieldName("over");
|
||||
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ public class ProcessCtrlTests extends ESTestCase {
|
|||
Job.Builder job = buildJobBuilder("unit-test-job");
|
||||
|
||||
Detector.Builder detectorBuilder = new Detector.Builder("mean", "value");
|
||||
detectorBuilder.setByFieldName("byField");
|
||||
detectorBuilder.setPartitionFieldName("foo");
|
||||
AnalysisConfig.Builder acBuilder = new AnalysisConfig.Builder(Collections.singletonList(detectorBuilder.build()));
|
||||
acBuilder.setBucketSpan(TimeValue.timeValueSeconds(120));
|
||||
|
|
Loading…
Reference in New Issue