[ML] Missing validations in analysis config (elastic/x-pack-elasticsearch#2103)

* Don’t set detector field names to empty strings
* Check summary count field and categorisation field names are not empty strings
* Check a detector has a by field when using multivariate by fields

Original commit: elastic/x-pack-elasticsearch@461be12f9f
This commit is contained in:
David Kyle 2017-08-01 15:54:46 +01:00 committed by GitHub
parent 8487e1e0fe
commit 466d421abe
7 changed files with 96 additions and 37 deletions

View File

@ -530,6 +530,8 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
* <li>Check that MULTIPLE_BUCKETSPANS are set appropriately</li>
* <li>If Per Partition normalization is configured at least one detector
* must have a partition field and no influences can be used</li>
* <li>Field names cannot be empty strings</li>
* <li>If multivariate by fields are enabled a detector must have a by field</li>
* </ol>
*/
public AnalysisConfig build() {
@ -559,6 +561,12 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
verifyNoInconsistentNestedFieldNames();
verifyInfluencerNames();
if (multivariateByFields != null && multivariateByFields) {
if (aDetectorHasAByField(detectors) == false) {
throw ExceptionsHelper.badRequestException(Messages.JOB_CONFIG_MULTIVARIATE_REQUIRES_BY_FIELD);
}
}
return new AnalysisConfig(bucketSpan, categorizationFieldName, categorizationFilters,
latency, summaryCountFieldName, detectors, influencers, overlappingBuckets,
resultFinalizationWindow, multivariateByFields, multipleBucketSpans, usePerPartitionNormalization);
@ -677,15 +685,19 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
private void verifyInfluencerNames() {
for (String influencer : influencers) {
if (influencer == null || influencer.isEmpty()) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_INFLUENCER_CANNOT_BE_EMPTY));
}
Detector.Builder.verifyFieldName(influencer);
}
}
private static boolean aDetectorHasAByField(List<Detector> detectors) {
for (Detector detector : detectors) {
if (Strings.hasLength(detector.getByFieldName())) {
return true;
}
}
return false;
}
private static void checkDetectorsHavePartitionFields(List<Detector> detectors) {
for (Detector detector : detectors) {
if (!Strings.isNullOrEmpty(detector.getPartitionFieldName())) {

View File

@ -779,13 +779,19 @@ public class Detector implements ToXContentObject, Writeable {
* @param field The field name to be validated
*/
public static void verifyFieldName(String field) throws ElasticsearchParseException {
if (field != null && containsInvalidChar(field)) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME_CHARS, field, Detector.PROHIBITED));
}
if (RecordWriter.CONTROL_FIELD_NAME.equals(field)) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME, field, RecordWriter.CONTROL_FIELD_NAME));
if (field != null) {
if (field.isEmpty()) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_INFLUENCER_FIELD_NAME_CANNOT_BE_EMPTY));
}
if (containsInvalidChar(field)) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME_CHARS, field, Detector.PROHIBITED));
}
if (RecordWriter.CONTROL_FIELD_NAME.equals(field)) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_INVALID_FIELDNAME, field, RecordWriter.CONTROL_FIELD_NAME));
}
}
}

View File

@ -108,7 +108,7 @@ public final class Messages {
public static final String JOB_CONFIG_FUNCTION_REQUIRES_OVERFIELD = "over_field_name must be set when the ''{0}'' function is used";
public static final String JOB_CONFIG_ID_ALREADY_TAKEN = "The job cannot be created with the Id ''{0}''. The Id is already used.";
public static final String JOB_CONFIG_ID_TOO_LONG = "The job id cannot contain more than {0,number,integer} characters.";
public static final String JOB_CONFIG_INFLUENCER_CANNOT_BE_EMPTY = "Influencer names cannot be empty strings";
public static final String JOB_CONFIG_INFLUENCER_FIELD_NAME_CANNOT_BE_EMPTY = "Influencers and field names cannot be empty strings";
public static final String JOB_CONFIG_INVALID_CREATE_SETTINGS =
"The job is configured with fields [{0}] that are illegal to set at job creation";
public static final String JOB_CONFIG_INVALID_FIELDNAME_CHARS =
@ -120,6 +120,8 @@ public final class Messages {
public static final String JOB_CONFIG_MISSING_DATA_DESCRIPTION = "A data_description must be set";
public static final String JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE =
"Multiple bucket_span ''{0}'' must be a multiple of the main bucket_span ''{1}''";
public static final String JOB_CONFIG_MULTIVARIATE_REQUIRES_BY_FIELD =
"The multivariate_by_fields setting requires a detector with a by field";
public static final String JOB_CONFIG_NO_ANALYSIS_FIELD_NOT_COUNT =
"Unless the function is 'count' one of field_name, by_field_name or over_field_name must be set";
public static final String JOB_CONFIG_NO_DETECTORS = "No detectors configured";

View File

@ -86,7 +86,6 @@ public class DatafeedJobValidatorTests extends ESTestCase {
DatafeedConfig.DOC_COUNT);
Job.Builder builder = buildJobBuilder("foo");
AnalysisConfig.Builder ac = createAnalysisConfig();
ac.setSummaryCountFieldName("");
ac.setBucketSpan(TimeValue.timeValueSeconds(1800));
builder.setAnalysisConfig(ac);
Job job = builder.build(new Date());

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.ml.job.config;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentParser;
@ -39,6 +40,9 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
Detector.Builder builder = new Detector.Builder("count", null);
builder.setPartitionFieldName(isCategorization ? "mlcategory" : "part");
detectors.add(builder.build());
builder = new Detector.Builder("avg", "foo");
builder.setByFieldName("by_field");
detectors.add(builder.build());
}
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(detectors);
@ -428,19 +432,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
assertFalse(config2.equals(config1));
}
public void testEquals_GivenMultivariateByField() {
AnalysisConfig.Builder builder = createConfigBuilder();
builder.setMultivariateByFields(true);
AnalysisConfig config1 = builder.build();
builder = createConfigBuilder();
builder.setMultivariateByFields(false);
AnalysisConfig config2 = builder.build();
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentCategorizationFilters() {
AnalysisConfig.Builder configBuilder1 = createValidCategorizationConfig();
AnalysisConfig.Builder configBuilder2 = createValidCategorizationConfig();
@ -797,13 +788,38 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
config.setInfluencers(Arrays.asList("inf1", ""));
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
assertEquals("Influencer names cannot be empty strings", e.getMessage());
assertEquals("Influencers and field names cannot be empty strings", e.getMessage());
config.setInfluencers(Arrays.asList("invalid\\backslash"));
e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
assertEquals("Invalid field name 'invalid\\backslash'. Field names cannot contain any of these characters: \",\\", e.getMessage());
}
public void testFieldsCannotBeEmptyStrings() {
AnalysisConfig.Builder config = createValidConfig();
config.setSummaryCountFieldName("");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
assertEquals("Influencers and field names cannot be empty strings", e.getMessage());
config.setSummaryCountFieldName(null);
config.setCategorizationFieldName("");
e = ESTestCase.expectThrows(ElasticsearchException.class, config::build);
assertEquals("Influencers and field names cannot be empty strings", e.getMessage());
}
public void testMultiVariateByFieldsSetting() {
Detector detector = new Detector.Builder("count", null).build();
AnalysisConfig.Builder config = new AnalysisConfig.Builder(Collections.singletonList(detector));
config.setMultivariateByFields(true);
expectThrows(ElasticsearchStatusException.class, config::build);
Detector.Builder detectorWithByField = new Detector.Builder("avg", "foo");
detectorWithByField.setByFieldName("bar");
config.setDetectors(Collections.singletonList(detectorWithByField.build()));
config.build();
}
public void testVerify_GivenCategorizationFiltersContainInvalidRegex() {
AnalysisConfig.Builder config = createValidCategorizationConfig();
config.setCategorizationFilters(Arrays.asList("foo", "("));

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.ml.job.config;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;
@ -218,6 +219,28 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
}
public void testEmptyFieldValuesAreNotSet() {
Detector.Builder builder = new Detector.Builder("avg", "");
expectThrows(ElasticsearchStatusException.class, builder::build);
builder.setFieldName("fname");
builder.build();
builder.setByFieldName("");
expectThrows(ElasticsearchStatusException.class, builder::build);
builder.setByFieldName("bname");
builder.build();
builder.setOverFieldName("");
expectThrows(ElasticsearchStatusException.class, builder::build);
builder.setOverFieldName("oname");
builder.build();
builder.setPartitionFieldName("");
expectThrows(ElasticsearchStatusException.class, builder::build);
builder.setPartitionFieldName("pname");
builder.build();
}
public void testVerifyFunctionForPreSummariedInput() {
Collection<Object[]> testCaseArguments = getCharactersAndValidity();
for (Object [] args : testCaseArguments) {
@ -536,7 +559,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenSameByAndPartition() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("x");
detector.setPartitionFieldName("x");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -545,7 +568,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenSameByAndOver() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("x");
detector.setOverFieldName("x");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -554,7 +577,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenSameOverAndPartition() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setOverFieldName("x");
detector.setPartitionFieldName("x");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -563,7 +586,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenByIsCount() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("count");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -571,7 +594,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenOverIsCount() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setOverFieldName("count");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -579,7 +602,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenByIsBy() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("by");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -587,7 +610,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenOverIsBy() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setOverFieldName("by");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -595,7 +618,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenByIsOver() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setByFieldName("over");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);
@ -603,7 +626,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
}
public void testVerify_GivenOverIsOver() {
Detector.Builder detector = new Detector.Builder("count", "");
Detector.Builder detector = new Detector.Builder("count", null);
detector.setOverFieldName("over");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, detector::build);

View File

@ -35,6 +35,7 @@ public class ProcessCtrlTests extends ESTestCase {
Job.Builder job = buildJobBuilder("unit-test-job");
Detector.Builder detectorBuilder = new Detector.Builder("mean", "value");
detectorBuilder.setByFieldName("byField");
detectorBuilder.setPartitionFieldName("foo");
AnalysisConfig.Builder acBuilder = new AnalysisConfig.Builder(Collections.singletonList(detectorBuilder.build()));
acBuilder.setBucketSpan(TimeValue.timeValueSeconds(120));