[ML] Allow categorical conditions to use all analysis fields (elastic/x-pack-elasticsearch#3615)

Categorical conditions should be allowed to use any of the analysis
fields as a field_name.

Original commit: elastic/x-pack-elasticsearch@a0d25e7445
This commit is contained in:
Dimitris Athanasiou 2018-01-18 15:48:56 +00:00 committed by GitHub
parent b785f9c61b
commit eefd8e7940
3 changed files with 50 additions and 5 deletions

View File

@ -767,8 +767,7 @@ public class Detector implements ToXContentObject, Writeable {
}
/**
 * Gathers the by, over and partition field names of this detector into a
 * single list, leaving out any that are null.
 *
 * @return a list containing the non-null values among byFieldName,
 *         overFieldName and partitionFieldName
 */
public List<String> extractAnalysisFields() {
// NOTE(review): the two-line declaration and the one-line declaration below
// look like the before/after sides of this diff hunk — confirm only one remains.
List<String> analysisFields = Arrays.asList(byFieldName,
overFieldName, partitionFieldName);
List<String> analysisFields = Arrays.asList(byFieldName, overFieldName, partitionFieldName);
// Keep only the non-null entries.
return analysisFields.stream().filter(item -> item != null).collect(Collectors.toList());
}
@ -800,8 +799,21 @@ public class Detector implements ToXContentObject, Writeable {
private void checkScoping(DetectionRule rule) throws ElasticsearchParseException {
String targetFieldName = rule.getTargetFieldName();
checkTargetFieldNameIsValid(extractAnalysisFields(), targetFieldName);
List<String> validOptions = getValidFieldNameOptions(rule);
for (RuleCondition condition : rule.getConditions()) {
List<String> validOptions = Collections.emptyList();
switch (condition.getType()) {
case CATEGORICAL:
validOptions = extractAnalysisFields();
break;
case NUMERICAL_ACTUAL:
case NUMERICAL_TYPICAL:
case NUMERICAL_DIFF_ABS:
validOptions = getValidFieldNameOptionsForNumeric(rule);
break;
case TIME:
default:
break;
}
if (!validOptions.contains(condition.getFieldName())) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_DETECTION_RULE_CONDITION_INVALID_FIELD_NAME, validOptions,
condition.getFieldName());
@ -819,7 +831,7 @@ public class Detector implements ToXContentObject, Writeable {
}
}
private List<String> getValidFieldNameOptions(DetectionRule rule) {
private List<String> getValidFieldNameOptionsForNumeric(DetectionRule rule) {
List<String> result = new ArrayList<>();
if (overFieldName != null) {
result.add(byFieldName == null ? overFieldName : byFieldName);

View File

@ -535,6 +535,38 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
detector.build();
}
/**
 * Builds a detector that has partition, over and by fields configured and a
 * single rule with one categorical condition per field. The test passes when
 * building the detector does not throw.
 */
public void testVerify_GivenCategoricalRuleOnAllPartitioningFields() {
    Detector.Builder builder = new Detector.Builder("count", null);
    builder.setPartitionFieldName("my_partition");
    builder.setOverFieldName("my_over");
    builder.setByFieldName("my_by");

    // One categorical condition for each of the three analysis fields.
    DetectionRule.Builder ruleBuilder = new DetectionRule.Builder(Arrays.asList(
            RuleCondition.createCategorical("my_partition", "my_filter_id"),
            RuleCondition.createCategorical("my_over", "my_filter_id"),
            RuleCondition.createCategorical("my_by", "my_filter_id")));
    DetectionRule categoricalRule = ruleBuilder.build();

    builder.setRules(Collections.singletonList(categoricalRule));
    builder.build();
}
/**
 * Builds a detector whose only rule has a categorical condition on the metric
 * field ("my_metric") rather than on the by, over or partition field, and
 * expects the build to fail with the invalid-field-name message.
 */
public void testVerify_GivenCategoricalRuleOnInvalidField() {
    Detector.Builder builder = new Detector.Builder("mean", "my_metric");
    builder.setPartitionFieldName("my_partition");
    builder.setOverFieldName("my_over");
    builder.setByFieldName("my_by");

    // The condition targets the metric field, which is not among the detector's by/over/partition fields.
    DetectionRule.Builder ruleBuilder = new DetectionRule.Builder(Collections.singletonList(
            RuleCondition.createCategorical("my_metric", "my_filter_id")));
    DetectionRule invalidRule = ruleBuilder.build();
    builder.setRules(Collections.singletonList(invalidRule));

    ElasticsearchException thrown = ESTestCase.expectThrows(ElasticsearchException.class, () -> builder.build());

    String expectedMessage = Messages.getMessage(
            Messages.JOB_CONFIG_DETECTION_RULE_CONDITION_INVALID_FIELD_NAME,
            "[my_by, my_over, my_partition]",
            "my_metric");
    assertEquals(expectedMessage, thrown.getMessage());
}
public void testVerify_GivenSameByAndPartition() {
Detector.Builder detector = new Detector.Builder("count", "");
detector.setByFieldName("x");

View File

@ -151,12 +151,13 @@ setup:
"description":"Analysis of response time by airline",
"analysis_config" : {
"bucket_span": "3600s",
"detectors" :[{"function":"mean","field_name":"airline",
"detectors" :[{"function":"mean","field_name":"responsetime", "by_field_name": "airline",
"rules": [
{
"conditions": [
{
"type": "categorical",
"field_name": "airline",
"filter_id": "filter-foo"
}
]