[ML] Expand detector function shortcuts (elastic/x-pack-elasticsearch#3789)

relates elastic/x-pack-elasticsearch#3162


Original commit: elastic/x-pack-elasticsearch@a3512c6693
Dimitris Athanasiou 2018-01-31 13:11:32 +00:00 committed by GitHub
parent 71f68d3413
commit ed11dad855
11 changed files with 380 additions and 418 deletions
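In short: detector functions move from plain string constants to a DetectorFunction enum, so shortcut names such as nzc, dc or low_dc are resolved to their canonical forms (non_zero_count, distinct_count, low_distinct_count) as soon as a detector is parsed, and only the full names are serialized and returned to clients. A minimal sketch of the new behaviour, assuming the DetectorFunction class added in this commit is on the classpath (the demo class name is just for illustration):

import org.elasticsearch.xpack.core.ml.job.config.DetectorFunction;

public class ShortcutExpansionDemo {
    public static void main(String[] args) {
        // Shortcuts resolve to the canonical enum constant
        DetectorFunction f = DetectorFunction.fromString("nzc");
        System.out.println(f.getFullName());                      // non_zero_count
        System.out.println(f == DetectorFunction.NON_ZERO_COUNT); // true

        // Unknown function names are rejected up front
        try {
            DetectorFunction.fromString("made_up_function");
        } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage());                   // Unknown function 'made_up_function'
        }
    }
}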

View File

@@ -601,9 +601,9 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
private void verifyNoMetricFunctionsWhenSummaryCountFieldNameIsSet() {
if (Strings.isNullOrEmpty(summaryCountFieldName) == false &&
detectors.stream().anyMatch(d -> Detector.METRIC.equals(d.getFunction()))) {
detectors.stream().anyMatch(d -> DetectorFunction.METRIC.equals(d.getFunction()))) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_INCOMPATIBLE_PRESUMMARIZED, Detector.METRIC));
Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_INCOMPATIBLE_PRESUMMARIZED, DetectorFunction.METRIC));
}
}
@@ -763,7 +763,7 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
// If any detector function is rare/freq_rare, mustn't use overlapping buckets
boolean mustNotUse = false;
List<String> illegalFunctions = new ArrayList<>();
List<DetectorFunction> illegalFunctions = new ArrayList<>();
for (Detector d : detectors) {
if (Detector.NO_OVERLAPPING_BUCKETS_FUNCTIONS.contains(d.getFunction())) {
illegalFunctions.add(d.getFunction());

View File

@@ -41,7 +41,7 @@ public final class DefaultDetectorDescription {
* @param sb the {@code StringBuilder} to append to
*/
public static void appendOn(Detector detector, StringBuilder sb) {
if (isNotNullOrEmpty(detector.getFunction())) {
if (isNotNullOrEmpty(detector.getFunction().getFullName())) {
sb.append(detector.getFunction());
if (isNotNullOrEmpty(detector.getFieldName())) {
sb.append('(').append(quoteField(detector.getFieldName()))

View File

@@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
@@ -117,200 +118,96 @@ public class Detector implements ToXContentObject, Writeable {
}
}
public static final String COUNT = "count";
public static final String HIGH_COUNT = "high_count";
public static final String LOW_COUNT = "low_count";
public static final String NON_ZERO_COUNT = "non_zero_count";
public static final String LOW_NON_ZERO_COUNT = "low_non_zero_count";
public static final String HIGH_NON_ZERO_COUNT = "high_non_zero_count";
public static final String NZC = "nzc";
public static final String LOW_NZC = "low_nzc";
public static final String HIGH_NZC = "high_nzc";
public static final String DISTINCT_COUNT = "distinct_count";
public static final String LOW_DISTINCT_COUNT = "low_distinct_count";
public static final String HIGH_DISTINCT_COUNT = "high_distinct_count";
public static final String DC = "dc";
public static final String LOW_DC = "low_dc";
public static final String HIGH_DC = "high_dc";
public static final String RARE = "rare";
public static final String FREQ_RARE = "freq_rare";
public static final String INFO_CONTENT = "info_content";
public static final String LOW_INFO_CONTENT = "low_info_content";
public static final String HIGH_INFO_CONTENT = "high_info_content";
public static final String METRIC = "metric";
public static final String MEAN = "mean";
public static final String MEDIAN = "median";
public static final String LOW_MEDIAN = "low_median";
public static final String HIGH_MEDIAN = "high_median";
public static final String HIGH_MEAN = "high_mean";
public static final String LOW_MEAN = "low_mean";
public static final String AVG = "avg";
public static final String HIGH_AVG = "high_avg";
public static final String LOW_AVG = "low_avg";
public static final String MIN = "min";
public static final String MAX = "max";
public static final String SUM = "sum";
public static final String LOW_SUM = "low_sum";
public static final String HIGH_SUM = "high_sum";
public static final String NON_NULL_SUM = "non_null_sum";
public static final String LOW_NON_NULL_SUM = "low_non_null_sum";
public static final String HIGH_NON_NULL_SUM = "high_non_null_sum";
public static final String BY = "by";
public static final String OVER = "over";
/**
* Population variance is called varp to match Splunk
*/
public static final String POPULATION_VARIANCE = "varp";
public static final String LOW_POPULATION_VARIANCE = "low_varp";
public static final String HIGH_POPULATION_VARIANCE = "high_varp";
public static final String TIME_OF_DAY = "time_of_day";
public static final String TIME_OF_WEEK = "time_of_week";
public static final String LAT_LONG = "lat_long";
/**
* The set of valid function names.
*/
public static final Set<String> ANALYSIS_FUNCTIONS =
new HashSet<>(Arrays.asList(
// The convention here is that synonyms (only) go on the same line
COUNT,
HIGH_COUNT,
LOW_COUNT,
NON_ZERO_COUNT, NZC,
LOW_NON_ZERO_COUNT, LOW_NZC,
HIGH_NON_ZERO_COUNT, HIGH_NZC,
DISTINCT_COUNT, DC,
LOW_DISTINCT_COUNT, LOW_DC,
HIGH_DISTINCT_COUNT, HIGH_DC,
RARE,
FREQ_RARE,
INFO_CONTENT,
LOW_INFO_CONTENT,
HIGH_INFO_CONTENT,
METRIC,
MEAN, AVG,
HIGH_MEAN, HIGH_AVG,
LOW_MEAN, LOW_AVG,
MEDIAN,
LOW_MEDIAN,
HIGH_MEDIAN,
MIN,
MAX,
SUM,
LOW_SUM,
HIGH_SUM,
NON_NULL_SUM,
LOW_NON_NULL_SUM,
HIGH_NON_NULL_SUM,
POPULATION_VARIANCE,
LOW_POPULATION_VARIANCE,
HIGH_POPULATION_VARIANCE,
TIME_OF_DAY,
TIME_OF_WEEK,
LAT_LONG
));
/**
* The set of functions that do not require a field, by field or over field
*/
public static final Set<String> COUNT_WITHOUT_FIELD_FUNCTIONS =
new HashSet<>(Arrays.asList(
COUNT,
HIGH_COUNT,
LOW_COUNT,
NON_ZERO_COUNT, NZC,
LOW_NON_ZERO_COUNT, LOW_NZC,
HIGH_NON_ZERO_COUNT, HIGH_NZC,
TIME_OF_DAY,
TIME_OF_WEEK
));
public static final EnumSet<DetectorFunction> COUNT_WITHOUT_FIELD_FUNCTIONS = EnumSet.of(
DetectorFunction.COUNT,
DetectorFunction.HIGH_COUNT,
DetectorFunction.LOW_COUNT,
DetectorFunction.NON_ZERO_COUNT,
DetectorFunction.LOW_NON_ZERO_COUNT,
DetectorFunction.HIGH_NON_ZERO_COUNT,
DetectorFunction.TIME_OF_DAY,
DetectorFunction.TIME_OF_WEEK
);
/**
* The set of functions that require a fieldname
*/
public static final Set<String> FIELD_NAME_FUNCTIONS =
new HashSet<>(Arrays.asList(
DISTINCT_COUNT, DC,
LOW_DISTINCT_COUNT, LOW_DC,
HIGH_DISTINCT_COUNT, HIGH_DC,
INFO_CONTENT,
LOW_INFO_CONTENT,
HIGH_INFO_CONTENT,
METRIC,
MEAN, AVG,
HIGH_MEAN, HIGH_AVG,
LOW_MEAN, LOW_AVG,
MEDIAN,
LOW_MEDIAN,
HIGH_MEDIAN,
MIN,
MAX,
SUM,
LOW_SUM,
HIGH_SUM,
NON_NULL_SUM,
LOW_NON_NULL_SUM,
HIGH_NON_NULL_SUM,
POPULATION_VARIANCE,
LOW_POPULATION_VARIANCE,
HIGH_POPULATION_VARIANCE,
LAT_LONG
));
public static final EnumSet<DetectorFunction> FIELD_NAME_FUNCTIONS = EnumSet.of(
DetectorFunction.DISTINCT_COUNT,
DetectorFunction.LOW_DISTINCT_COUNT,
DetectorFunction.HIGH_DISTINCT_COUNT,
DetectorFunction.INFO_CONTENT,
DetectorFunction.LOW_INFO_CONTENT,
DetectorFunction.HIGH_INFO_CONTENT,
DetectorFunction.METRIC,
DetectorFunction.MEAN, DetectorFunction.AVG,
DetectorFunction.HIGH_MEAN, DetectorFunction.HIGH_AVG,
DetectorFunction.LOW_MEAN, DetectorFunction.LOW_AVG,
DetectorFunction.MEDIAN,
DetectorFunction.LOW_MEDIAN,
DetectorFunction.HIGH_MEDIAN,
DetectorFunction.MIN,
DetectorFunction.MAX,
DetectorFunction.SUM,
DetectorFunction.LOW_SUM,
DetectorFunction.HIGH_SUM,
DetectorFunction.NON_NULL_SUM,
DetectorFunction.LOW_NON_NULL_SUM,
DetectorFunction.HIGH_NON_NULL_SUM,
DetectorFunction.VARP,
DetectorFunction.LOW_VARP,
DetectorFunction.HIGH_VARP,
DetectorFunction.LAT_LONG
);
/**
* The set of functions that require a by fieldname
*/
public static final Set<String> BY_FIELD_NAME_FUNCTIONS =
new HashSet<>(Arrays.asList(
RARE,
FREQ_RARE
));
public static final EnumSet<DetectorFunction> BY_FIELD_NAME_FUNCTIONS = EnumSet.of(
DetectorFunction.RARE,
DetectorFunction.FREQ_RARE
);
/**
* The set of functions that require an over fieldname
*/
public static final Set<String> OVER_FIELD_NAME_FUNCTIONS =
new HashSet<>(Arrays.asList(
FREQ_RARE
));
/**
* The set of functions that cannot have a by fieldname
*/
public static final Set<String> NO_BY_FIELD_NAME_FUNCTIONS =
new HashSet<>();
public static final EnumSet<DetectorFunction> OVER_FIELD_NAME_FUNCTIONS = EnumSet.of(
DetectorFunction.FREQ_RARE
);
/**
* The set of functions that cannot have an over fieldname
*/
public static final Set<String> NO_OVER_FIELD_NAME_FUNCTIONS =
new HashSet<>(Arrays.asList(
NON_ZERO_COUNT, NZC,
LOW_NON_ZERO_COUNT, LOW_NZC,
HIGH_NON_ZERO_COUNT, HIGH_NZC
));
public static final EnumSet<DetectorFunction> NO_OVER_FIELD_NAME_FUNCTIONS = EnumSet.of(
DetectorFunction.NON_ZERO_COUNT,
DetectorFunction.LOW_NON_ZERO_COUNT,
DetectorFunction.HIGH_NON_ZERO_COUNT
);
/**
* The set of functions that must not be used with overlapping buckets
*/
public static final Set<String> NO_OVERLAPPING_BUCKETS_FUNCTIONS =
new HashSet<>(Arrays.asList(
RARE,
FREQ_RARE
));
public static final EnumSet<DetectorFunction> NO_OVERLAPPING_BUCKETS_FUNCTIONS = EnumSet.of(
DetectorFunction.RARE,
DetectorFunction.FREQ_RARE
);
/**
* The set of functions that should not be used with overlapping buckets
* as they gain no benefit but have overhead
*/
public static final Set<String> OVERLAPPING_BUCKETS_FUNCTIONS_NOT_NEEDED =
new HashSet<>(Arrays.asList(
MIN,
MAX,
TIME_OF_DAY,
TIME_OF_WEEK
));
public static final EnumSet<DetectorFunction> OVERLAPPING_BUCKETS_FUNCTIONS_NOT_NEEDED = EnumSet.of(
DetectorFunction.MIN,
DetectorFunction.MAX,
DetectorFunction.TIME_OF_DAY,
DetectorFunction.TIME_OF_WEEK
);
/**
* field names cannot contain any of these characters
@@ -323,7 +220,7 @@ public class Detector implements ToXContentObject, Writeable {
private final String detectorDescription;
private final String function;
private final DetectorFunction function;
private final String fieldName;
private final String byFieldName;
private final String overFieldName;
@@ -335,7 +232,7 @@ public class Detector implements ToXContentObject, Writeable {
public Detector(StreamInput in) throws IOException {
detectorDescription = in.readString();
function = in.readString();
function = DetectorFunction.fromString(in.readString());
fieldName = in.readOptionalString();
byFieldName = in.readOptionalString();
overFieldName = in.readOptionalString();
@@ -354,7 +251,7 @@ public class Detector implements ToXContentObject, Writeable {
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(detectorDescription);
out.writeString(function);
out.writeString(function.getFullName());
out.writeOptionalString(fieldName);
out.writeOptionalString(byFieldName);
out.writeOptionalString(overFieldName);
@@ -406,7 +303,7 @@ public class Detector implements ToXContentObject, Writeable {
return builder;
}
private Detector(String detectorDescription, String function, String fieldName, String byFieldName, String overFieldName,
private Detector(String detectorDescription, DetectorFunction function, String fieldName, String byFieldName, String overFieldName,
String partitionFieldName, boolean useNull, ExcludeFrequent excludeFrequent, List<DetectionRule> rules,
int detectorIndex) {
this.function = function;
@@ -426,12 +323,11 @@ public class Detector implements ToXContentObject, Writeable {
}
/**
* The analysis function used e.g. count, rare, min etc. There is no
* validation to check this value is one a predefined set
* The analysis function used e.g. count, rare, min etc.
*
* @return The function or <code>null</code> if not set
*/
public String getFunction() {
public DetectorFunction getFunction() {
return function;
}
@@ -577,10 +473,11 @@ public class Detector implements ToXContentObject, Writeable {
* error-prone
* </ul>
*/
static final Set<String> FUNCTIONS_WITHOUT_RULE_SUPPORT = new HashSet<>(Arrays.asList(Detector.LAT_LONG, Detector.METRIC));
static final EnumSet<DetectorFunction> FUNCTIONS_WITHOUT_RULE_SUPPORT = EnumSet.of(
DetectorFunction.LAT_LONG, DetectorFunction.METRIC);
private String detectorDescription;
private String function;
private DetectorFunction function;
private String fieldName;
private String byFieldName;
private String overFieldName;
@@ -608,6 +505,10 @@ public class Detector implements ToXContentObject, Writeable {
}
public Builder(String function, String fieldName) {
this(DetectorFunction.fromString(function), fieldName);
}
public Builder(DetectorFunction function, String fieldName) {
this.function = function;
this.fieldName = fieldName;
}
@@ -617,7 +518,7 @@ public class Detector implements ToXContentObject, Writeable {
}
public void setFunction(String function) {
this.function = function;
this.function = DetectorFunction.fromString(function);
}
public void setFieldName(String fieldName) {
@@ -657,13 +558,10 @@ public class Detector implements ToXContentObject, Writeable {
boolean emptyByField = Strings.isEmpty(byFieldName);
boolean emptyOverField = Strings.isEmpty(overFieldName);
boolean emptyPartitionField = Strings.isEmpty(partitionFieldName);
if (Detector.ANALYSIS_FUNCTIONS.contains(function) == false) {
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_UNKNOWN_FUNCTION, function));
}
if (emptyField && emptyByField && emptyOverField) {
if (!Detector.COUNT_WITHOUT_FIELD_FUNCTIONS.contains(function)) {
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_NO_ANALYSIS_FIELD_NOT_COUNT));
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_ANALYSIS_FIELD_MUST_BE_SET));
}
}
@@ -682,11 +580,6 @@ public class Detector implements ToXContentObject, Writeable {
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_REQUIRES_BYFIELD, function));
}
if (!emptyByField && Detector.NO_BY_FIELD_NAME_FUNCTIONS.contains(function)) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_BYFIELD_INCOMPATIBLE_FUNCTION, function));
}
if (emptyOverField && Detector.OVER_FIELD_NAME_FUNCTIONS.contains(function)) {
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_REQUIRES_OVERFIELD, function));
}
@@ -702,7 +595,7 @@ public class Detector implements ToXContentObject, Writeable {
verifyFieldName(field);
}
String function = this.function == null ? Detector.METRIC : this.function;
DetectorFunction function = this.function == null ? DetectorFunction.METRIC : this.function;
if (rules.isEmpty() == false) {
if (FUNCTIONS_WITHOUT_RULE_SUPPORT.contains(function)) {
String msg = Messages.getMessage(Messages.JOB_CONFIG_DETECTION_RULE_NOT_SUPPORTED_BY_FUNCTION, function);
@@ -733,13 +626,13 @@ public class Detector implements ToXContentObject, Writeable {
}
// by/over field names cannot be "count", "over", "by" - this requirement dates back to the early
// days of the Splunk app and could be removed now BUT ONLY IF THE C++ CODE IS CHANGED
// days of the ML code and could be removed now BUT ONLY IF THE C++ CODE IS CHANGED
// FIRST - see https://github.com/elastic/x-pack-elasticsearch/issues/858
if (COUNT.equals(byFieldName)) {
if (DetectorFunction.COUNT.getFullName().equals(byFieldName)) {
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_COUNT_DISALLOWED,
BY_FIELD_NAME_FIELD.getPreferredName()));
}
if (COUNT.equals(overFieldName)) {
if (DetectorFunction.COUNT.getFullName().equals(overFieldName)) {
throw ExceptionsHelper.badRequestException(Messages.getMessage(Messages.JOB_CONFIG_DETECTOR_COUNT_DISALLOWED,
OVER_FIELD_NAME_FIELD.getPreferredName()));
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.job.config;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
public enum DetectorFunction {
COUNT,
LOW_COUNT,
HIGH_COUNT,
NON_ZERO_COUNT("nzc"),
LOW_NON_ZERO_COUNT("low_nzc"),
HIGH_NON_ZERO_COUNT("high_nzc"),
DISTINCT_COUNT("dc"),
LOW_DISTINCT_COUNT("low_dc"),
HIGH_DISTINCT_COUNT("high_dc"),
RARE,
FREQ_RARE,
INFO_CONTENT,
LOW_INFO_CONTENT,
HIGH_INFO_CONTENT,
METRIC,
MEAN,
LOW_MEAN,
HIGH_MEAN,
AVG,
LOW_AVG,
HIGH_AVG,
MEDIAN,
LOW_MEDIAN,
HIGH_MEDIAN,
MIN,
MAX,
SUM,
LOW_SUM,
HIGH_SUM,
NON_NULL_SUM,
LOW_NON_NULL_SUM,
HIGH_NON_NULL_SUM,
VARP,
LOW_VARP,
HIGH_VARP,
TIME_OF_DAY,
TIME_OF_WEEK,
LAT_LONG;
private Set<String> shortcuts;
DetectorFunction() {
shortcuts = Collections.emptySet();
}
DetectorFunction(String... shortcuts) {
this.shortcuts = Arrays.stream(shortcuts).collect(Collectors.toSet());
}
public String getFullName() {
return name().toLowerCase(Locale.ROOT);
}
@Override
public String toString() {
return getFullName();
}
public static DetectorFunction fromString(String op) {
for (DetectorFunction function : values()) {
if (function.getFullName().equals(op) || function.shortcuts.contains(op)) {
return function;
}
}
throw new IllegalArgumentException(Messages.getMessage(Messages.JOB_CONFIG_UNKNOWN_FUNCTION, op));
}
}
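Because Detector.Builder now routes string input through DetectorFunction.fromString and serialization uses getFullName(), a detector configured with a shortcut is stored and reported under its expanded name. A rough usage sketch, assuming the Detector and DetectorFunction classes from this commit (the demo class name is just for illustration):

import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.DetectorFunction;

public class DetectorShortcutDemo {
    public static void main(String[] args) {
        // "nzc" is accepted as input but expanded as soon as the detector is built
        Detector.Builder builder = new Detector.Builder("nzc", null);
        builder.setByFieldName("airline");
        Detector detector = builder.build();

        // The function is held as the enum constant; getFullName() is what writeTo()
        // and the job's REST representation expose, so clients see "non_zero_count"
        System.out.println(detector.getFunction() == DetectorFunction.NON_ZERO_COUNT); // true
        System.out.println(detector.getFunction().getFullName());                      // non_zero_count
    }
}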

View File

@@ -69,7 +69,6 @@ public final class Messages {
public static final String JOB_AUDIT_REVERTED = "Job model snapshot reverted to ''{0}''";
public static final String JOB_AUDIT_SNAPSHOT_DELETED = "Model snapshot [{0}] with description ''{1}'' deleted";
public static final String JOB_CONFIG_BYFIELD_INCOMPATIBLE_FUNCTION = "by_field_name cannot be used with function ''{0}''";
public static final String JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_DUPLICATES = "categorization_filters contain duplicates";
public static final String JOB_CONFIG_CATEGORIZATION_FILTERS_CONTAINS_EMPTY =
"categorization_filters are not allowed to contain empty strings";
@@ -137,8 +136,8 @@ public final class Messages {
public static final String JOB_CONFIG_MISSING_DATA_DESCRIPTION = "A data_description must be set";
public static final String JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE =
"Multiple bucket_span ''{0}'' must be a multiple of the main bucket_span ''{1}''";
public static final String JOB_CONFIG_NO_ANALYSIS_FIELD_NOT_COUNT =
"Unless the function is 'count' one of field_name, by_field_name or over_field_name must be set";
public static final String JOB_CONFIG_ANALYSIS_FIELD_MUST_BE_SET =
"Unless a count or temporal function is used one of field_name, by_field_name or over_field_name must be set";
public static final String JOB_CONFIG_NO_DETECTORS = "No detectors configured";
public static final String JOB_CONFIG_OVERFIELD_INCOMPATIBLE_FUNCTION =
"over_field_name cannot be used with function ''{0}''";

View File

@@ -526,42 +526,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
return new AnalysisConfig.Builder(Collections.singletonList(new Detector.Builder("min", "count").build()));
}
public void testVerify_throws() {
// count works with no fields
Detector d = new Detector.Builder("count", null).build();
new AnalysisConfig.Builder(Collections.singletonList(d)).build();
try {
d = new Detector.Builder("distinct_count", null).build();
new AnalysisConfig.Builder(Collections.singletonList(d)).build();
assertTrue(false); // shouldn't get here
} catch (ElasticsearchException e) {
assertEquals("Unless the function is 'count' one of field_name, by_field_name or over_field_name must be set", e.getMessage());
}
// should work now
Detector.Builder builder = new Detector.Builder("distinct_count", "somefield");
builder.setOverFieldName("over_field");
new AnalysisConfig.Builder(Collections.singletonList(builder.build())).build();
builder = new Detector.Builder("info_content", "somefield");
builder.setOverFieldName("over_field");
builder.build();
new AnalysisConfig.Builder(Collections.singletonList(builder.build())).build();
builder.setByFieldName("by_field");
new AnalysisConfig.Builder(Collections.singletonList(builder.build())).build();
try {
builder = new Detector.Builder("made_up_function", "somefield");
builder.setOverFieldName("over_field");
new AnalysisConfig.Builder(Collections.singletonList(builder.build())).build();
assertTrue(false); // shouldn't get here
} catch (ElasticsearchException e) {
assertEquals("Unknown function 'made_up_function'", e.getMessage());
}
}
public void testVerify_GivenNegativeBucketSpan() {
AnalysisConfig.Builder config = createValidConfig();
config.setBucketSpan(TimeValue.timeValueSeconds(-1));
@@ -728,7 +692,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
AnalysisConfig.Builder ac = new AnalysisConfig.Builder(Collections.singletonList(d));
ac.setSummaryCountFieldName("my_summary_count");
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, ac::build);
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_INCOMPATIBLE_PRESUMMARIZED, Detector.METRIC), e.getMessage());
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_INCOMPATIBLE_PRESUMMARIZED, DetectorFunction.METRIC), e.getMessage());
}
public void testMultipleBucketsConfig() {

View File

@@ -6,21 +6,23 @@
package org.elasticsearch.xpack.core.ml.job.config;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchStatusException;
import org.elasticsearch.common.io.stream.Writeable.Reader;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.writer.RecordWriter;
import org.junit.Assert;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.hamcrest.Matchers.equalTo;
public class DetectorTests extends AbstractSerializingTestCase<Detector> {
@@ -151,12 +153,12 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
@Override
protected Detector createTestInstance() {
String function;
DetectorFunction function;
Detector.Builder detector;
if (randomBoolean()) {
detector = new Detector.Builder(function = randomFrom(Detector.COUNT_WITHOUT_FIELD_FUNCTIONS), null);
} else {
Set<String> functions = new HashSet<>(Detector.FIELD_NAME_FUNCTIONS);
EnumSet<DetectorFunction> functions = EnumSet.copyOf(Detector.FIELD_NAME_FUNCTIONS);
functions.removeAll(Detector.Builder.FUNCTIONS_WITHOUT_RULE_SUPPORT);
detector = new Detector.Builder(function = randomFrom(functions), randomAlphaOfLengthBetween(1, 20));
}
@@ -168,7 +170,7 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
detector.setPartitionFieldName(fieldName = randomAlphaOfLengthBetween(6, 20));
} else if (randomBoolean() && Detector.NO_OVER_FIELD_NAME_FUNCTIONS.contains(function) == false) {
detector.setOverFieldName(fieldName = randomAlphaOfLengthBetween(6, 20));
} else if (randomBoolean() && Detector.NO_BY_FIELD_NAME_FUNCTIONS.contains(function) == false) {
} else if (randomBoolean()) {
detector.setByFieldName(fieldName = randomAlphaOfLengthBetween(6, 20));
}
if (randomBoolean()) {
@@ -295,217 +297,169 @@ public class DetectorTests extends AbstractSerializingTestCase<Detector> {
});
}
public void testVerify() throws Exception {
public void testVerify_GivenFunctionOnly() {
// if nothing else is set the count functions (excluding distinct count)
// are the only allowable functions
new Detector.Builder(Detector.COUNT, null).build();
new Detector.Builder(DetectorFunction.COUNT, null).build();
Set<String> difference = new HashSet<String>(Detector.ANALYSIS_FUNCTIONS);
difference.remove(Detector.COUNT);
difference.remove(Detector.HIGH_COUNT);
difference.remove(Detector.LOW_COUNT);
difference.remove(Detector.NON_ZERO_COUNT);
difference.remove(Detector.NZC);
difference.remove(Detector.LOW_NON_ZERO_COUNT);
difference.remove(Detector.LOW_NZC);
difference.remove(Detector.HIGH_NON_ZERO_COUNT);
difference.remove(Detector.HIGH_NZC);
difference.remove(Detector.TIME_OF_DAY);
difference.remove(Detector.TIME_OF_WEEK);
for (String f : difference) {
try {
new Detector.Builder(f, null).build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
EnumSet<DetectorFunction> difference = EnumSet.allOf(DetectorFunction.class);
difference.remove(DetectorFunction.COUNT);
difference.remove(DetectorFunction.HIGH_COUNT);
difference.remove(DetectorFunction.LOW_COUNT);
difference.remove(DetectorFunction.NON_ZERO_COUNT);
difference.remove(DetectorFunction.LOW_NON_ZERO_COUNT);
difference.remove(DetectorFunction.HIGH_NON_ZERO_COUNT);
difference.remove(DetectorFunction.TIME_OF_DAY);
difference.remove(DetectorFunction.TIME_OF_WEEK);
for (DetectorFunction f : difference) {
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
() -> new Detector.Builder(f, null).build());
assertThat(e.getMessage(), equalTo("Unless a count or temporal function is used one of field_name," +
" by_field_name or over_field_name must be set"));
}
}
// certain fields aren't allowed with certain functions
// first do the over field
for (String f : new String[]{Detector.NON_ZERO_COUNT, Detector.NZC,
Detector.LOW_NON_ZERO_COUNT, Detector.LOW_NZC, Detector.HIGH_NON_ZERO_COUNT,
Detector.HIGH_NZC}) {
public void testVerify_GivenFunctionsNotSupportingOverField() {
EnumSet<DetectorFunction> noOverFieldFunctions = EnumSet.of(
DetectorFunction.NON_ZERO_COUNT,
DetectorFunction.LOW_NON_ZERO_COUNT,
DetectorFunction.HIGH_NON_ZERO_COUNT
);
for (DetectorFunction f: noOverFieldFunctions) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setOverFieldName("over_field");
try {
builder.build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> builder.build());
assertThat(e.getMessage(), equalTo("over_field_name cannot be used with function '" + f + "'"));
}
}
// these functions cannot have just an over field
difference = new HashSet<>(Detector.ANALYSIS_FUNCTIONS);
difference.remove(Detector.COUNT);
difference.remove(Detector.HIGH_COUNT);
difference.remove(Detector.LOW_COUNT);
difference.remove(Detector.TIME_OF_DAY);
difference.remove(Detector.TIME_OF_WEEK);
for (String f : difference) {
public void testVerify_GivenFunctionsCannotHaveJustOverField() {
EnumSet<DetectorFunction> difference = EnumSet.allOf(DetectorFunction.class);
difference.remove(DetectorFunction.COUNT);
difference.remove(DetectorFunction.LOW_COUNT);
difference.remove(DetectorFunction.HIGH_COUNT);
difference.remove(DetectorFunction.TIME_OF_DAY);
difference.remove(DetectorFunction.TIME_OF_WEEK);
for (DetectorFunction f: difference) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setOverFieldName("over_field");
try {
builder.build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
expectThrows(ElasticsearchStatusException.class, () -> builder.build());
}
}
// these functions can have just an over field
for (String f : new String[]{Detector.COUNT, Detector.HIGH_COUNT,
Detector.LOW_COUNT}) {
public void testVerify_GivenFunctionsCanHaveJustOverField() {
EnumSet<DetectorFunction> noOverFieldFunctions = EnumSet.of(
DetectorFunction.COUNT,
DetectorFunction.LOW_COUNT,
DetectorFunction.HIGH_COUNT
);
for (DetectorFunction f: noOverFieldFunctions) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setOverFieldName("over_field");
builder.build();
}
}
for (String f : new String[]{Detector.RARE, Detector.FREQ_RARE}) {
public void testVerify_GivenFunctionsCannotHaveFieldName() {
for (DetectorFunction f : Detector.COUNT_WITHOUT_FIELD_FUNCTIONS) {
Detector.Builder builder = new Detector.Builder(f, "field");
builder.setByFieldName("b");
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> builder.build());
assertThat(e.getMessage(), equalTo("field_name cannot be used with function '" + f + "'"));
}
// Nor rare
{
Detector.Builder builder = new Detector.Builder(DetectorFunction.RARE, "field");
builder.setByFieldName("b");
builder.setOverFieldName("over_field");
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> builder.build());
assertThat(e.getMessage(), equalTo("field_name cannot be used with function 'rare'"));
}
// Nor freq_rare
{
Detector.Builder builder = new Detector.Builder(DetectorFunction.FREQ_RARE, "field");
builder.setByFieldName("b");
builder.setOverFieldName("over_field");
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> builder.build());
assertThat(e.getMessage(), equalTo("field_name cannot be used with function 'freq_rare'"));
}
}
public void testVerify_GivenFunctionsRequiringFieldName() {
// some functions require a fieldname
for (DetectorFunction f : Detector.FIELD_NAME_FUNCTIONS) {
Detector.Builder builder = new Detector.Builder(f, "f");
builder.build();
}
}
public void testVerify_GivenFieldNameFunctionsAndOverField() {
// some functions require a fieldname
for (DetectorFunction f : Detector.FIELD_NAME_FUNCTIONS) {
Detector.Builder builder = new Detector.Builder(f, "f");
builder.setOverFieldName("some_over_field");
builder.build();
}
}
public void testVerify_GivenFieldNameFunctionsAndByField() {
// some functions require a fieldname
for (DetectorFunction f : Detector.FIELD_NAME_FUNCTIONS) {
Detector.Builder builder = new Detector.Builder(f, "f");
builder.setByFieldName("some_by_field");
builder.build();
}
}
public void testVerify_GivenCountFunctionsWithByField() {
// count functions do not require a field_name but may have a by field
for (DetectorFunction f : Detector.COUNT_WITHOUT_FIELD_FUNCTIONS) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setByFieldName("some_by_field");
builder.build();
}
}
public void testVerify_GivenCountFunctionsWithOverField() {
EnumSet<DetectorFunction> functions = EnumSet.copyOf(Detector.COUNT_WITHOUT_FIELD_FUNCTIONS);
functions.removeAll(Detector.NO_OVER_FIELD_NAME_FUNCTIONS);
for (DetectorFunction f : functions) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setOverFieldName("some_over_field");
builder.build();
}
}
public void testVerify_GivenCountFunctionsWithByAndOverFields() {
EnumSet<DetectorFunction> functions = EnumSet.copyOf(Detector.COUNT_WITHOUT_FIELD_FUNCTIONS);
functions.removeAll(Detector.NO_OVER_FIELD_NAME_FUNCTIONS);
for (DetectorFunction f : functions) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setByFieldName("some_over_field");
builder.setOverFieldName("some_by_field");
builder.build();
}
}
public void testVerify_GivenRareAndFreqRareWithByAndOverFields() {
for (DetectorFunction f : EnumSet.of(DetectorFunction.RARE, DetectorFunction.FREQ_RARE)) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setOverFieldName("over_field");
builder.setByFieldName("by_field");
builder.build();
}
}
// some functions require a fieldname
for (String f : new String[]{Detector.DISTINCT_COUNT, Detector.DC,
Detector.HIGH_DISTINCT_COUNT, Detector.HIGH_DC, Detector.LOW_DISTINCT_COUNT, Detector.LOW_DC,
Detector.INFO_CONTENT, Detector.LOW_INFO_CONTENT, Detector.HIGH_INFO_CONTENT,
Detector.METRIC, Detector.MEAN, Detector.HIGH_MEAN, Detector.LOW_MEAN, Detector.AVG,
Detector.HIGH_AVG, Detector.LOW_AVG, Detector.MAX, Detector.MIN, Detector.SUM,
Detector.LOW_SUM, Detector.HIGH_SUM, Detector.NON_NULL_SUM,
Detector.LOW_NON_NULL_SUM, Detector.HIGH_NON_NULL_SUM, Detector.POPULATION_VARIANCE,
Detector.LOW_POPULATION_VARIANCE, Detector.HIGH_POPULATION_VARIANCE}) {
Detector.Builder builder = new Detector.Builder(f, "f");
builder.setOverFieldName("over_field");
builder.build();
}
// these functions cannot have a field name
difference = new HashSet<>(Detector.ANALYSIS_FUNCTIONS);
difference.remove(Detector.METRIC);
difference.remove(Detector.MEAN);
difference.remove(Detector.LOW_MEAN);
difference.remove(Detector.HIGH_MEAN);
difference.remove(Detector.AVG);
difference.remove(Detector.LOW_AVG);
difference.remove(Detector.HIGH_AVG);
difference.remove(Detector.MEDIAN);
difference.remove(Detector.LOW_MEDIAN);
difference.remove(Detector.HIGH_MEDIAN);
difference.remove(Detector.MIN);
difference.remove(Detector.MAX);
difference.remove(Detector.SUM);
difference.remove(Detector.LOW_SUM);
difference.remove(Detector.HIGH_SUM);
difference.remove(Detector.NON_NULL_SUM);
difference.remove(Detector.LOW_NON_NULL_SUM);
difference.remove(Detector.HIGH_NON_NULL_SUM);
difference.remove(Detector.POPULATION_VARIANCE);
difference.remove(Detector.LOW_POPULATION_VARIANCE);
difference.remove(Detector.HIGH_POPULATION_VARIANCE);
difference.remove(Detector.DISTINCT_COUNT);
difference.remove(Detector.HIGH_DISTINCT_COUNT);
difference.remove(Detector.LOW_DISTINCT_COUNT);
difference.remove(Detector.DC);
difference.remove(Detector.LOW_DC);
difference.remove(Detector.HIGH_DC);
difference.remove(Detector.INFO_CONTENT);
difference.remove(Detector.LOW_INFO_CONTENT);
difference.remove(Detector.HIGH_INFO_CONTENT);
difference.remove(Detector.LAT_LONG);
for (String f : difference) {
Detector.Builder builder = new Detector.Builder(f, "f");
builder.setOverFieldName("over_field");
try {
builder.build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
}
// these can have a by field
for (String f : new String[]{Detector.COUNT, Detector.HIGH_COUNT,
Detector.LOW_COUNT, Detector.RARE,
Detector.NON_ZERO_COUNT, Detector.NZC}) {
public void testVerify_GivenFunctionsThatCanHaveByField() {
for (DetectorFunction f : EnumSet.of(DetectorFunction.COUNT, DetectorFunction.HIGH_COUNT, DetectorFunction.LOW_COUNT,
DetectorFunction.RARE, DetectorFunction.NON_ZERO_COUNT, DetectorFunction.LOW_NON_ZERO_COUNT,
DetectorFunction.HIGH_NON_ZERO_COUNT)) {
Detector.Builder builder = new Detector.Builder(f, null);
builder.setByFieldName("b");
builder.build();
}
Detector.Builder builder = new Detector.Builder(Detector.FREQ_RARE, null);
builder.setOverFieldName("over_field");
builder.setByFieldName("b");
builder.build();
builder = new Detector.Builder(Detector.FREQ_RARE, null);
builder.setOverFieldName("over_field");
builder.setByFieldName("b");
builder.build();
// some functions require a fieldname
int testedFunctionsCount = 0;
for (String f : Detector.FIELD_NAME_FUNCTIONS) {
testedFunctionsCount++;
builder = new Detector.Builder(f, "f");
builder.setByFieldName("b");
builder.build();
}
Assert.assertEquals(Detector.FIELD_NAME_FUNCTIONS.size(), testedFunctionsCount);
// these functions don't work with fieldname
testedFunctionsCount = 0;
for (String f : Detector.COUNT_WITHOUT_FIELD_FUNCTIONS) {
testedFunctionsCount++;
try {
builder = new Detector.Builder(f, "field");
builder.setByFieldName("b");
builder.build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
}
Assert.assertEquals(Detector.COUNT_WITHOUT_FIELD_FUNCTIONS.size(), testedFunctionsCount);
builder = new Detector.Builder(Detector.FREQ_RARE, "field");
builder.setByFieldName("b");
builder.setOverFieldName("over_field");
try {
builder.build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
for (String f : new String[]{Detector.HIGH_COUNT,
Detector.LOW_COUNT, Detector.NON_ZERO_COUNT, Detector.NZC}) {
builder = new Detector.Builder(f, null);
builder.setByFieldName("by_field");
builder.build();
}
for (String f : new String[]{Detector.COUNT, Detector.HIGH_COUNT,
Detector.LOW_COUNT}) {
builder = new Detector.Builder(f, null);
builder.setOverFieldName("over_field");
builder.build();
}
for (String f : new String[]{Detector.HIGH_COUNT,
Detector.LOW_COUNT}) {
builder = new Detector.Builder(f, null);
builder.setByFieldName("by_field");
builder.setOverFieldName("over_field");
builder.build();
}
for (String f : new String[]{Detector.NON_ZERO_COUNT, Detector.NZC}) {
try {
builder = new Detector.Builder(f, "field");
builder.setByFieldName("by_field");
builder.setOverFieldName("over_field");
builder.build();
Assert.fail("ElasticsearchException not thrown when expected");
} catch (ElasticsearchException e) {
}
}
}
public void testVerify_GivenInvalidRuleTargetFieldName() {

View File

@@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.job.config;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.job.config.DetectorFunction;
import static org.hamcrest.Matchers.equalTo;
public class DetectorFunctionTests extends ESTestCase {
public void testShortcuts() {
assertThat(DetectorFunction.fromString("nzc").getFullName(), equalTo("non_zero_count"));
assertThat(DetectorFunction.fromString("low_nzc").getFullName(), equalTo("low_non_zero_count"));
assertThat(DetectorFunction.fromString("high_nzc").getFullName(), equalTo("high_non_zero_count"));
assertThat(DetectorFunction.fromString("dc").getFullName(), equalTo("distinct_count"));
assertThat(DetectorFunction.fromString("low_dc").getFullName(), equalTo("low_distinct_count"));
assertThat(DetectorFunction.fromString("high_dc").getFullName(), equalTo("high_distinct_count"));
}
public void testFromString_GivenInvalidFunction() {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> DetectorFunction.fromString("invalid"));
assertThat(e.getMessage(), equalTo("Unknown function 'invalid'"));
}
}

View File

@@ -1254,3 +1254,19 @@
}
]
}
---
"Test function shortcut expansion":
  - do:
      xpack.ml.put_job:
        job_id: jobs-function-shortcut-expansion
        body: >
          {
            "analysis_config" : {
              "bucket_span": "1h",
              "detectors" :[{"function":"nzc","by_field_name":"airline"}]
            },
            "data_description" : {}
          }
  - match: { job_id: "jobs-function-shortcut-expansion" }
  - match: { analysis_config.detectors.0.function: "non_zero_count"}

View File

@@ -192,3 +192,18 @@
},
"data_description" : {}
}
---
"Test function shortcut expansion":
  - do:
      xpack.ml.put_job:
        job_id: old-cluster-function-shortcut-expansion
        body: >
          {
            "analysis_config" : {
              "bucket_span": "1h",
              "detectors" :[{"function":"nzc","by_field_name":"airline"}]
            },
            "data_description" : {}
          }
  - match: { job_id: "old-cluster-function-shortcut-expansion" }

View File

@@ -118,3 +118,12 @@ setup:
}
]
}
---
"Test get job with function shortcut should expand":
  - do:
      xpack.ml.get_jobs:
        job_id: old-cluster-function-shortcut-expansion
  - match: { count: 1 }
  - match: { jobs.0.analysis_config.detectors.0.function: "non_zero_count" }