[ML] Remove multiple_bucket_spans (#32496)

This commit removes the never-released multiple_bucket_spans
configuration parameter. It has been superseded by the new
multibucket feature, which requires no configuration.
This commit is contained in:
Dimitris Athanasiou 2018-08-02 11:25:56 +01:00 committed by GitHub
parent 15679315e3
commit f30bb0ebf8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 20 additions and 154 deletions

View File

@ -64,7 +64,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
private static final ParseField OVERLAPPING_BUCKETS = new ParseField("overlapping_buckets");
private static final ParseField RESULT_FINALIZATION_WINDOW = new ParseField("result_finalization_window");
private static final ParseField MULTIVARIATE_BY_FIELDS = new ParseField("multivariate_by_fields");
private static final ParseField MULTIPLE_BUCKET_SPANS = new ParseField("multiple_bucket_spans");
private static final ParseField USER_PER_PARTITION_NORMALIZATION = new ParseField("use_per_partition_normalization");
public static final String ML_CATEGORY_FIELD = "mlcategory";
@ -99,9 +98,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
parser.declareBoolean(Builder::setOverlappingBuckets, OVERLAPPING_BUCKETS);
parser.declareLong(Builder::setResultFinalizationWindow, RESULT_FINALIZATION_WINDOW);
parser.declareBoolean(Builder::setMultivariateByFields, MULTIVARIATE_BY_FIELDS);
parser.declareStringArray((builder, values) -> builder.setMultipleBucketSpans(
values.stream().map(v -> TimeValue.parseTimeValue(v, MULTIPLE_BUCKET_SPANS.getPreferredName()))
.collect(Collectors.toList())), MULTIPLE_BUCKET_SPANS);
parser.declareBoolean(Builder::setUsePerPartitionNormalization, USER_PER_PARTITION_NORMALIZATION);
return parser;
@ -121,13 +117,12 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
private final Boolean overlappingBuckets;
private final Long resultFinalizationWindow;
private final Boolean multivariateByFields;
private final List<TimeValue> multipleBucketSpans;
private final boolean usePerPartitionNormalization;
private AnalysisConfig(TimeValue bucketSpan, String categorizationFieldName, List<String> categorizationFilters,
CategorizationAnalyzerConfig categorizationAnalyzerConfig, TimeValue latency, String summaryCountFieldName,
List<Detector> detectors, List<String> influencers, Boolean overlappingBuckets, Long resultFinalizationWindow,
Boolean multivariateByFields, List<TimeValue> multipleBucketSpans, boolean usePerPartitionNormalization) {
Boolean multivariateByFields, boolean usePerPartitionNormalization) {
this.detectors = detectors;
this.bucketSpan = bucketSpan;
this.latency = latency;
@ -139,7 +134,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
this.overlappingBuckets = overlappingBuckets;
this.resultFinalizationWindow = resultFinalizationWindow;
this.multivariateByFields = multivariateByFields;
this.multipleBucketSpans = multipleBucketSpans == null ? null : Collections.unmodifiableList(multipleBucketSpans);
this.usePerPartitionNormalization = usePerPartitionNormalization;
}
@ -159,16 +153,18 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
overlappingBuckets = in.readOptionalBoolean();
resultFinalizationWindow = in.readOptionalLong();
multivariateByFields = in.readOptionalBoolean();
// BWC for removed multiple_bucket_spans
// TODO Remove in 7.0.0
if (in.getVersion().before(Version.V_6_5_0)) {
if (in.readBoolean()) {
final int arraySize = in.readVInt();
final List<TimeValue> spans = new ArrayList<>(arraySize);
for (int i = 0; i < arraySize; i++) {
spans.add(in.readTimeValue());
in.readTimeValue();
}
multipleBucketSpans = Collections.unmodifiableList(spans);
} else {
multipleBucketSpans = null;
}
}
usePerPartitionNormalization = in.readBoolean();
}
@ -192,15 +188,13 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
out.writeOptionalBoolean(overlappingBuckets);
out.writeOptionalLong(resultFinalizationWindow);
out.writeOptionalBoolean(multivariateByFields);
if (multipleBucketSpans != null) {
out.writeBoolean(true);
out.writeVInt(multipleBucketSpans.size());
for (TimeValue span : multipleBucketSpans) {
out.writeTimeValue(span);
}
} else {
// BWC for removed multiple_bucket_spans
// TODO Remove in 7.0.0
if (out.getVersion().before(Version.V_6_5_0)) {
out.writeBoolean(false);
}
out.writeBoolean(usePerPartitionNormalization);
}
@ -305,10 +299,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
return multivariateByFields;
}
public List<TimeValue> getMultipleBucketSpans() {
return multipleBucketSpans;
}
public boolean getUsePerPartitionNormalization() {
return usePerPartitionNormalization;
}
@ -413,10 +403,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
if (multivariateByFields != null) {
builder.field(MULTIVARIATE_BY_FIELDS.getPreferredName(), multivariateByFields);
}
if (multipleBucketSpans != null) {
builder.field(MULTIPLE_BUCKET_SPANS.getPreferredName(),
multipleBucketSpans.stream().map(TimeValue::getStringRep).collect(Collectors.toList()));
}
if (usePerPartitionNormalization) {
builder.field(USER_PER_PARTITION_NORMALIZATION.getPreferredName(), usePerPartitionNormalization);
}
@ -440,8 +426,7 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
Objects.equals(influencers, that.influencers) &&
Objects.equals(overlappingBuckets, that.overlappingBuckets) &&
Objects.equals(resultFinalizationWindow, that.resultFinalizationWindow) &&
Objects.equals(multivariateByFields, that.multivariateByFields) &&
Objects.equals(multipleBucketSpans, that.multipleBucketSpans);
Objects.equals(multivariateByFields, that.multivariateByFields);
}
@Override
@ -449,7 +434,7 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
return Objects.hash(
bucketSpan, categorizationFieldName, categorizationFilters, categorizationAnalyzerConfig, latency,
summaryCountFieldName, detectors, influencers, overlappingBuckets, resultFinalizationWindow,
multivariateByFields, multipleBucketSpans, usePerPartitionNormalization
multivariateByFields, usePerPartitionNormalization
);
}
@ -468,7 +453,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
private Boolean overlappingBuckets;
private Long resultFinalizationWindow;
private Boolean multivariateByFields;
private List<TimeValue> multipleBucketSpans;
private boolean usePerPartitionNormalization = false;
public Builder(List<Detector> detectors) {
@ -488,8 +472,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
this.overlappingBuckets = analysisConfig.overlappingBuckets;
this.resultFinalizationWindow = analysisConfig.resultFinalizationWindow;
this.multivariateByFields = analysisConfig.multivariateByFields;
this.multipleBucketSpans = analysisConfig.multipleBucketSpans == null ? null
: new ArrayList<>(analysisConfig.multipleBucketSpans);
this.usePerPartitionNormalization = analysisConfig.usePerPartitionNormalization;
}
@ -553,10 +535,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
this.multivariateByFields = multivariateByFields;
}
public void setMultipleBucketSpans(List<TimeValue> multipleBucketSpans) {
this.multipleBucketSpans = multipleBucketSpans;
}
public void setUsePerPartitionNormalization(boolean usePerPartitionNormalization) {
this.usePerPartitionNormalization = usePerPartitionNormalization;
}
@ -588,7 +566,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
verifyCategorizationAnalyzer();
verifyCategorizationFilters();
checkFieldIsNotNegativeIfSpecified(RESULT_FINALIZATION_WINDOW.getPreferredName(), resultFinalizationWindow);
verifyMultipleBucketSpans();
verifyNoMetricFunctionsWhenSummaryCountFieldNameIsSet();
@ -603,7 +580,7 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
return new AnalysisConfig(bucketSpan, categorizationFieldName, categorizationFilters, categorizationAnalyzerConfig,
latency, summaryCountFieldName, detectors, influencers, overlappingBuckets,
resultFinalizationWindow, multivariateByFields, multipleBucketSpans, usePerPartitionNormalization);
resultFinalizationWindow, multivariateByFields, usePerPartitionNormalization);
}
private void verifyNoMetricFunctionsWhenSummaryCountFieldNameIsSet() {
@ -727,19 +704,6 @@ public class AnalysisConfig implements ToXContentObject, Writeable {
}
}
private void verifyMultipleBucketSpans() {
if (multipleBucketSpans == null) {
return;
}
for (TimeValue span : multipleBucketSpans) {
if ((span.getSeconds() % bucketSpan.getSeconds() != 0L) || (span.compareTo(bucketSpan) <= 0)) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE, span, bucketSpan));
}
}
}
private static void checkDetectorsHavePartitionFields(List<Detector> detectors) {
for (Detector detector : detectors) {
if (!Strings.isNullOrEmpty(detector.getPartitionFieldName())) {

View File

@ -123,8 +123,6 @@ public final class Messages {
public static final String JOB_CONFIG_INVALID_TIMEFORMAT = "Invalid Time format string ''{0}''";
public static final String JOB_CONFIG_MISSING_ANALYSISCONFIG = "An analysis_config must be set";
public static final String JOB_CONFIG_MISSING_DATA_DESCRIPTION = "A data_description must be set";
public static final String JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE =
"Multiple bucket_span ''{0}'' must be a multiple of the main bucket_span ''{1}''";
public static final String JOB_CONFIG_ANALYSIS_FIELD_MUST_BE_SET =
"Unless a count or temporal function is used one of field_name, by_field_name or over_field_name must be set";
public static final String JOB_CONFIG_NO_DETECTORS = "No detectors configured";

View File

@ -87,14 +87,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
if (randomBoolean()) {
builder.setLatency(TimeValue.timeValueSeconds(randomIntBetween(1, 1_000_000)));
}
if (randomBoolean()) {
int numBucketSpans = randomIntBetween(0, 10);
List<TimeValue> multipleBucketSpans = new ArrayList<>();
for (int i = 2; i <= numBucketSpans; i++) {
multipleBucketSpans.add(TimeValue.timeValueSeconds(bucketSpan.getSeconds() * i));
}
builder.setMultipleBucketSpans(multipleBucketSpans);
}
if (randomBoolean()) {
builder.setMultivariateByFields(randomBoolean());
}
@ -255,28 +247,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
}
}
public void testFieldConfiguration_singleDetector_PreSummarised() {
// Multiple detectors, pre-summarised
AnalysisConfig.Builder builder = createConfigBuilder();
builder.setSummaryCountFieldName("summaryCount");
AnalysisConfig ac = builder.build();
assertTrue(ac.analysisFields().contains("summaryCount"));
assertEquals("summaryCount", ac.getSummaryCountFieldName());
builder = createConfigBuilder();
builder.setBucketSpan(TimeValue.timeValueSeconds(1000));
builder.setMultipleBucketSpans(Arrays.asList(
TimeValue.timeValueSeconds(5000), TimeValue.timeValueSeconds(10000), TimeValue.timeValueSeconds(24000)));
ac = builder.build();
assertTrue(ac.getMultipleBucketSpans().contains(TimeValue.timeValueSeconds(5000)));
assertTrue(ac.getMultipleBucketSpans().contains(TimeValue.timeValueSeconds(10000)));
assertTrue(ac.getMultipleBucketSpans().contains(TimeValue.timeValueSeconds(24000)));
assertEquals(1, ac.getDetectors().size());
assertEquals(0, ac.getDetectors().get(0).getDetectorIndex());
}
public void testBuild_GivenMlCategoryUsedAsByFieldButNoCategorizationFieldName() {
Detector.Builder detector = new Detector.Builder();
detector.setFunction("count");
@ -693,58 +663,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_FUNCTION_INCOMPATIBLE_PRESUMMARIZED, DetectorFunction.METRIC), e.getMessage());
}
public void testMultipleBucketsConfig() {
AnalysisConfig.Builder ac = createValidConfig();
ac.setMultipleBucketSpans(Arrays.asList(
TimeValue.timeValueSeconds(10L),
TimeValue.timeValueSeconds(15L),
TimeValue.timeValueSeconds(20L),
TimeValue.timeValueSeconds(25L),
TimeValue.timeValueSeconds(30L),
TimeValue.timeValueSeconds(35L)));
List<Detector> detectors = new ArrayList<>();
Detector detector = new Detector.Builder("count", null).build();
detectors.add(detector);
ac.setDetectors(detectors);
ac.setBucketSpan(TimeValue.timeValueSeconds(4L));
ElasticsearchException e = ESTestCase.expectThrows(ElasticsearchException.class, ac::build);
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE, "10s", "4s"), e.getMessage());
ac.setBucketSpan(TimeValue.timeValueSeconds(5L));
ac.build();
AnalysisConfig.Builder ac2 = createValidConfig();
ac2.setBucketSpan(TimeValue.timeValueSeconds(5L));
ac2.setDetectors(detectors);
ac2.setMultipleBucketSpans(Arrays.asList(
TimeValue.timeValueSeconds(10L),
TimeValue.timeValueSeconds(15L),
TimeValue.timeValueSeconds(20L),
TimeValue.timeValueSeconds(25L),
TimeValue.timeValueSeconds(30L)));
assertFalse(ac.equals(ac2));
ac2.setMultipleBucketSpans(Arrays.asList(
TimeValue.timeValueSeconds(10L),
TimeValue.timeValueSeconds(15L),
TimeValue.timeValueSeconds(20L),
TimeValue.timeValueSeconds(25L),
TimeValue.timeValueSeconds(30L),
TimeValue.timeValueSeconds(35L)));
ac.setBucketSpan(TimeValue.timeValueSeconds(222L));
ac.setMultipleBucketSpans(Collections.emptyList());
ac.build();
ac.setMultipleBucketSpans(Collections.singletonList(TimeValue.timeValueSeconds(222L)));
e = ESTestCase.expectThrows(ElasticsearchException.class, ac::build);
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE, "3.7m", "3.7m"), e.getMessage());
ac.setMultipleBucketSpans(Arrays.asList(TimeValue.timeValueSeconds(-444L), TimeValue.timeValueSeconds(-888L)));
e = ESTestCase.expectThrows(ElasticsearchException.class, ac::build);
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_MULTIPLE_BUCKETSPANS_MUST_BE_MULTIPLE, -444, "3.7m"), e.getMessage());
}
public void testVerify_GivenCategorizationFiltersButNoCategorizationFieldName() {
AnalysisConfig.Builder config = createValidConfig();
config.setCategorizationFilters(Collections.singletonList("foo"));
@ -838,7 +756,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
@Override
protected AnalysisConfig mutateInstance(AnalysisConfig instance) {
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(instance);
switch (between(0, 12)) {
switch (between(0, 11)) {
case 0:
List<Detector> detectors = new ArrayList<>(instance.getDetectors());
Detector.Builder detector = new Detector.Builder();
@ -849,7 +767,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
break;
case 1:
builder.setBucketSpan(new TimeValue(instance.getBucketSpan().millis() + (between(1, 1000) * 1000)));
builder.setMultipleBucketSpans(Collections.emptyList());
break;
case 2:
if (instance.getLatency() == null) {
@ -939,16 +856,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
}
break;
case 11:
List<TimeValue> multipleBucketSpans;
if (instance.getMultipleBucketSpans() == null) {
multipleBucketSpans = new ArrayList<>();
} else {
multipleBucketSpans = new ArrayList<>(instance.getMultipleBucketSpans());
}
multipleBucketSpans.add(new TimeValue(between(2, 10) * instance.getBucketSpan().millis()));
builder.setMultipleBucketSpans(multipleBucketSpans);
break;
case 12:
boolean usePerPartitionNormalization = instance.getUsePerPartitionNormalization() == false;
builder.setUsePerPartitionNormalization(usePerPartitionNormalization);
if (usePerPartitionNormalization) {

View File

@ -74,7 +74,6 @@ public class ProcessCtrl {
static final String LENGTH_ENCODED_INPUT_ARG = "--lengthEncodedInput";
static final String MODEL_CONFIG_ARG = "--modelconfig=";
public static final String QUANTILES_STATE_PATH_ARG = "--quantilesState=";
static final String MULTIPLE_BUCKET_SPANS_ARG = "--multipleBucketspans=";
static final String PER_PARTITION_NORMALIZATION = "--perPartitionNormalization";
/*
@ -155,8 +154,6 @@ public class ProcessCtrl {
addIfNotNull(analysisConfig.getLatency(), LATENCY_ARG, command);
addIfNotNull(analysisConfig.getSummaryCountFieldName(),
SUMMARY_COUNT_FIELD_ARG, command);
addIfNotNull(analysisConfig.getMultipleBucketSpans(),
MULTIPLE_BUCKET_SPANS_ARG, command);
if (Boolean.TRUE.equals(analysisConfig.getOverlappingBuckets())) {
Long window = analysisConfig.getResultFinalizationWindow();
if (window == null) {