[ML] Remove batch_span and period from analysis config (elastic/x-pack-elasticsearch#1043)

relates elastic/x-pack-elasticsearch#1040

Original commit: elastic/x-pack-elasticsearch@aed5e9912c
Dimitris Athanasiou 2017-04-11 12:57:58 +01:00 committed by GitHub
parent 7ef9a16f45
commit 4da1c5b9dc
6 changed files with 9 additions and 133 deletions
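
For context, a minimal sketch of how the AnalysisConfig builder shown in this diff is used once batch_span and period are gone; the detector function, field values and time spans below are illustrative, assume the usual TimeValue and Collections imports, and are not taken from the commit:

    Detector.Builder detector = new Detector.Builder("count", null);
    AnalysisConfig.Builder builder =
            new AnalysisConfig.Builder(Collections.singletonList(detector.build()));
    builder.setBucketSpan(TimeValue.timeValueMinutes(5)); // still supported
    builder.setLatency(TimeValue.ZERO);                   // still supported
    // builder.setBatchSpan(...) and builder.setPeriod(...) no longer exist
    AnalysisConfig config = builder.build();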

AnalysisConfig.java

@ -39,7 +39,7 @@ import java.util.stream.Collectors;
* <p>
* The configuration can contain multiple detectors, a new anomaly detector will
* be created for each detector configuration. The fields
* <code>bucketSpan, batchSpan, summaryCountFieldName and categorizationFieldName</code>
* <code>bucketSpan, summaryCountFieldName and categorizationFieldName</code>
* apply to all detectors.
* <p>
* If a value has not been set it will be <code>null</code>
@ -52,11 +52,9 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
*/
private static final ParseField ANALYSIS_CONFIG = new ParseField("analysis_config");
private static final ParseField BUCKET_SPAN = new ParseField("bucket_span");
private static final ParseField BATCH_SPAN = new ParseField("batch_span");
private static final ParseField CATEGORIZATION_FIELD_NAME = new ParseField("categorization_field_name");
public static final ParseField CATEGORIZATION_FILTERS = new ParseField("categorization_filters");
private static final ParseField LATENCY = new ParseField("latency");
private static final ParseField PERIOD = new ParseField("period");
private static final ParseField SUMMARY_COUNT_FIELD_NAME = new ParseField("summary_count_field_name");
private static final ParseField DETECTORS = new ParseField("detectors");
private static final ParseField INFLUENCERS = new ParseField("influencers");
@ -79,13 +77,10 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
PARSER.declareObjectArray(ConstructingObjectParser.constructorArg(), (p, c) -> Detector.PARSER.apply(p, c).build(), DETECTORS);
PARSER.declareString((builder, val) ->
builder.setBucketSpan(TimeValue.parseTimeValue(val, BUCKET_SPAN.getPreferredName())), BUCKET_SPAN);
PARSER.declareString((builder, val) ->
builder.setBatchSpan(TimeValue.parseTimeValue(val, BATCH_SPAN.getPreferredName())), BATCH_SPAN);
PARSER.declareString(Builder::setCategorizationFieldName, CATEGORIZATION_FIELD_NAME);
PARSER.declareStringArray(Builder::setCategorizationFilters, CATEGORIZATION_FILTERS);
PARSER.declareString((builder, val) ->
builder.setLatency(TimeValue.parseTimeValue(val, LATENCY.getPreferredName())), LATENCY);
PARSER.declareLong(Builder::setPeriod, PERIOD);
PARSER.declareString(Builder::setSummaryCountFieldName, SUMMARY_COUNT_FIELD_NAME);
PARSER.declareStringArray(Builder::setInfluencers, INFLUENCERS);
PARSER.declareBoolean(Builder::setOverlappingBuckets, OVERLAPPING_BUCKETS);
@ -101,11 +96,9 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
* These values apply to all detectors
*/
private final TimeValue bucketSpan;
private final TimeValue batchSpan;
private final String categorizationFieldName;
private final List<String> categorizationFilters;
private final TimeValue latency;
private final Long period;
private final String summaryCountFieldName;
private final List<Detector> detectors;
private final List<String> influencers;
@ -115,15 +108,13 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
private final List<TimeValue> multipleBucketSpans;
private final boolean usePerPartitionNormalization;
private AnalysisConfig(TimeValue bucketSpan, TimeValue batchSpan, String categorizationFieldName, List<String> categorizationFilters,
TimeValue latency, Long period, String summaryCountFieldName, List<Detector> detectors,
private AnalysisConfig(TimeValue bucketSpan, String categorizationFieldName, List<String> categorizationFilters,
TimeValue latency, String summaryCountFieldName, List<Detector> detectors,
List<String> influencers, Boolean overlappingBuckets, Long resultFinalizationWindow,
Boolean multivariateByFields, List<TimeValue> multipleBucketSpans, boolean usePerPartitionNormalization) {
this.detectors = detectors;
this.bucketSpan = bucketSpan;
this.batchSpan = batchSpan;
this.latency = latency;
this.period = period;
this.categorizationFieldName = categorizationFieldName;
this.categorizationFilters = categorizationFilters;
this.summaryCountFieldName = summaryCountFieldName;
@ -137,11 +128,9 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
public AnalysisConfig(StreamInput in) throws IOException {
bucketSpan = new TimeValue(in);
batchSpan = in.readOptionalWriteable(TimeValue::new);
categorizationFieldName = in.readOptionalString();
categorizationFilters = in.readBoolean() ? in.readList(StreamInput::readString) : null;
latency = in.readOptionalWriteable(TimeValue::new);
period = in.readOptionalLong();
summaryCountFieldName = in.readOptionalString();
detectors = in.readList(Detector::new);
influencers = in.readList(StreamInput::readString);
@ -155,7 +144,6 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
@Override
public void writeTo(StreamOutput out) throws IOException {
bucketSpan.writeTo(out);
out.writeOptionalWriteable(batchSpan);
out.writeOptionalString(categorizationFieldName);
if (categorizationFilters != null) {
out.writeBoolean(true);
@ -164,7 +152,6 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
out.writeBoolean(false);
}
out.writeOptionalWriteable(latency);
out.writeOptionalLong(period);
out.writeOptionalString(summaryCountFieldName);
out.writeList(detectors);
out.writeStringList(influencers);
@ -189,15 +176,6 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
return bucketSpan;
}
/**
* Interval into which to batch seasonal data
*
* @return The batchspan or <code>null</code> if not set
*/
public TimeValue getBatchSpan() {
return batchSpan;
}
public String getCategorizationFieldName() {
return categorizationFieldName;
}
@ -215,16 +193,6 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
return latency;
}
/**
* The repeat interval for periodic data in multiples of
* {@linkplain #getBatchSpan()}
*
* @return The period or <code>null</code> if not set
*/
public Long getPeriod() {
return period;
}
/**
* The name of the field that contains counts for pre-summarised input
*
@ -367,9 +335,6 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(BUCKET_SPAN.getPreferredName(), bucketSpan.getStringRep());
if (batchSpan != null) {
builder.field(BATCH_SPAN.getPreferredName(), batchSpan.getStringRep());
}
if (categorizationFieldName != null) {
builder.field(CATEGORIZATION_FIELD_NAME.getPreferredName(), categorizationFieldName);
}
@ -379,9 +344,6 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
if (latency != null) {
builder.field(LATENCY.getPreferredName(), latency.getStringRep());
}
if (period != null) {
builder.field(PERIOD.getPreferredName(), period);
}
if (summaryCountFieldName != null) {
builder.field(SUMMARY_COUNT_FIELD_NAME.getPreferredName(), summaryCountFieldName);
}
@ -415,10 +377,8 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
return Objects.equals(latency, that.latency) &&
usePerPartitionNormalization == that.usePerPartitionNormalization &&
Objects.equals(bucketSpan, that.bucketSpan) &&
Objects.equals(batchSpan, that.batchSpan) &&
Objects.equals(categorizationFieldName, that.categorizationFieldName) &&
Objects.equals(categorizationFilters, that.categorizationFilters) &&
Objects.equals(period, that.period) &&
Objects.equals(summaryCountFieldName, that.summaryCountFieldName) &&
Objects.equals(detectors, that.detectors) &&
Objects.equals(influencers, that.influencers) &&
@ -431,7 +391,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
@Override
public int hashCode() {
return Objects.hash(
bucketSpan, batchSpan, categorizationFieldName, categorizationFilters, latency, period,
bucketSpan, categorizationFieldName, categorizationFilters, latency,
summaryCountFieldName, detectors, influencers, overlappingBuckets, resultFinalizationWindow,
multivariateByFields, multipleBucketSpans, usePerPartitionNormalization
);
@ -443,9 +403,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
private List<Detector> detectors;
private TimeValue bucketSpan = DEFAULT_BUCKET_SPAN;
private TimeValue batchSpan;
private TimeValue latency;
private Long period;
private String categorizationFieldName;
private List<String> categorizationFilters;
private String summaryCountFieldName;
@ -463,9 +421,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
public Builder(AnalysisConfig analysisConfig) {
this.detectors = analysisConfig.detectors;
this.bucketSpan = analysisConfig.bucketSpan;
this.batchSpan = analysisConfig.batchSpan;
this.latency = analysisConfig.latency;
this.period = analysisConfig.period;
this.categorizationFieldName = analysisConfig.categorizationFieldName;
this.categorizationFilters = analysisConfig.categorizationFilters;
this.summaryCountFieldName = analysisConfig.summaryCountFieldName;
@ -485,18 +441,10 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
this.bucketSpan = bucketSpan;
}
public void setBatchSpan(TimeValue batchSpan) {
this.batchSpan = batchSpan;
}
public void setLatency(TimeValue latency) {
this.latency = latency;
}
public void setPeriod(long period) {
this.period = period;
}
public void setCategorizationFieldName(String categorizationFieldName) {
this.categorizationFieldName = categorizationFieldName;
}
@ -536,8 +484,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
/**
* Checks the configuration is valid
* <ol>
* <li>Check that if non-null BucketSpan, BatchSpan, Latency and Period are
* &gt;= 0</li>
* <li>Check that if non-null BucketSpan and Latency are &gt;= 0</li>
* <li>Check that if non-null Latency is &lt;= MAX_LATENCY</li>
* <li>Check there is at least one detector configured</li>
* <li>Check all the detectors are configured correctly</li>
@ -549,13 +496,9 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
*/
public AnalysisConfig build() {
TimeUtils.checkPositiveMultiple(bucketSpan, TimeUnit.SECONDS, BUCKET_SPAN);
if (batchSpan != null) {
TimeUtils.checkPositiveMultiple(batchSpan, TimeUnit.SECONDS, BATCH_SPAN);
}
if (latency != null) {
TimeUtils.checkNonNegativeMultiple(latency, TimeUnit.SECONDS, LATENCY);
}
checkFieldIsNotNegativeIfSpecified(PERIOD.getPreferredName(), period);
verifyDetectorAreDefined();
verifyFieldName(summaryCountFieldName);
@ -573,8 +516,8 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
checkNoInfluencersAreSet(influencers);
}
return new AnalysisConfig(bucketSpan, batchSpan, categorizationFieldName, categorizationFilters,
latency, period, summaryCountFieldName, detectors, influencers, overlappingBuckets,
return new AnalysisConfig(bucketSpan, categorizationFieldName, categorizationFilters,
latency, summaryCountFieldName, detectors, influencers, overlappingBuckets,
resultFinalizationWindow, multivariateByFields, multipleBucketSpans, usePerPartitionNormalization);
}
@ -696,7 +639,7 @@ public class AnalysisConfig extends ToXContentToBytes implements Writeable {
return true;
}
}
return field.chars().anyMatch(ch -> Character.isISOControl(ch));
return field.chars().anyMatch(Character::isISOControl);
}
private static boolean isValidRegex(String exp) {
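
For reference, a sketch of the serialized analysis_config after this change, using field names defined in this class; the values are hypothetical and only a subset of fields is shown:

    {
      "bucket_span": "5m",
      "latency": "0s",
      "summary_count_field_name": "doc_count",
      "detectors": [ ... ]
    }

"batch_span" and "period" are no longer written or parsed.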

ProcessCtrl.java

@ -88,11 +88,9 @@ public class ProcessCtrl {
/*
* Arguments used by autodetect
*/
static final String BATCH_SPAN_ARG = "--batchspan=";
static final String LATENCY_ARG = "--latency=";
static final String RESULT_FINALIZATION_WINDOW_ARG = "--resultFinalizationWindow=";
static final String MULTIVARIATE_BY_FIELDS_ARG = "--multivariateByFields";
static final String PERIOD_ARG = "--period=";
static final String PERSIST_INTERVAL_ARG = "--persistInterval=";
static final String MAX_QUANTILE_INTERVAL_ARG = "--maxQuantileInterval=";
static final String SUMMARY_COUNT_FIELD_ARG = "--summarycountfield=";
@ -166,9 +164,7 @@ public class ProcessCtrl {
AnalysisConfig analysisConfig = job.getAnalysisConfig();
if (analysisConfig != null) {
addIfNotNull(analysisConfig.getBucketSpan(), BUCKET_SPAN_ARG, command);
addIfNotNull(analysisConfig.getBatchSpan(), BATCH_SPAN_ARG, command);
addIfNotNull(analysisConfig.getLatency(), LATENCY_ARG, command);
addIfNotNull(analysisConfig.getPeriod(), PERIOD_ARG, command);
addIfNotNull(analysisConfig.getSummaryCountFieldName(),
SUMMARY_COUNT_FIELD_ARG, command);
addIfNotNull(analysisConfig.getMultipleBucketSpans(),

DatafeedJobValidatorTests.java

@ -53,7 +53,6 @@ public class DatafeedJobValidatorTests extends ESTestCase {
public void testVerify_GivenNoLatency() {
Job.Builder builder = buildJobBuilder("foo");
AnalysisConfig.Builder ac = createAnalysisConfig();
ac.setBatchSpan(TimeValue.timeValueSeconds(1800));
ac.setBucketSpan(TimeValue.timeValueSeconds(100));
builder.setAnalysisConfig(ac);
Job job = builder.build(new Date());

AnalysisConfigTests.java

@ -40,9 +40,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
}
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(detectors);
if (randomBoolean()) {
builder.setBatchSpan(TimeValue.timeValueSeconds(randomIntBetween(1, 1_000_000)));
}
TimeValue bucketSpan = AnalysisConfig.Builder.DEFAULT_BUCKET_SPAN;
if (randomBoolean()) {
bucketSpan = TimeValue.timeValueSeconds(randomIntBetween(1, 1_000_000));
@ -313,19 +310,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
assertEquals(config1.hashCode(), config2.hashCode());
}
public void testEquals_GivenDifferentBatchSpan() {
AnalysisConfig.Builder builder = createConfigBuilder();
builder.setBatchSpan(TimeValue.timeValueHours(3));
AnalysisConfig config1 = builder.build();
builder = createConfigBuilder();
builder.setBatchSpan(TimeValue.timeValueHours(4));
AnalysisConfig config2 = builder.build();
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentBucketSpan() {
AnalysisConfig.Builder builder = createConfigBuilder();
builder.setBucketSpan(TimeValue.timeValueSeconds(1800));
@ -387,19 +371,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
assertFalse(config2.equals(config1));
}
public void testEquals_GivenDifferentPeriod() {
AnalysisConfig.Builder builder = createConfigBuilder();
builder.setPeriod(1800L);
AnalysisConfig config1 = builder.build();
builder = createConfigBuilder();
builder.setPeriod(3600L);
AnalysisConfig config2 = builder.build();
assertFalse(config1.equals(config2));
assertFalse(config2.equals(config1));
}
public void testEquals_GivenSummaryCountField() {
AnalysisConfig.Builder builder = createConfigBuilder();
builder.setSummaryCountFieldName("foo");
@ -461,12 +432,10 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
AnalysisConfig.Builder builder = new AnalysisConfig.Builder(
Collections.singletonList(detector.build()));
builder.setBucketSpan(TimeValue.timeValueHours(1));
builder.setBatchSpan(TimeValue.timeValueHours(24));
builder.setCategorizationFieldName("cat");
builder.setCategorizationFilters(Arrays.asList("foo"));
builder.setInfluencers(Arrays.asList("myInfluencer"));
builder.setLatency(TimeValue.timeValueSeconds(3600));
builder.setPeriod(100L);
builder.setSummaryCountFieldName("sumCount");
return builder.build();
}
@ -524,15 +493,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
assertEquals("bucket_span cannot be less or equal than 0. Value = -1", e.getMessage());
}
public void testVerify_GivenNegativeBatchSpan() {
AnalysisConfig.Builder analysisConfig = createValidConfig();
analysisConfig.setBatchSpan(TimeValue.timeValueSeconds(-1));
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, () -> analysisConfig.build());
assertEquals("batch_span cannot be less or equal than 0. Value = -1", e.getMessage());
}
public void testVerify_GivenNegativeLatency() {
AnalysisConfig.Builder analysisConfig = createValidConfig();
analysisConfig.setLatency(TimeValue.timeValueSeconds(-1));
@ -542,16 +502,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
assertEquals("latency cannot be less than 0. Value = -1", e.getMessage());
}
public void testVerify_GivenNegativePeriod() {
AnalysisConfig.Builder analysisConfig = createValidConfig();
analysisConfig.setPeriod(-1L);
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class, () -> analysisConfig.build());
assertEquals(Messages.getMessage(Messages.JOB_CONFIG_FIELD_VALUE_TOO_LOW, "period", 0, -1), e.getMessage());
}
public void testVerify_GivenDefaultConfig_ShouldBeInvalidDueToNoDetectors() {
AnalysisConfig.Builder analysisConfig = createValidConfig();
analysisConfig.setDetectors(null);
@ -585,7 +535,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
// Test overlappingBuckets unset
AnalysisConfig.Builder analysisConfig = createValidConfig();
analysisConfig.setBucketSpan(TimeValue.timeValueSeconds(5000L));
analysisConfig.setBatchSpan(TimeValue.ZERO);
detectors = new ArrayList<>();
detector = new Detector.Builder("count", null).build();
detectors.add(detector);
@ -598,7 +547,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
// Test overlappingBuckets unset
analysisConfig = createValidConfig();
analysisConfig.setBucketSpan(TimeValue.timeValueSeconds(5000L));
analysisConfig.setBatchSpan(TimeValue.ZERO);
detectors = new ArrayList<>();
detector = new Detector.Builder("count", null).build();
detectors.add(detector);
@ -611,7 +559,6 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
// Test overlappingBuckets unset
analysisConfig = createValidConfig();
analysisConfig.setBucketSpan(TimeValue.timeValueSeconds(5000L));
analysisConfig.setBatchSpan(TimeValue.ZERO);
detectors = new ArrayList<>();
detector = new Detector.Builder("count", null).build();
detectors.add(detector);
@ -793,9 +740,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
detectors.add(detector);
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(detectors);
analysisConfig.setBucketSpan(TimeValue.timeValueHours(1));
analysisConfig.setBatchSpan(TimeValue.timeValueHours(2));
analysisConfig.setLatency(TimeValue.ZERO);
analysisConfig.setPeriod(0L);
return analysisConfig;
}
@ -804,9 +749,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon
detector.setByFieldName("mlcategory");
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(detector.build()));
analysisConfig.setBucketSpan(TimeValue.timeValueHours(1));
analysisConfig.setBatchSpan(TimeValue.timeValueHours(2));
analysisConfig.setLatency(TimeValue.ZERO);
analysisConfig.setPeriod(0L);
analysisConfig.setCategorizationFieldName("msg");
return analysisConfig;
}

ProcessCtrlTests.java

@ -36,10 +36,8 @@ public class ProcessCtrlTests extends ESTestCase {
Detector.Builder detectorBuilder = new Detector.Builder("metric", "value");
detectorBuilder.setPartitionFieldName("foo");
AnalysisConfig.Builder acBuilder = new AnalysisConfig.Builder(Collections.singletonList(detectorBuilder.build()));
acBuilder.setBatchSpan(TimeValue.timeValueSeconds(100));
acBuilder.setBucketSpan(TimeValue.timeValueSeconds(120));
acBuilder.setLatency(TimeValue.timeValueSeconds(360));
acBuilder.setPeriod(20L);
acBuilder.setSummaryCountFieldName("summaryField");
acBuilder.setOverlappingBuckets(true);
acBuilder.setMultivariateByFields(true);
@ -53,12 +51,10 @@ public class ProcessCtrlTests extends ESTestCase {
job.setDataDescription(dd);
List<String> command = ProcessCtrl.buildAutodetectCommand(env, settings, job.build(), logger, true, pid);
assertEquals(17, command.size());
assertEquals(15, command.size());
assertTrue(command.contains(ProcessCtrl.AUTODETECT_PATH));
assertTrue(command.contains(ProcessCtrl.BATCH_SPAN_ARG + "100"));
assertTrue(command.contains(ProcessCtrl.BUCKET_SPAN_ARG + "120"));
assertTrue(command.contains(ProcessCtrl.LATENCY_ARG + "360"));
assertTrue(command.contains(ProcessCtrl.PERIOD_ARG + "20"));
assertTrue(command.contains(ProcessCtrl.SUMMARY_COUNT_FIELD_ARG + "summaryField"));
assertTrue(command.contains(ProcessCtrl.RESULT_FINALIZATION_WINDOW_ARG + "2"));
assertTrue(command.contains(ProcessCtrl.MULTIVARIATE_BY_FIELDS_ARG));

MLTransportClientIT.java

@ -37,7 +37,6 @@ public class MLTransportClientIT extends ESXPackSmokeClientTestCase {
detectors.add(detector.build());
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(detectors);
analysisConfig.setBatchSpan(TimeValue.timeValueMinutes(5));
job.setAnalysisConfig(analysisConfig);
PutJobAction.Response putJobResponse = mlClient