[ML] Remove record_count from bucket results (elastic/x-pack-elasticsearch#1568)

relates elastic/x-pack-elasticsearch#1564

Original commit: elastic/x-pack-elasticsearch@0caff1a735
This commit is contained in:
David Roberts 2017-05-26 16:57:40 +01:00 committed by GitHub
parent b284fc3c91
commit cc96580cd6
9 changed files with 31 additions and 76 deletions

View File

@ -106,7 +106,6 @@ score and time constraints:
"anomaly_score": 94.1706,
"bucket_span": 300,
"initial_anomaly_score": 94.1706,
"record_count": 1,
"event_count": 153,
"is_interim": false,
"bucket_influencers": [

View File

@ -96,9 +96,6 @@ A bucket resource has the following properties:
(number) The amount of time, in milliseconds, that it took to analyze the
bucket contents and calculate results.
`record_count`::
(number) The number of anomaly records in this bucket.
`result_type`::
(string) Internal. This value is always set to `bucket`.

View File

@ -179,11 +179,7 @@ public class JobProvider {
throw new RuntimeException(e);
}
}
if (numFields + additionalFieldCount > fieldCountLimit) {
return true;
} else {
return false;
}
return numFields + additionalFieldCount > fieldCountLimit;
}
@SuppressWarnings("unchecked")
@ -421,7 +417,7 @@ public class JobProvider {
if (query.isExpand()) {
Iterator<Bucket> bucketsToExpand = buckets.results().stream()
.filter(bucket -> bucket.getRecordCount() > 0).iterator();
.filter(bucket -> bucket.getBucketInfluencers().size() > 0).iterator();
expandBuckets(jobId, query, buckets, bucketsToExpand, handler, errorHandler, client);
} else {
handler.accept(buckets);

View File

@ -24,7 +24,6 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
@ -74,13 +73,14 @@ public class Bucket extends ToXContentToBytes implements Writeable {
PARSER.declareDouble(Bucket::setAnomalyScore, ANOMALY_SCORE);
PARSER.declareDouble(Bucket::setInitialAnomalyScore, INITIAL_ANOMALY_SCORE);
PARSER.declareBoolean(Bucket::setInterim, Result.IS_INTERIM);
PARSER.declareInt(Bucket::setRecordCount, RECORD_COUNT);
PARSER.declareLong(Bucket::setEventCount, EVENT_COUNT);
PARSER.declareObjectArray(Bucket::setRecords, AnomalyRecord.PARSER, RECORDS);
PARSER.declareObjectArray(Bucket::setBucketInfluencers, BucketInfluencer.PARSER, BUCKET_INFLUENCERS);
PARSER.declareLong(Bucket::setProcessingTimeMs, PROCESSING_TIME_MS);
PARSER.declareObjectArray(Bucket::setPartitionScores, PartitionScore.PARSER, PARTITION_SCORES);
PARSER.declareString((bucket, s) -> {}, Result.RESULT_TYPE);
// For bwc with 5.4
PARSER.declareInt((bucket, recordCount) -> {}, RECORD_COUNT);
}
private final String jobId;
@ -88,7 +88,6 @@ public class Bucket extends ToXContentToBytes implements Writeable {
private final long bucketSpan;
private double anomalyScore;
private double initialAnomalyScore;
private int recordCount;
private List<AnomalyRecord> records = new ArrayList<>();
private long eventCount;
private boolean isInterim;
@ -108,7 +107,6 @@ public class Bucket extends ToXContentToBytes implements Writeable {
this.bucketSpan = other.bucketSpan;
this.anomalyScore = other.anomalyScore;
this.initialAnomalyScore = other.initialAnomalyScore;
this.recordCount = other.recordCount;
this.records = new ArrayList<>(other.records);
this.eventCount = other.eventCount;
this.isInterim = other.isInterim;
@ -123,7 +121,10 @@ public class Bucket extends ToXContentToBytes implements Writeable {
anomalyScore = in.readDouble();
bucketSpan = in.readLong();
initialAnomalyScore = in.readDouble();
recordCount = in.readInt();
// bwc for recordCount
if (in.getVersion().before(Version.V_5_5_0_UNRELEASED)) {
in.readInt();
}
records = in.readList(AnomalyRecord::new);
eventCount = in.readLong();
isInterim = in.readBoolean();
@ -143,7 +144,10 @@ public class Bucket extends ToXContentToBytes implements Writeable {
out.writeDouble(anomalyScore);
out.writeLong(bucketSpan);
out.writeDouble(initialAnomalyScore);
out.writeInt(recordCount);
// bwc for recordCount
if (out.getVersion().before(Version.V_5_5_0_UNRELEASED)) {
out.writeInt(0);
}
out.writeList(records);
out.writeLong(eventCount);
out.writeBoolean(isInterim);
@ -164,7 +168,6 @@ public class Bucket extends ToXContentToBytes implements Writeable {
builder.field(ANOMALY_SCORE.getPreferredName(), anomalyScore);
builder.field(BUCKET_SPAN.getPreferredName(), bucketSpan);
builder.field(INITIAL_ANOMALY_SCORE.getPreferredName(), initialAnomalyScore);
builder.field(RECORD_COUNT.getPreferredName(), recordCount);
if (records.isEmpty() == false) {
builder.field(RECORDS.getPreferredName(), records);
}
@ -223,14 +226,6 @@ public class Bucket extends ToXContentToBytes implements Writeable {
this.initialAnomalyScore = initialAnomalyScore;
}
/**
 * Returns the value currently held in this bucket's {@code recordCount} field.
 *
 * @return the stored record count
 */
public int getRecordCount() {
return recordCount;
}
/**
 * Overwrites this bucket's {@code recordCount} field with the given value.
 * The argument is stored as-is; no validation is performed here.
 *
 * @param recordCount the record count to store
 */
public void setRecordCount(int recordCount) {
this.recordCount = recordCount;
}
/**
* Get all the anomaly records associated with this bucket.
* The records are not part of the bucket document. They will
@ -310,7 +305,7 @@ public class Bucket extends ToXContentToBytes implements Writeable {
@Override
public int hashCode() {
return Objects.hash(jobId, timestamp, eventCount, initialAnomalyScore, anomalyScore, recordCount, records,
return Objects.hash(jobId, timestamp, eventCount, initialAnomalyScore, anomalyScore, records,
isInterim, bucketSpan, bucketInfluencers, partitionScores, processingTimeMs);
}
@ -331,7 +326,6 @@ public class Bucket extends ToXContentToBytes implements Writeable {
return Objects.equals(this.jobId, that.jobId) && Objects.equals(this.timestamp, that.timestamp)
&& (this.eventCount == that.eventCount) && (this.bucketSpan == that.bucketSpan)
&& (this.recordCount == that.recordCount)
&& (this.anomalyScore == that.anomalyScore) && (this.initialAnomalyScore == that.initialAnomalyScore)
&& Objects.equals(this.records, that.records) && Objects.equals(this.isInterim, that.isInterim)
&& Objects.equals(this.bucketInfluencers, that.bucketInfluencers)

View File

@ -65,9 +65,6 @@ public class GetBucketActionResponseTests extends AbstractStreamableTestCase<Get
if (randomBoolean()) {
bucket.setProcessingTimeMs(randomLong());
}
if (randomBoolean()) {
bucket.setRecordCount(randomInt());
}
if (randomBoolean()) {
int size = randomInt(10);
List<AnomalyRecord> records = new ArrayList<>(size);

View File

@ -15,7 +15,7 @@ import org.elasticsearch.xpack.ml.job.config.Job;
import org.elasticsearch.xpack.ml.job.results.Bucket;
import org.junit.After;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -43,7 +43,7 @@ public class UpdateInterimResultsIT extends MlNativeAutodetectIntegTestCase {
public void test() throws Exception {
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(
Arrays.asList(new Detector.Builder("max", "value").build()));
Collections.singletonList(new Detector.Builder("max", "value").build()));
analysisConfig.setBucketSpan(TimeValue.timeValueSeconds(BUCKET_SPAN_SECONDS));
analysisConfig.setOverlappingBuckets(true);
DataDescription.Builder dataDescription = new DataDescription.Builder();
@ -83,9 +83,7 @@ public class UpdateInterimResultsIT extends MlNativeAutodetectIntegTestCase {
List<Bucket> firstInterimBuckets = getInterimResults(job.getId());
assertThat(firstInterimBuckets.size(), equalTo(2));
assertThat(firstInterimBuckets.get(0).getTimestamp().getTime(), equalTo(1400039000000L));
assertThat(firstInterimBuckets.get(0).getRecordCount(), equalTo(0));
assertThat(firstInterimBuckets.get(1).getTimestamp().getTime(), equalTo(1400040000000L));
assertThat(firstInterimBuckets.get(1).getRecordCount(), equalTo(1));
assertThat(firstInterimBuckets.get(1).getRecords().get(0).getActual().get(0), equalTo(16.0));
});
@ -97,9 +95,7 @@ public class UpdateInterimResultsIT extends MlNativeAutodetectIntegTestCase {
assertBusy(() -> {
List<Bucket> secondInterimBuckets = getInterimResults(job.getId());
assertThat(secondInterimBuckets.get(0).getTimestamp().getTime(), equalTo(1400039000000L));
assertThat(secondInterimBuckets.get(0).getRecordCount(), equalTo(0));
assertThat(secondInterimBuckets.get(1).getTimestamp().getTime(), equalTo(1400040000000L));
assertThat(secondInterimBuckets.get(1).getRecordCount(), equalTo(1));
assertThat(secondInterimBuckets.get(1).getRecords().get(0).getActual().get(0), equalTo(16.0));
});
@ -122,7 +118,7 @@ public class UpdateInterimResultsIT extends MlNativeAutodetectIntegTestCase {
StringBuilder data = new StringBuilder();
for (int i = 0; i < halfBuckets; i++) {
int value = timeToValueMap.getOrDefault(time, randomIntBetween(1, 3));
data.append("{\"time\":" + time + ", \"value\":" + value + "}\n");
data.append("{\"time\":").append(time).append(", \"value\":").append(value).append("}\n");
time += BUCKET_SPAN_SECONDS / 2;
}
return data.toString();
@ -136,6 +132,6 @@ public class UpdateInterimResultsIT extends MlNativeAutodetectIntegTestCase {
assertThat(response.getBuckets().count(), lessThan(1500L));
List<Bucket> buckets = response.getBuckets().results();
assertThat(buckets.size(), greaterThan(0));
return buckets.stream().filter(b -> b.isInterim()).collect(Collectors.toList());
return buckets.stream().filter(Bucket::isInterim).collect(Collectors.toList());
}
}

View File

@ -21,7 +21,7 @@ import org.mockito.ArgumentCaptor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
@ -41,7 +41,6 @@ public class JobResultsPersisterTests extends ESTestCase {
bucket.setEventCount(57);
bucket.setInitialAnomalyScore(88.8);
bucket.setProcessingTimeMs(8888);
bucket.setRecordCount(1);
BucketInfluencer bi = new BucketInfluencer(JOB_ID, new Date(), 600);
bi.setAnomalyScore(14.15);
@ -53,7 +52,7 @@ public class JobResultsPersisterTests extends ESTestCase {
// We are adding a record but it shouldn't be persisted as part of the bucket
AnomalyRecord record = new AnomalyRecord(JOB_ID, new Date(), 600);
bucket.setRecords(Arrays.asList(record));
bucket.setRecords(Collections.singletonList(record));
JobResultsPersister persister = new JobResultsPersister(Settings.EMPTY, client);
persister.bulkPersisterBuilder(JOB_ID).persistBucket(bucket).executeRequest();
@ -63,7 +62,6 @@ public class JobResultsPersisterTests extends ESTestCase {
String s = ((IndexRequest)bulkRequest.requests().get(0)).source().utf8ToString();
assertTrue(s.matches(".*anomaly_score.:99\\.9.*"));
assertTrue(s.matches(".*initial_anomaly_score.:88\\.8.*"));
assertTrue(s.matches(".*record_count.:1.*"));
assertTrue(s.matches(".*event_count.:57.*"));
assertTrue(s.matches(".*bucket_span.:123456.*"));
assertTrue(s.matches(".*processing_time_ms.:8888.*"));

View File

@ -34,9 +34,9 @@ import static org.mockito.Mockito.when;
public class AutodetectResultsParserTests extends ESTestCase {
private static final double EPSILON = 0.000001;
public static final String METRIC_OUTPUT_SAMPLE = "[{\"bucket\": {\"job_id\":\"foo\",\"timestamp\":1359450000000,"
private static final String METRIC_OUTPUT_SAMPLE = "[{\"bucket\": {\"job_id\":\"foo\",\"timestamp\":1359450000000,"
+ "\"bucket_span\":22, \"records\":[],"
+ "\"anomaly_score\":0,\"record_count\":0,\"event_count\":806,\"bucket_influencers\":["
+ "\"anomaly_score\":0,\"event_count\":806,\"bucket_influencers\":["
+ "{\"timestamp\":1359450000000,\"bucket_span\":22,\"job_id\":\"foo\",\"anomaly_score\":0,"
+ "\"probability\":0.0, \"influencer_field_name\":\"bucket_time\","
+ "\"initial_anomaly_score\":0.0}]}},{\"quantiles\": {\"job_id\":\"foo\", \"quantile_state\":\"[normalizer 1.1, normalizer 2" +
@ -56,7 +56,7 @@ public class AutodetectResultsParserTests extends ESTestCase {
+ "\"probability\":0.0473552,\"by_field_name\":\"airline\",\"by_field_value\":\"SWA\", \"typical\":[152.148],"
+ "\"actual\":[96.6425],\"field_name\":\"responsetime\",\"function\":\"min\",\"partition_field_name\":\"\","
+ "\"partition_field_value\":\"\"}],"
+ "\"initial_anomaly_score\":0.0140005, \"anomaly_score\":20.22688, \"record_count\":4,"
+ "\"initial_anomaly_score\":0.0140005, \"anomaly_score\":20.22688,"
+ "\"event_count\":820,\"bucket_influencers\":[{\"timestamp\":1359453600000,\"bucket_span\":22,"
+ "\"job_id\":\"foo\", \"raw_anomaly_score\":0.0140005, \"probability\":0.01,\"influencer_field_name\":\"bucket_time\","
+ "\"initial_anomaly_score\":20.22688,\"anomaly_score\":20.22688} ,{\"timestamp\":1359453600000,\"bucket_span\":22,"
@ -66,7 +66,7 @@ public class AutodetectResultsParserTests extends ESTestCase {
+ "\"quantile_state\":\"[normalizer 1.2, normalizer 2.2]\"}} ,{\"flush\": {\"id\":\"testing1\"}} ,"
+ "{\"quantiles\": {\"job_id\":\"foo\",\"timestamp\":1359453600000,\"quantile_state\":\"[normalizer 1.3, normalizer 2.3]\"}} ]";
public static final String POPULATION_OUTPUT_SAMPLE = "[{\"timestamp\":1379590200,\"records\":[{\"probability\":1.38951e-08,"
private static final String POPULATION_OUTPUT_SAMPLE = "[{\"timestamp\":1379590200,\"records\":[{\"probability\":1.38951e-08,"
+ "\"field_name\":\"sum_cs_bytes_\",\"over_field_name\":\"cs_host\",\"over_field_value\":\"mail.google.com\","
+ "\"function\":\"max\","
+ "\"causes\":[{\"probability\":1.38951e-08,\"field_name\":\"sum_cs_bytes_\",\"over_field_name\":\"cs_host\","
@ -84,7 +84,7 @@ public class AutodetectResultsParserTests extends ESTestCase {
+ "\"probability\":0.0152333,\"field_name\":\"sum_cs_bytes_\",\"over_field_name\":\"cs_host\","
+ "\"over_field_value\":\"emea.salesforce.com\",\"function\":\"max\",\"typical\":[101534],\"actual\":[5.36373e+06]}],"
+ "\"record_score\":0.303996,\"anomaly_score\":44.7324}],\"raw_anomaly_score\":1.30397,\"anomaly_score\":44.7324,"
+ "\"record_count\":4,\"event_count\":1235}" + ",{\"flush\":\"testing2\"}"
+ "\"event_count\":1235}" + ",{\"flush\":\"testing2\"}"
+ ",{\"timestamp\":1379590800,\"records\":[{\"probability\":1.9008e-08,\"field_name\":\"sum_cs_bytes_\","
+ "\"over_field_name\":\"cs_host\",\"over_field_value\":\"mail.google.com\",\"function\":\"max\",\"causes\":[{"
+ "\"probability\":1.9008e-08,\"field_name\":\"sum_cs_bytes_\",\"over_field_name\":\"cs_host\","
@ -233,7 +233,7 @@ public class AutodetectResultsParserTests extends ESTestCase {
+ "\"field_name\":\"sum_cs_bytes_\",\"over_field_name\":\"cs_host\",\"over_field_value\":\"googleads.g.doubleclick.net\","
+ "\"function\":\"max\",\"typical\":[31356],\"actual\":[210926]}],\"record_score\":0.00237509,"
+ "\"anomaly_score\":1.19192}],\"raw_anomaly_score\":1.26918,\"anomaly_score\":1.19192,"
+ "\"record_count\":34,\"event_count\":1159}" + "]";
+ "\"event_count\":1159}" + "]";
public void testParser() throws IOException {
InputStream inputStream = new ByteArrayInputStream(METRIC_OUTPUT_SAMPLE.getBytes(StandardCharsets.UTF_8));
@ -246,7 +246,6 @@ public class AutodetectResultsParserTests extends ESTestCase {
assertEquals(2, buckets.size());
assertEquals(new Date(1359450000000L), buckets.get(0).getTimestamp());
assertEquals(0, buckets.get(0).getRecordCount());
assertEquals(buckets.get(0).getEventCount(), 806);
@ -258,7 +257,6 @@ public class AutodetectResultsParserTests extends ESTestCase {
assertEquals("bucket_time", bucketInfluencers.get(0).getInfluencerFieldName());
assertEquals(new Date(1359453600000L), buckets.get(1).getTimestamp());
assertEquals(4, buckets.get(1).getRecordCount());
assertEquals(buckets.get(1).getEventCount(), 820);
bucketInfluencers = buckets.get(1).getBucketInfluencers();
@ -341,7 +339,6 @@ public class AutodetectResultsParserTests extends ESTestCase {
assertEquals(2, buckets.size());
assertEquals(new Date(1379590200000L), buckets.get(0).getTimestamp());
assertEquals(4, buckets.get(0).getRecordCount());
assertEquals(buckets.get(0).getEventCount(), 1235);
Bucket firstBucket = buckets.get(0);
@ -353,7 +350,6 @@ public class AutodetectResultsParserTests extends ESTestCase {
assertNotNull(firstBucket.getRecords().get(0).getCauses());
assertEquals(new Date(1379590800000L), buckets.get(1).getTimestamp());
assertEquals(34, buckets.get(1).getRecordCount());
assertEquals(buckets.get(1).getEventCount(), 1159);
}

View File

@ -62,9 +62,6 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
if (randomBoolean()) {
bucket.setProcessingTimeMs(randomLong());
}
if (randomBoolean()) {
bucket.setRecordCount(randomInt());
}
if (randomBoolean()) {
int size = randomInt(10);
List<AnomalyRecord> records = new ArrayList<>(size);
@ -126,19 +123,9 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
assertFalse(bucket2.equals(bucket1));
}
/**
 * Verifies that two buckets built with identical constructor arguments but
 * given different record counts (300 vs 400) do not compare equal.
 */
public void testEquals_GivenDifferentRecordCount() {
Bucket bucket1 = new Bucket("foo", new Date(123), 123);
bucket1.setRecordCount(300);
Bucket bucket2 = new Bucket("foo", new Date(123), 123);
bucket2.setRecordCount(400);
// Check both directions so an asymmetric equals() implementation is caught.
assertFalse(bucket1.equals(bucket2));
assertFalse(bucket2.equals(bucket1));
}
public void testEquals_GivenOneHasRecordsAndTheOtherDoesNot() {
Bucket bucket1 = new Bucket("foo", new Date(123), 123);
bucket1.setRecords(Arrays.asList(new AnomalyRecord("foo", new Date(123), 123)));
bucket1.setRecords(Collections.singletonList(new AnomalyRecord("foo", new Date(123), 123)));
Bucket bucket2 = new Bucket("foo", new Date(123), 123);
bucket2.setRecords(Collections.emptyList());
@ -148,7 +135,7 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
public void testEquals_GivenDifferentNumberOfRecords() {
Bucket bucket1 = new Bucket("foo", new Date(123), 123);
bucket1.setRecords(Arrays.asList(new AnomalyRecord("foo", new Date(123), 123)));
bucket1.setRecords(Collections.singletonList(new AnomalyRecord("foo", new Date(123), 123)));
Bucket bucket2 = new Bucket("foo", new Date(123), 123);
bucket2.setRecords(Arrays.asList(new AnomalyRecord("foo", new Date(123), 123),
new AnomalyRecord("foo", new Date(123), 123)));
@ -164,9 +151,9 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
anomalyRecord1.setRecordScore(2.0);
Bucket bucket1 = new Bucket("foo", new Date(123), 123);
bucket1.setRecords(Arrays.asList(anomalyRecord1));
bucket1.setRecords(Collections.singletonList(anomalyRecord1));
Bucket bucket2 = new Bucket("foo", new Date(123), 123);
bucket2.setRecords(Arrays.asList(anomalyRecord2));
bucket2.setRecords(Collections.singletonList(anomalyRecord2));
assertFalse(bucket1.equals(bucket2));
assertFalse(bucket2.equals(bucket1));
@ -207,8 +194,7 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
bucket1.setInitialAnomalyScore(92.0);
bucket1.setEventCount(134);
bucket1.setInterim(true);
bucket1.setRecordCount(4);
bucket1.setRecords(Arrays.asList(record));
bucket1.setRecords(Collections.singletonList(record));
bucket1.addBucketInfluencer(bucketInfluencer);
Bucket bucket2 = new Bucket("foo", date, 123);
@ -216,8 +202,7 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
bucket2.setInitialAnomalyScore(92.0);
bucket2.setEventCount(134);
bucket2.setInterim(true);
bucket2.setRecordCount(4);
bucket2.setRecords(Arrays.asList(record));
bucket2.setRecords(Collections.singletonList(record));
bucket2.addBucketInfluencer(bucketInfluencer);
assertTrue(bucket1.equals(bucket2));
@ -229,7 +214,6 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
Bucket bucket = new Bucket("foo", new Date(123), 123);
bucket.addBucketInfluencer(new BucketInfluencer("foo", new Date(123), 123));
bucket.setAnomalyScore(0.0);
bucket.setRecordCount(0);
assertFalse(bucket.isNormalizable());
}
@ -247,7 +231,6 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
Bucket bucket = new Bucket("foo", new Date(123), 123);
bucket.addBucketInfluencer(new BucketInfluencer("foo", new Date(123), 123));
bucket.setAnomalyScore(1.0);
bucket.setRecordCount(0);
assertTrue(bucket.isNormalizable());
}
@ -256,7 +239,6 @@ public class BucketTests extends AbstractSerializingTestCase<Bucket> {
Bucket bucket = new Bucket("foo", new Date(123), 123);
bucket.addBucketInfluencer(new BucketInfluencer("foo", new Date(123), 123));
bucket.setAnomalyScore(1.0);
bucket.setRecordCount(1);
assertTrue(bucket.isNormalizable());
}