[ML] Delete interim results after job re-opening (elastic/x-pack-elasticsearch#1194)
This is an issue where a bucket can have both interim results and non-interim results, a bucket should never have both at the same time. The steps to cause this situation are: 1. Flush a running job and create interim results 2. Close that job (this does not delete interim results) 3. Re-open the job and POST data 4. The job will eventually emit a bucket result which mingles with the existing interim results Normally interim results are deleted by AutoDetectResultProcessor when a bucket is parsed following a flush command. Because of the close and re-opening of the job AutoDetectResultProcessor no longer knows that a previous flush command creating interim results. The fix is to always delete interim results the first time AutoDetectResultProcessor sees a bucket. relates elastic/x-pack-elasticsearch#1188 Original commit: elastic/x-pack-elasticsearch@5326455f54
This commit is contained in:
parent
019b1f7ece
commit
32128894a5
|
@ -14,6 +14,7 @@ import org.elasticsearch.action.bulk.BulkResponse;
|
||||||
import org.elasticsearch.action.delete.DeleteAction;
|
import org.elasticsearch.action.delete.DeleteAction;
|
||||||
import org.elasticsearch.action.delete.DeleteRequestBuilder;
|
import org.elasticsearch.action.delete.DeleteRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchResponse;
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.action.support.IndicesOptions;
|
||||||
import org.elasticsearch.action.support.WriteRequest;
|
import org.elasticsearch.action.support.WriteRequest;
|
||||||
import org.elasticsearch.client.Client;
|
import org.elasticsearch.client.Client;
|
||||||
import org.elasticsearch.common.logging.Loggers;
|
import org.elasticsearch.common.logging.Loggers;
|
||||||
|
@ -134,6 +135,7 @@ public class JobDataDeleter {
|
||||||
QueryBuilder qb = QueryBuilders.termQuery(Bucket.IS_INTERIM.getPreferredName(), true);
|
QueryBuilder qb = QueryBuilders.termQuery(Bucket.IS_INTERIM.getPreferredName(), true);
|
||||||
|
|
||||||
SearchResponse searchResponse = client.prepareSearch(index)
|
SearchResponse searchResponse = client.prepareSearch(index)
|
||||||
|
.setIndicesOptions(IndicesOptions.lenientExpandOpen())
|
||||||
.setTypes(Result.TYPE.getPreferredName())
|
.setTypes(Result.TYPE.getPreferredName())
|
||||||
.setQuery(new ConstantScoreQueryBuilder(qb))
|
.setQuery(new ConstantScoreQueryBuilder(qb))
|
||||||
.setFetchSource(false)
|
.setFetchSource(false)
|
||||||
|
@ -141,7 +143,6 @@ public class JobDataDeleter {
|
||||||
.setSize(SCROLL_SIZE)
|
.setSize(SCROLL_SIZE)
|
||||||
.get();
|
.get();
|
||||||
|
|
||||||
String scrollId = searchResponse.getScrollId();
|
|
||||||
long totalHits = searchResponse.getHits().getTotalHits();
|
long totalHits = searchResponse.getHits().getTotalHits();
|
||||||
long totalDeletedCount = 0;
|
long totalDeletedCount = 0;
|
||||||
while (totalDeletedCount < totalHits) {
|
while (totalDeletedCount < totalHits) {
|
||||||
|
@ -152,7 +153,17 @@ public class JobDataDeleter {
|
||||||
++deletedResultCount;
|
++deletedResultCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
searchResponse = client.prepareSearchScroll(scrollId).setScroll(SCROLL_CONTEXT_DURATION).get();
|
searchResponse = client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(SCROLL_CONTEXT_DURATION).get();
|
||||||
|
}
|
||||||
|
|
||||||
|
clearScroll(searchResponse.getScrollId());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void clearScroll(String scrollId) {
|
||||||
|
try {
|
||||||
|
client.prepareClearScroll().addScrollId(scrollId).get();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOGGER.warn("[{}] Error while clearing scroll with id [{}]", jobId, scrollId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,6 +237,7 @@ public class JobDataDeleter {
|
||||||
client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(SCROLL_CONTEXT_DURATION).execute(this);
|
client.prepareSearchScroll(searchResponse.getScrollId()).setScroll(SCROLL_CONTEXT_DURATION).execute(this);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
clearScroll(searchResponse.getScrollId());
|
||||||
scrollFinishedListener.onResponse(true);
|
scrollFinishedListener.onResponse(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -271,7 +271,7 @@ public class AutoDetectResultProcessor {
|
||||||
Context(String jobId, boolean isPerPartitionNormalization, JobResultsPersister.Builder bulkResultsPersister) {
|
Context(String jobId, boolean isPerPartitionNormalization, JobResultsPersister.Builder bulkResultsPersister) {
|
||||||
this.jobId = jobId;
|
this.jobId = jobId;
|
||||||
this.isPerPartitionNormalization = isPerPartitionNormalization;
|
this.isPerPartitionNormalization = isPerPartitionNormalization;
|
||||||
this.deleteInterimRequired = false;
|
this.deleteInterimRequired = true;
|
||||||
this.bulkResultsPersister = bulkResultsPersister;
|
this.bulkResultsPersister = bulkResultsPersister;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
/*
|
||||||
|
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||||
|
* or more contributor license agreements. Licensed under the Elastic License;
|
||||||
|
* you may not use this file except in compliance with the Elastic License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.xpack.ml.integration;
|
||||||
|
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
|
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
|
import org.elasticsearch.xpack.ml.job.config.AnalysisConfig;
|
||||||
|
import org.elasticsearch.xpack.ml.job.config.DataDescription;
|
||||||
|
import org.elasticsearch.xpack.ml.job.config.Detector;
|
||||||
|
import org.elasticsearch.xpack.ml.job.config.Job;
|
||||||
|
import org.elasticsearch.xpack.ml.job.persistence.AnomalyDetectorsIndex;
|
||||||
|
import org.elasticsearch.xpack.ml.job.results.AnomalyRecord;
|
||||||
|
import org.junit.After;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.is;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests that interim results created before a job was reopened get
|
||||||
|
* deleted after new buckets are created.
|
||||||
|
*/
|
||||||
|
public class InterimResultsDeletedAfterReopeningJobIT extends MlNativeAutodetectIntegTestCase {
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void cleanUpTest() throws Exception {
|
||||||
|
cleanUp();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test() throws Exception {
|
||||||
|
Detector.Builder detector = new Detector.Builder("mean", "value");
|
||||||
|
detector.setByFieldName("by_field");
|
||||||
|
|
||||||
|
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(
|
||||||
|
Arrays.asList(detector.build()));
|
||||||
|
analysisConfig.setBucketSpan(TimeValue.timeValueHours(1));
|
||||||
|
DataDescription.Builder dataDescription = new DataDescription.Builder();
|
||||||
|
Job.Builder job = new Job.Builder("interim-results-deleted-after-reopening-job-test");
|
||||||
|
job.setAnalysisConfig(analysisConfig);
|
||||||
|
job.setDataDescription(dataDescription);
|
||||||
|
|
||||||
|
registerJob(job);
|
||||||
|
putJob(job);
|
||||||
|
openJob(job.getId());
|
||||||
|
|
||||||
|
long timestamp = 1491004800000L;
|
||||||
|
int totalBuckets = 2 * 24;
|
||||||
|
List<String> byFieldValues = Arrays.asList("foo", "bar");
|
||||||
|
int normalValue = 1000;
|
||||||
|
List<String> data = new ArrayList<>();
|
||||||
|
for (int bucket = 0; bucket < totalBuckets; bucket++) {
|
||||||
|
for (String byFieldValue : byFieldValues) {
|
||||||
|
data.add(createJsonRecord(createRecord(timestamp, byFieldValue, normalValue)));
|
||||||
|
}
|
||||||
|
timestamp += TimeValue.timeValueHours(1).getMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
data.add(createJsonRecord(createRecord(timestamp, "foo", 1)));
|
||||||
|
data.add(createJsonRecord(createRecord(timestamp, "bar", 1)));
|
||||||
|
postData(job.getId(), data.stream().collect(Collectors.joining()));
|
||||||
|
flushJob(job.getId(), true);
|
||||||
|
closeJob(job.getId());
|
||||||
|
|
||||||
|
// We should have 2 interim records
|
||||||
|
List<AnomalyRecord> records = getRecords(job.getId());
|
||||||
|
assertThat(records.size(), equalTo(2));
|
||||||
|
assertThat(records.stream().allMatch(AnomalyRecord::isInterim), is(true));
|
||||||
|
|
||||||
|
// Second batch
|
||||||
|
data = new ArrayList<>();
|
||||||
|
|
||||||
|
// This should fix the mean for 'foo'
|
||||||
|
data.add(createJsonRecord(createRecord(timestamp, "foo", 2000)));
|
||||||
|
|
||||||
|
// Then advance time and send normal data to force creating final results for previous bucket
|
||||||
|
timestamp += TimeValue.timeValueHours(1).getMillis();
|
||||||
|
data.add(createJsonRecord(createRecord(timestamp, "foo", normalValue)));
|
||||||
|
data.add(createJsonRecord(createRecord(timestamp, "bar", normalValue)));
|
||||||
|
|
||||||
|
openJob(job.getId());
|
||||||
|
postData(job.getId(), data.stream().collect(Collectors.joining()));
|
||||||
|
closeJob(job.getId());
|
||||||
|
|
||||||
|
records = getRecords(job.getId());
|
||||||
|
assertThat(records.size(), equalTo(1));
|
||||||
|
assertThat(records.stream().allMatch(AnomalyRecord::isInterim), is(false));
|
||||||
|
|
||||||
|
// No other interim results either
|
||||||
|
assertNoInterimResults(job.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Map<String, Object> createRecord(long timestamp, String byFieldValue, int value) {
|
||||||
|
Map<String, Object> record = new HashMap<>();
|
||||||
|
record.put("time", timestamp);
|
||||||
|
record.put("by_field", byFieldValue);
|
||||||
|
record.put("value", value);
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String createJsonRecord(Map<String, Object> keyValueMap) throws IOException {
|
||||||
|
return JsonXContent.contentBuilder().map(keyValueMap).string() + "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertNoInterimResults(String jobId) {
|
||||||
|
String indexName = AnomalyDetectorsIndex.jobResultsAliasedName(jobId);
|
||||||
|
SearchResponse search = client().prepareSearch(indexName).setTypes("result").setSize(1000)
|
||||||
|
.setQuery(QueryBuilders.termQuery("is_interim", true)).get();
|
||||||
|
assertThat(search.getHits().getTotalHits(), equalTo(0L));
|
||||||
|
}
|
||||||
|
}
|
|
@ -24,7 +24,6 @@ import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static org.elasticsearch.mock.orig.Mockito.never;
|
|
||||||
import static org.elasticsearch.mock.orig.Mockito.times;
|
import static org.elasticsearch.mock.orig.Mockito.times;
|
||||||
import static org.elasticsearch.mock.orig.Mockito.verify;
|
import static org.elasticsearch.mock.orig.Mockito.verify;
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
|
|
Loading…
Reference in New Issue