[ML] Autodetect should receive events from the earliest valid timestamp (elastic/x-pack-elasticsearch#3570)

When scheduled events are searched for in order to pass them to the
autodetect process, the search start time is currently the latest record
timestamp when a job opens, but `now` when the process is updated.

This commit changes both to be consistent and based on the earliest
valid timestamp for the job. The earliest valid timestamp is the
latest record timestamp minus the job latency.

Relates elastic/x-pack-elasticsearch#3016

Original commit: elastic/x-pack-elasticsearch@7f882ea053
This commit is contained in:
Dimitris Athanasiou 2018-01-15 18:07:48 +00:00 committed by GitHub
parent 57d887c9df
commit aff9a4a2ba
7 changed files with 53 additions and 13 deletions

View File

@ -26,6 +26,7 @@ import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.xpack.ml.MlParserType;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.persistence.AnomalyDetectorsIndexFields;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.utils.ExceptionsHelper;
import org.elasticsearch.xpack.ml.utils.MlStrings;
import org.elasticsearch.xpack.ml.utils.time.TimeUtils;
@ -450,6 +451,23 @@ public class Job extends AbstractDiffable<Job> implements Writeable, ToXContentO
+ PROCESS_MEMORY_OVERHEAD.getBytes();
}
/**
 * Computes the lower bound on timestamps that the job still accepts.
 * The bound is the latest record timestamp held in the given data counts,
 * moved back by the latency of the analysis config (a missing latency counts
 * as zero). When the data counts hold no latest record timestamp the result is {@code 0}.
 * @param dataCounts the job data counts
 * @return the timestamp before which data is not accepted by the job
 */
public long earliestValidTimestamp(DataCounts dataCounts) {
    Date latestRecordTimestamp = dataCounts.getLatestRecordTimeStamp();
    if (latestRecordTimestamp == null) {
        // No record has been seen yet, so there is nothing to offset from
        return 0;
    }
    TimeValue configuredLatency = analysisConfig.getLatency();
    long latencyMillis = 0;
    if (configuredLatency != null) {
        latencyMillis = configuredLatency.millis();
    }
    return latestRecordTimestamp.getTime() - latencyMillis;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(jobId);

View File

@ -369,10 +369,7 @@ public class JobProvider {
ActionListener<AutodetectParams.Builder> getScheduledEventsListener = ActionListener.wrap(
paramsBuilder -> {
ScheduledEventsQueryBuilder scheduledEventsQueryBuilder = new ScheduledEventsQueryBuilder();
Date lastestRecordTime = paramsBuilder.getDataCounts().getLatestRecordTimeStamp();
if (lastestRecordTime != null) {
scheduledEventsQueryBuilder.start(Long.toString(lastestRecordTime.getTime()));
}
scheduledEventsQueryBuilder.start(job.earliestValidTimestamp(paramsBuilder.getDataCounts()));
scheduledEventsForJob(jobId, job.getGroups(), scheduledEventsQueryBuilder, ActionListener.wrap(
events -> {
paramsBuilder.setScheduledEvents(events.results());

View File

@ -41,6 +41,11 @@ public class ScheduledEventsQueryBuilder {
return this;
}
/**
 * Sets the start of the query from a numeric value, which is stored
 * in its decimal string form.
 * @param start the numeric start value
 * @return this builder, to allow call chaining
 */
public ScheduledEventsQueryBuilder start(long start) {
    this.start = String.valueOf(start);
    return this;
}
public ScheduledEventsQueryBuilder end(String end) {
this.end = end;
return this;

View File

@ -88,8 +88,7 @@ public class TransportPostCalendarEventsAction extends HandledTransportAction<Po
@Override
public void onFailure(Exception e) {
listener.onFailure(
ExceptionsHelper.serverError("Error indexing event", e));
listener.onFailure(ExceptionsHelper.serverError("Error indexing event", e));
}
});
},

View File

@ -28,7 +28,6 @@ import org.elasticsearch.xpack.ml.MLMetadataField;
import org.elasticsearch.xpack.ml.MlMetaIndex;
import org.elasticsearch.xpack.ml.MlMetadata;
import org.elasticsearch.xpack.ml.calendars.Calendar;
import org.elasticsearch.xpack.ml.job.JobManager;
import org.elasticsearch.xpack.ml.utils.ExceptionsHelper;
import java.io.IOException;
@ -43,18 +42,16 @@ public class TransportPutCalendarAction extends HandledTransportAction<PutCalend
private final Client client;
private final ClusterService clusterService;
private final JobManager jobManager;
@Inject
public TransportPutCalendarAction(Settings settings, ThreadPool threadPool,
TransportService transportService, ActionFilters actionFilters,
IndexNameExpressionResolver indexNameExpressionResolver,
Client client, ClusterService clusterService, JobManager jobManager) {
Client client, ClusterService clusterService) {
super(settings, PutCalendarAction.NAME, threadPool, transportService, actionFilters,
indexNameExpressionResolver, PutCalendarAction.Request::new);
this.client = client;
this.clusterService = clusterService;
this.jobManager = jobManager;
}
@Override
@ -81,7 +78,6 @@ public class TransportPutCalendarAction extends HandledTransportAction<PutCalend
new ActionListener<IndexResponse>() {
@Override
public void onResponse(IndexResponse indexResponse) {
jobManager.updateProcessOnCalendarChanged(calendar.getJobIds());
listener.onResponse(new PutCalendarAction.Response(calendar));
}

View File

@ -60,7 +60,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.time.Duration;
import java.time.ZonedDateTime;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
@ -280,7 +279,8 @@ public class AutodetectProcessManager extends AbstractComponent {
if (updateParams.isUpdateScheduledEvents()) {
Job job = jobManager.getJobOrThrowIfUnknown(jobTask.getJobId());
ScheduledEventsQueryBuilder query = new ScheduledEventsQueryBuilder().start(Long.toString(new Date().getTime()));
DataCounts dataCounts = getStatistics(jobTask).get().v1();
ScheduledEventsQueryBuilder query = new ScheduledEventsQueryBuilder().start(job.earliestValidTimestamp(dataCounts));
jobProvider.scheduledEventsForJob(jobTask.getJobId(), job.getGroups(), query, eventsListener);
} else {
eventsListener.onResponse(null);

View File

@ -22,6 +22,7 @@ import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.MachineLearningClientActionPlugin;
import org.elasticsearch.xpack.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.persistence.AnomalyDetectorsIndexFields;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
import java.io.IOException;
import java.util.ArrayList;
@ -551,6 +552,30 @@ public class JobTests extends AbstractSerializingTestCase<Job> {
builder.build().estimateMemoryFootprint());
}
public void testEarliestValidTimestamp_GivenEmptyDataCounts() {
    Job job = createRandomizedJob();
    // Freshly created data counts, on which no latest record timestamp has been set
    DataCounts emptyCounts = new DataCounts("foo");
    assertThat(job.earliestValidTimestamp(emptyCounts), equalTo(0L));
}
public void testEarliestValidTimestamp_GivenDataCountsAndZeroLatency() {
    Job.Builder jobBuilder = buildJobBuilder("foo");
    DataCounts counts = new DataCounts(jobBuilder.getId());
    counts.setLatestRecordTimeStamp(new Date(123456789L));
    Job job = jobBuilder.build();
    // Expect the latest record timestamp to come back unchanged
    assertThat(job.earliestValidTimestamp(counts), equalTo(123456789L));
}
public void testEarliestValidTimestamp_GivenDataCountsAndLatency() {
    Job.Builder jobBuilder = buildJobBuilder("foo");

    // Rebuild the analysis config with a latency of one second
    AnalysisConfig.Builder configWithLatency = new AnalysisConfig.Builder(jobBuilder.build().getAnalysisConfig());
    configWithLatency.setLatency(TimeValue.timeValueMillis(1000L));
    jobBuilder.setAnalysisConfig(configWithLatency);

    DataCounts counts = new DataCounts(jobBuilder.getId());
    counts.setLatestRecordTimeStamp(new Date(123456789L));

    Job job = jobBuilder.build();
    // 123456789 ms latest record minus 1000 ms latency
    assertThat(job.earliestValidTimestamp(counts), equalTo(123455789L));
}
public static Job.Builder buildJobBuilder(String id, Date date) {
Job.Builder builder = new Job.Builder(id);
builder.setCreateTime(date);