[ML] Enforce the stop datafeed api to wait until any operation that the datafeed is doing has completed.
Original commit: elastic/x-pack-elasticsearch@e8974191a2
This commit is contained in:
parent e492b17c10
commit 61ca6d435f
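The change in a nutshell: the datafeed's lookback and realtime search iterations now run while holding a fair ReentrantLock owned by the Holder, and the stop path acquires that same lock via tryLock with the request's timeout, so a stop request blocks until any in-flight search has completed (or the timeout expires). Below is a minimal, self-contained sketch of that pattern; the class and method names are invented for illustration and are not the actual x-pack types touched by this diff.

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.locks.ReentrantLock;

    // Illustrative only: names below are invented for this sketch and do not exist in x-pack.
    public class DatafeedStopExample {

        // Fair lock so a waiting stop() call is not starved by back-to-back search iterations.
        private final ReentrantLock workLock = new ReentrantLock(true);
        private volatile boolean running = true;

        // Called by the scheduled search loop; holds the lock for the duration of one iteration.
        public void runOneIteration() {
            workLock.lock();
            try {
                if (running) {
                    doSearch(); // the lookback / realtime search would happen here
                }
            } finally {
                workLock.unlock();
            }
        }

        // Called by the stop API; waits (up to the timeout) for any in-flight iteration to finish.
        public void stop(long timeoutMillis) {
            running = false;
            boolean acquired = false;
            try {
                acquired = workLock.tryLock(timeoutMillis, TimeUnit.MILLISECONDS);
                // Either the current iteration has completed (lock acquired) or we gave up waiting.
                cancelScheduledWork();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                if (acquired) {
                    workLock.unlock();
                }
            }
        }

        private void doSearch() {
            // placeholder for the search/extraction work
        }

        private void cancelScheduledWork() {
            // placeholder for cancelling any scheduled future iterations
        }
    }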
@@ -49,11 +49,11 @@ import org.elasticsearch.xpack.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.notifications.Auditor;
 import org.elasticsearch.xpack.ml.utils.DatafeedStateObserver;
 import org.elasticsearch.xpack.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.persistent.NodePersistentTask;
 import org.elasticsearch.xpack.persistent.PersistentActionRegistry;
 import org.elasticsearch.xpack.persistent.PersistentActionRequest;
 import org.elasticsearch.xpack.persistent.PersistentActionResponse;
 import org.elasticsearch.xpack.persistent.PersistentActionService;
-import org.elasticsearch.xpack.persistent.NodePersistentTask;
 import org.elasticsearch.xpack.persistent.PersistentTasks;
 import org.elasticsearch.xpack.persistent.PersistentTasks.Assignment;
 import org.elasticsearch.xpack.persistent.PersistentTasks.PersistentTask;
@@ -265,12 +265,15 @@ public class StartDatafeedAction
             stop();
         }

-        /* public for testing */
         public void stop() {
+            stop(TimeValue.timeValueSeconds(20));
+        }
+
+        public void stop(TimeValue timeout) {
             if (holder == null) {
                 throw new IllegalStateException("task cancel ran before datafeed runner assigned the holder");
             }
-            holder.stop("cancel", null);
+            holder.stop("cancel", timeout, null);
         }
     }

@@ -99,6 +99,7 @@ public class StopDatafeedAction
         public Request(String jobId) {
             this.datafeedId = ExceptionsHelper.requireNonNull(jobId, DatafeedConfig.ID.getPreferredName());
             setActions(StartDatafeedAction.NAME);
+            setTimeout(TimeValue.timeValueSeconds(20));
         }

         Request() {
@@ -251,7 +252,7 @@ public class StopDatafeedAction

        @Override
        protected void taskOperation(Request request, StartDatafeedAction.DatafeedTask task, ActionListener<Response> listener) {
-            task.stop();
+            task.stop(request.getTimeout());
            listener.onResponse(new Response(true));
        }

@@ -41,6 +41,8 @@ import java.time.Duration;
 import java.util.Collections;
 import java.util.Objects;
 import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
 import java.util.function.Supplier;
@@ -113,7 +115,7 @@ public class DatafeedJobRunner extends AbstractComponent {
        holder.future = threadPool.executor(MachineLearning.DATAFEED_RUNNER_THREAD_POOL_NAME).submit(() -> {
            Long next = null;
            try {
-                next = holder.datafeedJob.runLookBack(startTime, endTime);
+                next = holder.executeLoopBack(startTime, endTime);
            } catch (DatafeedJob.ExtractionProblemException e) {
                if (endTime == null) {
                    next = e.nextDelayInMsSinceEpoch;
@@ -136,13 +138,13 @@ public class DatafeedJobRunner extends AbstractComponent {
                }
            } catch (Exception e) {
                logger.error("Failed lookback import for job [" + holder.datafeed.getJobId() + "]", e);
-                holder.stop("general_lookback_failure", e);
+                holder.stop("general_lookback_failure", TimeValue.timeValueSeconds(20), e);
                return;
            }
            if (next != null) {
                doDatafeedRealtime(next, holder.datafeed.getJobId(), holder);
            } else {
-                holder.stop("no_realtime", null);
+                holder.stop("no_realtime", TimeValue.timeValueSeconds(20), null);
                holder.problemTracker.finishReport();
            }
        });
@@ -155,7 +157,7 @@ public class DatafeedJobRunner extends AbstractComponent {
        holder.future = threadPool.schedule(delay, MachineLearning.DATAFEED_RUNNER_THREAD_POOL_NAME, () -> {
            long nextDelayInMsSinceEpoch;
            try {
-                nextDelayInMsSinceEpoch = holder.datafeedJob.runRealtime();
+                nextDelayInMsSinceEpoch = holder.executeRealTime();
                holder.problemTracker.reportNoneEmptyCount();
            } catch (DatafeedJob.ExtractionProblemException e) {
                nextDelayInMsSinceEpoch = e.nextDelayInMsSinceEpoch;
@@ -168,11 +170,13 @@ public class DatafeedJobRunner extends AbstractComponent {
                holder.problemTracker.reportEmptyDataCount();
            } catch (Exception e) {
                logger.error("Unexpected datafeed failure for job [" + jobId + "] stopping...", e);
-                holder.stop("general_realtime_error", e);
+                holder.stop("general_realtime_error", TimeValue.timeValueSeconds(20), e);
                return;
            }
            holder.problemTracker.finishReport();
+            if (nextDelayInMsSinceEpoch >= 0) {
                doDatafeedRealtime(nextDelayInMsSinceEpoch, jobId, holder);
+            }
        });
    }
}
@@ -241,6 +245,8 @@ public class DatafeedJobRunner extends AbstractComponent {
    public class Holder {

        private final DatafeedConfig datafeed;
+        // To ensure that we wait until loopback / realtime search has completed before we stop the datafeed
+        private final ReentrantLock datafeedJobLock = new ReentrantLock(true);
        private final DatafeedJob datafeedJob;
        private final boolean autoCloseJob;
        private final ProblemTracker problemTracker;
@@ -260,10 +266,12 @@ public class DatafeedJobRunner extends AbstractComponent {
            return datafeedJob.isRunning();
        }

-        public void stop(String source, Exception e) {
+        public void stop(String source, TimeValue timeout, Exception e) {
            logger.info("[{}] attempt to stop datafeed [{}] for job [{}]", source, datafeed.getId(), datafeed.getJobId());
            if (datafeedJob.stop()) {
+                boolean acquired = false;
                try {
+                    acquired = datafeedJobLock.tryLock(timeout.millis(), TimeUnit.MILLISECONDS);
                    logger.info("[{}] stopping datafeed [{}] for job [{}]...", source, datafeed.getId(), datafeed.getJobId());
                    FutureUtils.cancel(future);
                    auditor.info(datafeed.getJobId(), Messages.getMessage(Messages.JOB_AUDIT_DATAFEED_STOPPED));
@@ -271,14 +279,45 @@
                    if (autoCloseJob) {
                        closeJob();
                    }
+                } catch (InterruptedException e1) {
+                    Thread.currentThread().interrupt();
                } finally {
                    handler.accept(e);
+                    if (acquired) {
+                        datafeedJobLock.unlock();
+                    }
                }
            } else {
                logger.info("[{}] datafeed [{}] for job [{}] was already stopped", source, datafeed.getId(), datafeed.getJobId());
            }
        }

+        private Long executeLoopBack(long startTime, Long endTime) throws Exception {
+            datafeedJobLock.lock();
+            try {
+                if (isRunning()) {
+                    return datafeedJob.runLookBack(startTime, endTime);
+                } else {
+                    return null;
+                }
+            } finally {
+                datafeedJobLock.unlock();
+            }
+        }
+
+        private long executeRealTime() throws Exception {
+            datafeedJobLock.lock();
+            try {
+                if (isRunning()) {
+                    return datafeedJob.runRealtime();
+                } else {
+                    return -1L;
+                }
+            } finally {
+                datafeedJobLock.unlock();
+            }
+        }
+
        private void closeJob() {
            CloseJobAction.Request closeJobRequest = new CloseJobAction.Request(datafeed.getJobId());
            client.execute(CloseJobAction.INSTANCE, closeJobRequest, new ActionListener<CloseJobAction.Response>() {