[ML] Don’t wait on flush ack if results parser has failed (elastic/x-pack-elasticsearch#1540)

Original commit: elastic/x-pack-elasticsearch@f1a82ae315
This commit is contained in:
David Kyle 2017-05-25 10:52:44 +01:00 committed by GitHub
parent 6befa83337
commit ce25e1f4f3
5 changed files with 55 additions and 3 deletions

View File

@ -40,7 +40,6 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
@ -178,13 +177,14 @@ public class AutodetectCommunicator implements Closeable {
}, handler);
}
private void waitFlushToCompletion(String flushId) {
void waitFlushToCompletion(String flushId) {
LOGGER.debug("[{}] waiting for flush", job.getId());
try {
boolean isFlushComplete = autoDetectResultProcessor.waitForFlushAcknowledgement(flushId, FLUSH_PROCESS_CHECK_FREQUENCY);
while (isFlushComplete == false) {
checkProcessIsAlive();
checkResultsProcessorIsAlive();
isFlushComplete = autoDetectResultProcessor.waitForFlushAcknowledgement(flushId, FLUSH_PROCESS_CHECK_FREQUENCY);
}
} finally {
@ -212,6 +212,15 @@ public class AutodetectCommunicator implements Closeable {
}
}
private void checkResultsProcessorIsAlive() {
if (autoDetectResultProcessor.isFailed()) {
ParameterizedMessage message =
new ParameterizedMessage("[{}] Unexpected death of the result processor", job.getId());
LOGGER.error(message);
throw new ElasticsearchException(message.getFormattedMessage());
}
}
public JobTask getJobTask() {
return jobTask;
}

View File

@ -190,7 +190,8 @@ public class AutodetectProcessManager extends AbstractComponent {
if (communicator == null) {
String message = String.format(Locale.ROOT, "Cannot flush because job [%s] is not open", jobTask.getJobId());
logger.debug(message);
throw ExceptionsHelper.conflictStatusException(message);
handler.accept(ExceptionsHelper.conflictStatusException(message));
return;
}
communicator.flushJob(params, (aVoid, e) -> {

View File

@ -67,6 +67,7 @@ public class AutoDetectResultProcessor {
final Semaphore updateModelSnapshotIdSemaphore = new Semaphore(1);
private final FlushListener flushListener;
private volatile boolean processKilled;
private volatile boolean failed;
/**
* New model size stats are read as the process is running
@ -124,6 +125,8 @@ public class AutoDetectResultProcessor {
LOGGER.info("[{}] {} buckets parsed from autodetect output", jobId, bucketCount);
} catch (Exception e) {
failed = true;
if (processKilled) {
// Don't log the stack trace in this case. Log just enough to hint
// that it would have been better to close jobs before shutting down,
@ -288,6 +291,9 @@ public class AutoDetectResultProcessor {
* @return {@code true} if the flush has completed or the parsing finished; {@code false} if the timeout expired
*/
public boolean waitForFlushAcknowledgement(String flushId, Duration timeout) {
if (failed) {
return false;
}
return flushListener.waitForFlush(flushId, timeout);
}
@ -299,6 +305,14 @@ public class AutoDetectResultProcessor {
renormalizer.waitUntilIdle();
}
/**
* If failed then there was an error parsing the results that cannot be recovered from
* @return true if failed
*/
public boolean isFailed() {
return failed;
}
static class Context {
private final String jobId;

View File

@ -65,6 +65,16 @@ public class AutodetectCommunicatorTests extends ESTestCase {
}
}
public void testWaitForFlushReturnsIfParserFails() throws IOException {
AutodetectProcess process = mockAutodetectProcessWithOutputStream();
when(process.isProcessAlive()).thenReturn(true);
AutoDetectResultProcessor processor = mock(AutoDetectResultProcessor.class);
when(processor.isFailed()).thenReturn(true);
when(processor.waitForFlushAcknowledgement(anyString(), any())).thenReturn(false);
AutodetectCommunicator communicator = createAutodetectCommunicator(process, processor);
expectThrows(ElasticsearchException.class, () -> communicator.waitFlushToCompletion("foo"));
}
public void testFlushJob_throwsIfProcessIsDead() throws IOException {
AutodetectProcess process = mockAutodetectProcessWithOutputStream();
when(process.isProcessAlive()).thenReturn(false);

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.ml.job.process.autodetect.output;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.client.Client;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.ml.action.UpdateJobAction;
@ -25,6 +26,8 @@ import org.elasticsearch.xpack.ml.job.results.ModelPlot;
import org.junit.Before;
import org.mockito.InOrder;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
@ -327,6 +330,21 @@ public class AutoDetectResultProcessorTests extends ESTestCase {
verify(persister, times(2)).persistModelSizeStats(any());
}
public void testParsingErrorSetsFailed() {
@SuppressWarnings("unchecked")
Iterator<AutodetectResult> iterator = mock(Iterator.class);
when(iterator.hasNext()).thenThrow(new ElasticsearchParseException("this test throws"));
AutodetectProcess process = mock(AutodetectProcess.class);
when(process.readAutodetectResults()).thenReturn(iterator);
assertFalse(processorUnderTest.isFailed());
processorUnderTest.process(process);
assertTrue(processorUnderTest.isFailed());
// Wait for flush should return immediately
assertFalse(processorUnderTest.waitForFlushAcknowledgement("foo", Duration.of(300, ChronoUnit.SECONDS)));
}
public void testKill() throws TimeoutException {
AutodetectResult autodetectResult = mock(AutodetectResult.class);
@SuppressWarnings("unchecked")