[ML][Data Frame] adjusting change detection workflow (#45511) (#45580)

* [ML][Data Frame] adjusting change detection workflow

* adjusting for PR comment

* disallowing null as an argument value
This commit is contained in:
Benjamin Trent 2019-08-14 17:26:24 -05:00 committed by GitHub
parent 647a8308c3
commit fde5dae387
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 99 additions and 76 deletions

View File

@@ -154,7 +154,12 @@ public abstract class AsyncTwoPhaseIndexer<JobPosition, JobStats extends Indexer
// fire off the search. Note this is async, the method will return from here // fire off the search. Note this is async, the method will return from here
executor.execute(() -> { executor.execute(() -> {
onStart(now, ActionListener.wrap(r -> { onStart(now, ActionListener.wrap(r -> {
nextSearch(ActionListener.wrap(this::onSearchResponse, this::finishWithSearchFailure)); assert r != null;
if (r) {
nextSearch(ActionListener.wrap(this::onSearchResponse, this::finishWithSearchFailure));
} else {
finishAndSetState();
}
}, e -> { }, e -> {
finishAndSetState(); finishAndSetState();
onFailure(e); onFailure(e);
@@ -200,9 +205,11 @@ public abstract class AsyncTwoPhaseIndexer<JobPosition, JobStats extends Indexer
* internal state is {@link IndexerState#STARTED}. * internal state is {@link IndexerState#STARTED}.
* *
* @param now The current time in milliseconds passed through from {@link #maybeTriggerAsyncJob(long)} * @param now The current time in milliseconds passed through from {@link #maybeTriggerAsyncJob(long)}
* @param listener listener to call after done * @param listener listener to call after done. The argument passed to the listener indicates if the indexer should continue or not
* true: continue execution as normal
* false: cease execution. This does NOT call onFinish
*/ */
protected abstract void onStart(long now, ActionListener<Void> listener); protected abstract void onStart(long now, ActionListener<Boolean> listener);
/** /**
* Executes the {@link SearchRequest} and calls <code>nextPhase</code> with the * Executes the {@link SearchRequest} and calls <code>nextPhase</code> with the

View File

@@ -88,10 +88,10 @@ public class AsyncTwoPhaseIndexerTests extends ESTestCase {
} }
@Override @Override
protected void onStart(long now, ActionListener<Void> listener) { protected void onStart(long now, ActionListener<Boolean> listener) {
assertThat(step, equalTo(0)); assertThat(step, equalTo(0));
++step; ++step;
listener.onResponse(null); listener.onResponse(true);
} }
@Override @Override
@@ -186,9 +186,9 @@ public class AsyncTwoPhaseIndexerTests extends ESTestCase {
} }
@Override @Override
protected void onStart(long now, ActionListener<Void> listener) { protected void onStart(long now, ActionListener<Boolean> listener) {
started = true; started = true;
listener.onResponse(null); listener.onResponse(true);
} }
@Override @Override
@@ -270,10 +270,10 @@ public class AsyncTwoPhaseIndexerTests extends ESTestCase {
} }
@Override @Override
protected void onStart(long now, ActionListener<Void> listener) { protected void onStart(long now, ActionListener<Boolean> listener) {
assertThat(step, equalTo(0)); assertThat(step, equalTo(0));
++step; ++step;
listener.onResponse(null); listener.onResponse(true);
} }
@Override @Override

View File

@@ -147,7 +147,7 @@ public abstract class DataFrameIndexer extends AsyncTwoPhaseIndexer<DataFrameInd
protected abstract void createCheckpoint(ActionListener<DataFrameTransformCheckpoint> listener); protected abstract void createCheckpoint(ActionListener<DataFrameTransformCheckpoint> listener);
@Override @Override
protected void onStart(long now, ActionListener<Void> listener) { protected void onStart(long now, ActionListener<Boolean> listener) {
try { try {
pivot = new Pivot(getConfig().getPivotConfig()); pivot = new Pivot(getConfig().getPivotConfig());
@@ -157,7 +157,7 @@ public abstract class DataFrameIndexer extends AsyncTwoPhaseIndexer<DataFrameInd
} }
runState = determineRunStateAtStart(); runState = determineRunStateAtStart();
listener.onResponse(null); listener.onResponse(true);
} catch (Exception e) { } catch (Exception e) {
listener.onFailure(e); listener.onFailure(e);
} }
@@ -525,5 +525,5 @@ public abstract class DataFrameIndexer extends AsyncTwoPhaseIndexer<DataFrameInd
return null; return null;
} }
protected abstract boolean sourceHasChanged(); protected abstract void sourceHasChanged(ActionListener<Boolean> hasChangedListener);
} }

View File

@@ -12,7 +12,6 @@ import org.apache.lucene.util.SetOnce;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.LatchedActionListener;
import org.elasticsearch.action.bulk.BulkAction; import org.elasticsearch.action.bulk.BulkAction;
import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkRequest;
@@ -57,8 +56,6 @@ import org.elasticsearch.xpack.dataframe.transforms.pivot.AggregationResultUtils
import java.time.Instant; import java.time.Instant;
import java.util.Arrays; import java.util.Arrays;
import java.util.Map; import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
@@ -324,15 +321,27 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
return; return;
} }
logger.debug("Data frame indexer [{}] schedule has triggered, state: [{}]", event.getJobName(), getIndexer().getState()); if (taskState.get() == DataFrameTransformTaskState.FAILED) {
logger.debug("Schedule was triggered for transform [{}] but task is failed. Ignoring trigger.", getTransformId());
return;
}
// ignore trigger if indexer is running or completely stopped
IndexerState indexerState = getIndexer().getState();
if (IndexerState.INDEXING.equals(indexerState) ||
IndexerState.STOPPING.equals(indexerState) ||
IndexerState.STOPPED.equals(indexerState)) {
logger.debug("Indexer for transform [{}] has state [{}], ignoring trigger", getTransformId(), indexerState);
return;
}
logger.debug("Data frame indexer [{}] schedule has triggered, state: [{}]", event.getJobName(), indexerState);
// if it runs for the 1st time we just do it, if not we check for changes // if it runs for the 1st time we just do it, if not we check for changes
if (currentCheckpoint.get() == 0 ) { if (currentCheckpoint.get() == 0) {
logger.debug("Trigger initial run"); logger.debug("Trigger initial run");
getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis()); getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis());
} else if (getIndexer().isContinuous() && getIndexer().sourceHasChanged()) { } else if (getIndexer().isContinuous()) {
changesLastDetectedAt = Instant.now();
logger.debug("Source has changed, triggering new indexer run");
getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis()); getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis());
} }
} }
@@ -620,7 +629,7 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
} }
@Override @Override
protected void onStart(long now, ActionListener<Void> listener) { protected void onStart(long now, ActionListener<Boolean> listener) {
// On each run, we need to get the total number of docs and reset the count of processed docs // On each run, we need to get the total number of docs and reset the count of processed docs
// Since multiple checkpoints can be executed in the task while it is running on the same node, we need to gather // Since multiple checkpoints can be executed in the task while it is running on the same node, we need to gather
// the progress here, and not in the executor. // the progress here, and not in the executor.
@@ -657,30 +666,55 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
); );
// If we are continuous, we will want to verify we have the latest stored configuration // If we are continuous, we will want to verify we have the latest stored configuration
if (isContinuous()) { ActionListener<Void> changedSourceListener = ActionListener.wrap(
transformsConfigManager.getTransformConfiguration(getJobId(), ActionListener.wrap( r -> {
config -> { if (isContinuous()) {
transformConfig = config; transformsConfigManager.getTransformConfiguration(getJobId(), ActionListener.wrap(
logger.debug("[" + getJobId() + "] successfully refreshed data frame transform config from index."); config -> {
transformConfig = config;
logger.debug("[{}] successfully refreshed data frame transform config from index.", transformId);
updateConfigListener.onResponse(null);
},
failure -> {
String msg = DataFrameMessages.getMessage(
DataFrameMessages.FAILED_TO_RELOAD_TRANSFORM_CONFIGURATION,
getJobId());
logger.error(msg, failure);
// If the transform config index or the transform config is gone, something serious occurred
// We are in an unknown state and should fail out
if (failure instanceof ResourceNotFoundException) {
updateConfigListener.onFailure(new TransformConfigReloadingException(msg, failure));
} else {
auditor.warning(getJobId(), msg);
updateConfigListener.onResponse(null);
}
}
));
} else {
updateConfigListener.onResponse(null); updateConfigListener.onResponse(null);
},
failure -> {
String msg = DataFrameMessages.getMessage(
DataFrameMessages.FAILED_TO_RELOAD_TRANSFORM_CONFIGURATION,
getJobId());
logger.error(msg, failure);
// If the transform config index or the transform config is gone, something serious occurred
// We are in an unknown state and should fail out
if (failure instanceof ResourceNotFoundException) {
updateConfigListener.onFailure(new TransformConfigReloadingException(msg, failure));
} else {
auditor.warning(getJobId(), msg);
updateConfigListener.onResponse(null);
}
} }
},
listener::onFailure
);
// If we are not on the initial batch checkpoint and its the first pass of whatever continuous checkpoint we are on,
// we should verify if there are local changes based on the sync config. If not, do not proceed further and exit.
if (transformTask.currentCheckpoint.get() > 0 && initialRun()) {
sourceHasChanged(ActionListener.wrap(
hasChanged -> {
if (hasChanged) {
transformTask.changesLastDetectedAt = Instant.now();
logger.debug("[{}] source has changed, triggering new indexer run.", transformId);
changedSourceListener.onResponse(null);
} else {
// No changes, stop executing
listener.onResponse(false);
}
},
listener::onFailure
)); ));
} else { } else {
updateConfigListener.onResponse(null); changedSourceListener.onResponse(null);
} }
} }
@@ -930,41 +964,22 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
} }
@Override @Override
public boolean sourceHasChanged() { protected void sourceHasChanged(ActionListener<Boolean> hasChangedListener) {
if (getState() == IndexerState.INDEXING) {
logger.trace("Indexer is still running, ignore");
return false;
}
CountDownLatch latch = new CountDownLatch(1);
SetOnce<Boolean> changed = new SetOnce<>();
checkpointProvider.sourceHasChanged(getLastCheckpoint(), checkpointProvider.sourceHasChanged(getLastCheckpoint(),
new LatchedActionListener<>(ActionListener.wrap(changed::set, e -> { ActionListener.wrap(
changed.set(false); hasChanged -> {
logger.trace("[{}] change detected [{}]", transformId, hasChanged);
hasChangedListener.onResponse(hasChanged);
},
e -> {
logger.warn( logger.warn(
"Failed to detect changes for data frame transform [" + transformId + "], skipping update till next check", "Failed to detect changes for data frame transform [" + transformId + "], skipping update till next check.",
e); e);
auditor.warning(transformId, auditor.warning(transformId,
"Failed to detect changes for data frame transform, skipping update till next check. Exception: " "Failed to detect changes for data frame transform, skipping update till next check. Exception: "
+ e.getMessage());
}), latch));
try {
if (latch.await(5, TimeUnit.SECONDS)) {
logger.trace("Change detected:" + changed.get());
return changed.get();
}
} catch (InterruptedException e) {
logger.warn("Failed to detect changes for data frame transform [" + transformId + "], skipping update till next check", e);
auditor.warning(transformId,
"Failed to detect changes for data frame transform, skipping update till next check. Exception: "
+ e.getMessage()); + e.getMessage());
} hasChangedListener.onResponse(false);
}));
return false;
} }
private boolean isIrrecoverableFailure(Exception e) { private boolean isIrrecoverableFailure(Exception e) {

View File

@@ -63,6 +63,7 @@ public abstract class DataFrameSingleNodeTestCase extends ESSingleNodeTestCase {
} }
}, e -> { }, e -> {
if (onException == null) { if (onException == null) {
logger.error("got unexpected exception", e);
fail("got unexpected exception: " + e.getMessage()); fail("got unexpected exception: " + e.getMessage());
} else { } else {
onException.accept(e); onException.accept(e);

View File

@@ -181,8 +181,8 @@ public class DataFrameIndexerTests extends ESTestCase {
} }
@Override @Override
protected boolean sourceHasChanged() { protected void sourceHasChanged(ActionListener<Boolean> listener) {
return false; listener.onResponse(false);
} }
} }

View File

@@ -108,7 +108,7 @@ public abstract class RollupIndexer extends AsyncTwoPhaseIndexer<Map<String, Obj
} }
@Override @Override
protected void onStart(long now, ActionListener<Void> listener) { protected void onStart(long now, ActionListener<Boolean> listener) {
try { try {
// this is needed to exclude buckets that can still receive new documents // this is needed to exclude buckets that can still receive new documents
DateHistogramGroupConfig dateHisto = job.getConfig().getGroupConfig().getDateHistogram(); DateHistogramGroupConfig dateHisto = job.getConfig().getGroupConfig().getDateHistogram();
@@ -116,7 +116,7 @@ public abstract class RollupIndexer extends AsyncTwoPhaseIndexer<Map<String, Obj
long delay = dateHisto.getDelay() != null ? long delay = dateHisto.getDelay() != null ?
TimeValue.parseTimeValue(dateHisto.getDelay().toString(), "").millis() : 0; TimeValue.parseTimeValue(dateHisto.getDelay().toString(), "").millis() : 0;
maxBoundary = dateHisto.createRounding().round(now - delay); maxBoundary = dateHisto.createRounding().round(now - delay);
listener.onResponse(null); listener.onResponse(true);
} catch (Exception e) { } catch (Exception e) {
listener.onFailure(e); listener.onFailure(e);
} }