* [ML][Data Frame] adjusting change detection workflow * adjusting for PR comment * disallowing null as an argument value
This commit is contained in:
parent
647a8308c3
commit
fde5dae387
|
@ -154,7 +154,12 @@ public abstract class AsyncTwoPhaseIndexer<JobPosition, JobStats extends Indexer
|
||||||
// fire off the search. Note this is async, the method will return from here
|
// fire off the search. Note this is async, the method will return from here
|
||||||
executor.execute(() -> {
|
executor.execute(() -> {
|
||||||
onStart(now, ActionListener.wrap(r -> {
|
onStart(now, ActionListener.wrap(r -> {
|
||||||
nextSearch(ActionListener.wrap(this::onSearchResponse, this::finishWithSearchFailure));
|
assert r != null;
|
||||||
|
if (r) {
|
||||||
|
nextSearch(ActionListener.wrap(this::onSearchResponse, this::finishWithSearchFailure));
|
||||||
|
} else {
|
||||||
|
finishAndSetState();
|
||||||
|
}
|
||||||
}, e -> {
|
}, e -> {
|
||||||
finishAndSetState();
|
finishAndSetState();
|
||||||
onFailure(e);
|
onFailure(e);
|
||||||
|
@ -200,9 +205,11 @@ public abstract class AsyncTwoPhaseIndexer<JobPosition, JobStats extends Indexer
|
||||||
* internal state is {@link IndexerState#STARTED}.
|
* internal state is {@link IndexerState#STARTED}.
|
||||||
*
|
*
|
||||||
* @param now The current time in milliseconds passed through from {@link #maybeTriggerAsyncJob(long)}
|
* @param now The current time in milliseconds passed through from {@link #maybeTriggerAsyncJob(long)}
|
||||||
* @param listener listener to call after done
|
* @param listener listener to call after done. The argument passed to the listener indicates if the indexer should continue or not
|
||||||
|
* true: continue execution as normal
|
||||||
|
* false: cease execution. This does NOT call onFinish
|
||||||
*/
|
*/
|
||||||
protected abstract void onStart(long now, ActionListener<Void> listener);
|
protected abstract void onStart(long now, ActionListener<Boolean> listener);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Executes the {@link SearchRequest} and calls <code>nextPhase</code> with the
|
* Executes the {@link SearchRequest} and calls <code>nextPhase</code> with the
|
||||||
|
|
|
@ -88,10 +88,10 @@ public class AsyncTwoPhaseIndexerTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onStart(long now, ActionListener<Void> listener) {
|
protected void onStart(long now, ActionListener<Boolean> listener) {
|
||||||
assertThat(step, equalTo(0));
|
assertThat(step, equalTo(0));
|
||||||
++step;
|
++step;
|
||||||
listener.onResponse(null);
|
listener.onResponse(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -186,9 +186,9 @@ public class AsyncTwoPhaseIndexerTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onStart(long now, ActionListener<Void> listener) {
|
protected void onStart(long now, ActionListener<Boolean> listener) {
|
||||||
started = true;
|
started = true;
|
||||||
listener.onResponse(null);
|
listener.onResponse(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -270,10 +270,10 @@ public class AsyncTwoPhaseIndexerTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onStart(long now, ActionListener<Void> listener) {
|
protected void onStart(long now, ActionListener<Boolean> listener) {
|
||||||
assertThat(step, equalTo(0));
|
assertThat(step, equalTo(0));
|
||||||
++step;
|
++step;
|
||||||
listener.onResponse(null);
|
listener.onResponse(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -147,7 +147,7 @@ public abstract class DataFrameIndexer extends AsyncTwoPhaseIndexer<DataFrameInd
|
||||||
protected abstract void createCheckpoint(ActionListener<DataFrameTransformCheckpoint> listener);
|
protected abstract void createCheckpoint(ActionListener<DataFrameTransformCheckpoint> listener);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onStart(long now, ActionListener<Void> listener) {
|
protected void onStart(long now, ActionListener<Boolean> listener) {
|
||||||
try {
|
try {
|
||||||
pivot = new Pivot(getConfig().getPivotConfig());
|
pivot = new Pivot(getConfig().getPivotConfig());
|
||||||
|
|
||||||
|
@ -157,7 +157,7 @@ public abstract class DataFrameIndexer extends AsyncTwoPhaseIndexer<DataFrameInd
|
||||||
}
|
}
|
||||||
|
|
||||||
runState = determineRunStateAtStart();
|
runState = determineRunStateAtStart();
|
||||||
listener.onResponse(null);
|
listener.onResponse(true);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
listener.onFailure(e);
|
listener.onFailure(e);
|
||||||
}
|
}
|
||||||
|
@ -525,5 +525,5 @@ public abstract class DataFrameIndexer extends AsyncTwoPhaseIndexer<DataFrameInd
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract boolean sourceHasChanged();
|
protected abstract void sourceHasChanged(ActionListener<Boolean> hasChangedListener);
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,7 +12,6 @@ import org.apache.lucene.util.SetOnce;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.ResourceNotFoundException;
|
import org.elasticsearch.ResourceNotFoundException;
|
||||||
import org.elasticsearch.action.ActionListener;
|
import org.elasticsearch.action.ActionListener;
|
||||||
import org.elasticsearch.action.LatchedActionListener;
|
|
||||||
import org.elasticsearch.action.bulk.BulkAction;
|
import org.elasticsearch.action.bulk.BulkAction;
|
||||||
import org.elasticsearch.action.bulk.BulkItemResponse;
|
import org.elasticsearch.action.bulk.BulkItemResponse;
|
||||||
import org.elasticsearch.action.bulk.BulkRequest;
|
import org.elasticsearch.action.bulk.BulkRequest;
|
||||||
|
@ -57,8 +56,6 @@ import org.elasticsearch.xpack.dataframe.transforms.pivot.AggregationResultUtils
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.CountDownLatch;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
@ -324,15 +321,27 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug("Data frame indexer [{}] schedule has triggered, state: [{}]", event.getJobName(), getIndexer().getState());
|
if (taskState.get() == DataFrameTransformTaskState.FAILED) {
|
||||||
|
logger.debug("Schedule was triggered for transform [{}] but task is failed. Ignoring trigger.", getTransformId());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignore trigger if indexer is running or completely stopped
|
||||||
|
IndexerState indexerState = getIndexer().getState();
|
||||||
|
if (IndexerState.INDEXING.equals(indexerState) ||
|
||||||
|
IndexerState.STOPPING.equals(indexerState) ||
|
||||||
|
IndexerState.STOPPED.equals(indexerState)) {
|
||||||
|
logger.debug("Indexer for transform [{}] has state [{}], ignoring trigger", getTransformId(), indexerState);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug("Data frame indexer [{}] schedule has triggered, state: [{}]", event.getJobName(), indexerState);
|
||||||
|
|
||||||
// if it runs for the 1st time we just do it, if not we check for changes
|
// if it runs for the 1st time we just do it, if not we check for changes
|
||||||
if (currentCheckpoint.get() == 0 ) {
|
if (currentCheckpoint.get() == 0) {
|
||||||
logger.debug("Trigger initial run");
|
logger.debug("Trigger initial run");
|
||||||
getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis());
|
getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis());
|
||||||
} else if (getIndexer().isContinuous() && getIndexer().sourceHasChanged()) {
|
} else if (getIndexer().isContinuous()) {
|
||||||
changesLastDetectedAt = Instant.now();
|
|
||||||
logger.debug("Source has changed, triggering new indexer run");
|
|
||||||
getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis());
|
getIndexer().maybeTriggerAsyncJob(System.currentTimeMillis());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -620,7 +629,7 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onStart(long now, ActionListener<Void> listener) {
|
protected void onStart(long now, ActionListener<Boolean> listener) {
|
||||||
// On each run, we need to get the total number of docs and reset the count of processed docs
|
// On each run, we need to get the total number of docs and reset the count of processed docs
|
||||||
// Since multiple checkpoints can be executed in the task while it is running on the same node, we need to gather
|
// Since multiple checkpoints can be executed in the task while it is running on the same node, we need to gather
|
||||||
// the progress here, and not in the executor.
|
// the progress here, and not in the executor.
|
||||||
|
@ -657,30 +666,55 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
|
||||||
);
|
);
|
||||||
|
|
||||||
// If we are continuous, we will want to verify we have the latest stored configuration
|
// If we are continuous, we will want to verify we have the latest stored configuration
|
||||||
if (isContinuous()) {
|
ActionListener<Void> changedSourceListener = ActionListener.wrap(
|
||||||
transformsConfigManager.getTransformConfiguration(getJobId(), ActionListener.wrap(
|
r -> {
|
||||||
config -> {
|
if (isContinuous()) {
|
||||||
transformConfig = config;
|
transformsConfigManager.getTransformConfiguration(getJobId(), ActionListener.wrap(
|
||||||
logger.debug("[" + getJobId() + "] successfully refreshed data frame transform config from index.");
|
config -> {
|
||||||
|
transformConfig = config;
|
||||||
|
logger.debug("[{}] successfully refreshed data frame transform config from index.", transformId);
|
||||||
|
updateConfigListener.onResponse(null);
|
||||||
|
},
|
||||||
|
failure -> {
|
||||||
|
String msg = DataFrameMessages.getMessage(
|
||||||
|
DataFrameMessages.FAILED_TO_RELOAD_TRANSFORM_CONFIGURATION,
|
||||||
|
getJobId());
|
||||||
|
logger.error(msg, failure);
|
||||||
|
// If the transform config index or the transform config is gone, something serious occurred
|
||||||
|
// We are in an unknown state and should fail out
|
||||||
|
if (failure instanceof ResourceNotFoundException) {
|
||||||
|
updateConfigListener.onFailure(new TransformConfigReloadingException(msg, failure));
|
||||||
|
} else {
|
||||||
|
auditor.warning(getJobId(), msg);
|
||||||
|
updateConfigListener.onResponse(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
));
|
||||||
|
} else {
|
||||||
updateConfigListener.onResponse(null);
|
updateConfigListener.onResponse(null);
|
||||||
},
|
|
||||||
failure -> {
|
|
||||||
String msg = DataFrameMessages.getMessage(
|
|
||||||
DataFrameMessages.FAILED_TO_RELOAD_TRANSFORM_CONFIGURATION,
|
|
||||||
getJobId());
|
|
||||||
logger.error(msg, failure);
|
|
||||||
// If the transform config index or the transform config is gone, something serious occurred
|
|
||||||
// We are in an unknown state and should fail out
|
|
||||||
if (failure instanceof ResourceNotFoundException) {
|
|
||||||
updateConfigListener.onFailure(new TransformConfigReloadingException(msg, failure));
|
|
||||||
} else {
|
|
||||||
auditor.warning(getJobId(), msg);
|
|
||||||
updateConfigListener.onResponse(null);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
listener::onFailure
|
||||||
|
);
|
||||||
|
|
||||||
|
// If we are not on the initial batch checkpoint and its the first pass of whatever continuous checkpoint we are on,
|
||||||
|
// we should verify if there are local changes based on the sync config. If not, do not proceed further and exit.
|
||||||
|
if (transformTask.currentCheckpoint.get() > 0 && initialRun()) {
|
||||||
|
sourceHasChanged(ActionListener.wrap(
|
||||||
|
hasChanged -> {
|
||||||
|
if (hasChanged) {
|
||||||
|
transformTask.changesLastDetectedAt = Instant.now();
|
||||||
|
logger.debug("[{}] source has changed, triggering new indexer run.", transformId);
|
||||||
|
changedSourceListener.onResponse(null);
|
||||||
|
} else {
|
||||||
|
// No changes, stop executing
|
||||||
|
listener.onResponse(false);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
listener::onFailure
|
||||||
));
|
));
|
||||||
} else {
|
} else {
|
||||||
updateConfigListener.onResponse(null);
|
changedSourceListener.onResponse(null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -930,41 +964,22 @@ public class DataFrameTransformTask extends AllocatedPersistentTask implements S
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean sourceHasChanged() {
|
protected void sourceHasChanged(ActionListener<Boolean> hasChangedListener) {
|
||||||
if (getState() == IndexerState.INDEXING) {
|
|
||||||
logger.trace("Indexer is still running, ignore");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
CountDownLatch latch = new CountDownLatch(1);
|
|
||||||
SetOnce<Boolean> changed = new SetOnce<>();
|
|
||||||
|
|
||||||
checkpointProvider.sourceHasChanged(getLastCheckpoint(),
|
checkpointProvider.sourceHasChanged(getLastCheckpoint(),
|
||||||
new LatchedActionListener<>(ActionListener.wrap(changed::set, e -> {
|
ActionListener.wrap(
|
||||||
changed.set(false);
|
hasChanged -> {
|
||||||
|
logger.trace("[{}] change detected [{}]", transformId, hasChanged);
|
||||||
|
hasChangedListener.onResponse(hasChanged);
|
||||||
|
},
|
||||||
|
e -> {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
"Failed to detect changes for data frame transform [" + transformId + "], skipping update till next check",
|
"Failed to detect changes for data frame transform [" + transformId + "], skipping update till next check.",
|
||||||
e);
|
e);
|
||||||
|
|
||||||
auditor.warning(transformId,
|
auditor.warning(transformId,
|
||||||
"Failed to detect changes for data frame transform, skipping update till next check. Exception: "
|
"Failed to detect changes for data frame transform, skipping update till next check. Exception: "
|
||||||
+ e.getMessage());
|
|
||||||
}), latch));
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (latch.await(5, TimeUnit.SECONDS)) {
|
|
||||||
logger.trace("Change detected:" + changed.get());
|
|
||||||
return changed.get();
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
logger.warn("Failed to detect changes for data frame transform [" + transformId + "], skipping update till next check", e);
|
|
||||||
|
|
||||||
auditor.warning(transformId,
|
|
||||||
"Failed to detect changes for data frame transform, skipping update till next check. Exception: "
|
|
||||||
+ e.getMessage());
|
+ e.getMessage());
|
||||||
}
|
hasChangedListener.onResponse(false);
|
||||||
|
}));
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isIrrecoverableFailure(Exception e) {
|
private boolean isIrrecoverableFailure(Exception e) {
|
||||||
|
|
|
@ -63,6 +63,7 @@ public abstract class DataFrameSingleNodeTestCase extends ESSingleNodeTestCase {
|
||||||
}
|
}
|
||||||
}, e -> {
|
}, e -> {
|
||||||
if (onException == null) {
|
if (onException == null) {
|
||||||
|
logger.error("got unexpected exception", e);
|
||||||
fail("got unexpected exception: " + e.getMessage());
|
fail("got unexpected exception: " + e.getMessage());
|
||||||
} else {
|
} else {
|
||||||
onException.accept(e);
|
onException.accept(e);
|
||||||
|
|
|
@ -181,8 +181,8 @@ public class DataFrameIndexerTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean sourceHasChanged() {
|
protected void sourceHasChanged(ActionListener<Boolean> listener) {
|
||||||
return false;
|
listener.onResponse(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -108,7 +108,7 @@ public abstract class RollupIndexer extends AsyncTwoPhaseIndexer<Map<String, Obj
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void onStart(long now, ActionListener<Void> listener) {
|
protected void onStart(long now, ActionListener<Boolean> listener) {
|
||||||
try {
|
try {
|
||||||
// this is needed to exclude buckets that can still receive new documents
|
// this is needed to exclude buckets that can still receive new documents
|
||||||
DateHistogramGroupConfig dateHisto = job.getConfig().getGroupConfig().getDateHistogram();
|
DateHistogramGroupConfig dateHisto = job.getConfig().getGroupConfig().getDateHistogram();
|
||||||
|
@ -116,7 +116,7 @@ public abstract class RollupIndexer extends AsyncTwoPhaseIndexer<Map<String, Obj
|
||||||
long delay = dateHisto.getDelay() != null ?
|
long delay = dateHisto.getDelay() != null ?
|
||||||
TimeValue.parseTimeValue(dateHisto.getDelay().toString(), "").millis() : 0;
|
TimeValue.parseTimeValue(dateHisto.getDelay().toString(), "").millis() : 0;
|
||||||
maxBoundary = dateHisto.createRounding().round(now - delay);
|
maxBoundary = dateHisto.createRounding().round(now - delay);
|
||||||
listener.onResponse(null);
|
listener.onResponse(true);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
listener.onFailure(e);
|
listener.onFailure(e);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue