[ML] Add reason to DataFrameAnalyticsTask setFailed log message (#52659) (#52707)

This commit is contained in:
David Kyle 2020-02-24 15:21:51 +00:00 committed by GitHub
parent 5e48811585
commit 044a4e127a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 23 additions and 22 deletions

View File

@ -60,7 +60,8 @@ public final class Messages {
public static final String DATA_FRAME_ANALYTICS_AUDIT_STARTED = "Started analytics"; public static final String DATA_FRAME_ANALYTICS_AUDIT_STARTED = "Started analytics";
public static final String DATA_FRAME_ANALYTICS_AUDIT_STOPPED = "Stopped analytics"; public static final String DATA_FRAME_ANALYTICS_AUDIT_STOPPED = "Stopped analytics";
public static final String DATA_FRAME_ANALYTICS_AUDIT_DELETED = "Deleted analytics"; public static final String DATA_FRAME_ANALYTICS_AUDIT_DELETED = "Deleted analytics";
public static final String DATA_FRAME_ANALYTICS_AUDIT_UPDATED_STATE = "Successfully updated analytics task state to [{0}]"; public static final String DATA_FRAME_ANALYTICS_AUDIT_UPDATED_STATE_WITH_REASON =
"Updated analytics task state to [{0}] with reason [{1}]";
public static final String DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE = "Estimated memory usage for this analytics to be [{0}]"; public static final String DATA_FRAME_ANALYTICS_AUDIT_ESTIMATED_MEMORY_USAGE = "Estimated memory usage for this analytics to be [{0}]";
public static final String DATA_FRAME_ANALYTICS_AUDIT_CREATING_DEST_INDEX = "Creating destination index [{0}]"; public static final String DATA_FRAME_ANALYTICS_AUDIT_CREATING_DEST_INDEX = "Creating destination index [{0}]";
public static final String DATA_FRAME_ANALYTICS_AUDIT_REUSING_DEST_INDEX = "Using existing destination index [{0}]"; public static final String DATA_FRAME_ANALYTICS_AUDIT_REUSING_DEST_INDEX = "Using existing destination index [{0}]";

View File

@ -93,12 +93,12 @@ public class DataFrameAnalyticsManager {
executeJobInMiddleOfReindexing(task, config); executeJobInMiddleOfReindexing(task, config);
break; break;
default: default:
task.updateState(DataFrameAnalyticsState.FAILED, "Cannot execute analytics task [" + config.getId() + task.setFailed("Cannot execute analytics task [" + config.getId() +
"] as it is in unknown state [" + currentState + "]. Must be one of [STARTED, REINDEXING, ANALYZING]"); "] as it is in unknown state [" + currentState + "]. Must be one of [STARTED, REINDEXING, ANALYZING]");
} }
}, },
error -> task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()) error -> task.setFailed(error.getMessage())
); );
// Retrieve configuration // Retrieve configuration
@ -122,13 +122,13 @@ public class DataFrameAnalyticsManager {
case FIRST_TIME: case FIRST_TIME:
task.updatePersistentTaskState(reindexingState, ActionListener.wrap( task.updatePersistentTaskState(reindexingState, ActionListener.wrap(
updatedTask -> reindexDataframeAndStartAnalysis(task, config), updatedTask -> reindexDataframeAndStartAnalysis(task, config),
error -> task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()) error -> task.setFailed(error.getMessage())
)); ));
break; break;
case RESUMING_REINDEXING: case RESUMING_REINDEXING:
task.updatePersistentTaskState(reindexingState, ActionListener.wrap( task.updatePersistentTaskState(reindexingState, ActionListener.wrap(
updatedTask -> executeJobInMiddleOfReindexing(task, config), updatedTask -> executeJobInMiddleOfReindexing(task, config),
error -> task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()) error -> task.setFailed(error.getMessage())
)); ));
break; break;
case RESUMING_ANALYZING: case RESUMING_ANALYZING:
@ -136,7 +136,7 @@ public class DataFrameAnalyticsManager {
break; break;
case FINISHED: case FINISHED:
default: default:
task.updateState(DataFrameAnalyticsState.FAILED, "Unexpected starting state [" + startingState + "]"); task.setFailed("Unexpected starting state [" + startingState + "]");
} }
} }
@ -151,7 +151,7 @@ public class DataFrameAnalyticsManager {
if (ExceptionsHelper.unwrapCause(e) instanceof IndexNotFoundException) { if (ExceptionsHelper.unwrapCause(e) instanceof IndexNotFoundException) {
reindexDataframeAndStartAnalysis(task, config); reindexDataframeAndStartAnalysis(task, config);
} else { } else {
task.updateState(DataFrameAnalyticsState.FAILED, e.getMessage()); task.setFailed(e.getMessage());
} }
} }
)); ));
@ -178,7 +178,7 @@ public class DataFrameAnalyticsManager {
Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_REINDEXING, config.getDest().getIndex())); Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_REINDEXING, config.getDest().getIndex()));
startAnalytics(task, config); startAnalytics(task, config);
}, },
error -> task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()) error -> task.setFailed(error.getMessage())
); );
// Reindex // Reindex
@ -244,12 +244,12 @@ public class DataFrameAnalyticsManager {
if (ExceptionsHelper.unwrapCause(error) instanceof ResourceNotFoundException) { if (ExceptionsHelper.unwrapCause(error) instanceof ResourceNotFoundException) {
// Task has stopped // Task has stopped
} else { } else {
task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()); task.setFailed(error.getMessage());
} }
} }
)); ));
}, },
error -> task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()) error -> task.setFailed(error.getMessage())
); );
ActionListener<RefreshResponse> refreshListener = ActionListener.wrap( ActionListener<RefreshResponse> refreshListener = ActionListener.wrap(

View File

@ -177,17 +177,20 @@ public class DataFrameAnalyticsTask extends AllocatedPersistentTask implements S
} }
} }
public void updateState(DataFrameAnalyticsState state, @Nullable String reason) { public void setFailed(String reason) {
DataFrameAnalyticsTaskState newTaskState = new DataFrameAnalyticsTaskState(state, getAllocationId(), reason); DataFrameAnalyticsTaskState newTaskState = new DataFrameAnalyticsTaskState(DataFrameAnalyticsState.FAILED,
getAllocationId(), reason);
updatePersistentTaskState( updatePersistentTaskState(
newTaskState, newTaskState,
ActionListener.wrap( ActionListener.wrap(
updatedTask -> { updatedTask -> {
auditor.info(getParams().getId(), Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_UPDATED_STATE, state)); String message = Messages.getMessage(Messages.DATA_FRAME_ANALYTICS_AUDIT_UPDATED_STATE_WITH_REASON,
LOGGER.info("[{}] Successfully update task state to [{}]", getParams().getId(), state); DataFrameAnalyticsState.FAILED, reason);
auditor.info(getParams().getId(), message);
LOGGER.info("[{}] {}", getParams().getId(), message);
}, },
e -> LOGGER.error(new ParameterizedMessage("[{}] Could not update task state to [{}] with reason [{}]", e -> LOGGER.error(new ParameterizedMessage("[{}] Could not update task state to [{}] with reason [{}]",
getParams().getId(), state, reason), e) getParams().getId(), DataFrameAnalyticsState.FAILED, reason), e)
) )
); );
} }

View File

@ -23,7 +23,6 @@ import org.elasticsearch.search.SearchHit;
import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis; import org.elasticsearch.xpack.core.ml.dataframe.analyses.DataFrameAnalysis;
import org.elasticsearch.xpack.core.ml.job.messages.Messages; import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex; import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
@ -110,8 +109,7 @@ public class AnalyticsProcessManager {
return; return;
} }
if (processContextByAllocation.putIfAbsent(task.getAllocationId(), processContext) != null) { if (processContextByAllocation.putIfAbsent(task.getAllocationId(), processContext) != null) {
task.updateState( task.setFailed("[" + config.getId() + "] Could not create process as one already exists");
DataFrameAnalyticsState.FAILED, "[" + config.getId() + "] Could not create process as one already exists");
return; return;
} }
} }
@ -193,7 +191,7 @@ public class AnalyticsProcessManager {
task.markAsCompleted(); task.markAsCompleted();
} else { } else {
LOGGER.error("[{}] Marking task failed; {}", config.getId(), processContext.getFailureReason()); LOGGER.error("[{}] Marking task failed; {}", config.getId(), processContext.getFailureReason());
task.updateState(DataFrameAnalyticsState.FAILED, processContext.getFailureReason()); task.setFailed(processContext.getFailureReason());
// Note: We are not marking the task as failed here as we want the user to be able to inspect the failure reason. // Note: We are not marking the task as failed here as we want the user to be able to inspect the failure reason.
} }
} }
@ -265,7 +263,7 @@ public class AnalyticsProcessManager {
process.restoreState(state); process.restoreState(state);
} catch (Exception e) { } catch (Exception e) {
LOGGER.error(new ParameterizedMessage("[{}] Failed to restore state", process.getConfig().jobId()), e); LOGGER.error(new ParameterizedMessage("[{}] Failed to restore state", process.getConfig().jobId()), e);
task.updateState(DataFrameAnalyticsState.FAILED, "Failed to restore state: " + e.getMessage()); task.setFailed("Failed to restore state: " + e.getMessage());
} }
} }

View File

@ -14,7 +14,6 @@ import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfigTests; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfigTests;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests; import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests;
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsTask; import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsTask;
import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractor; import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractor;
@ -131,7 +130,7 @@ public class AnalyticsProcessManagerTests extends ESTestCase {
inOrder.verify(task).getStatsHolder(); inOrder.verify(task).getStatsHolder();
inOrder.verify(task).isStopping(); inOrder.verify(task).isStopping();
inOrder.verify(task).getAllocationId(); inOrder.verify(task).getAllocationId();
inOrder.verify(task).updateState(DataFrameAnalyticsState.FAILED, "[config-id] Could not create process as one already exists"); inOrder.verify(task).setFailed("[config-id] Could not create process as one already exists");
verifyNoMoreInteractions(task); verifyNoMoreInteractions(task);
} }