[TEST] Use busy asserts in ML distributed failure test (#52461)
When changing a job state using a mechanism that doesn't wait for the desired state to be reached within the production code, the test code needs to loop until the cluster state has been updated. Closes #52451
This commit is contained in:
parent
6fa067a2a0
commit
9c49868bc5
|
@ -231,10 +231,12 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
|
||||||
PostDataAction.Response postDataResponse = client().execute(PostDataAction.INSTANCE, postDataRequest).actionGet();
|
PostDataAction.Response postDataResponse = client().execute(PostDataAction.INSTANCE, postDataRequest).actionGet();
|
||||||
assertEquals(1L, postDataResponse.getDataCounts().getInputRecordCount());
|
assertEquals(1L, postDataResponse.getDataCounts().getInputRecordCount());
|
||||||
|
|
||||||
// Confirm the job state is now failed
|
// Confirm the job state is now failed - this may take a while to update in cluster state
|
||||||
jobStatsRequest = new GetJobsStatsAction.Request(jobId);
|
assertBusy(() -> {
|
||||||
jobStatsResponse = client().execute(GetJobsStatsAction.INSTANCE, jobStatsRequest).actionGet();
|
GetJobsStatsAction.Request jobStatsRequest2 = new GetJobsStatsAction.Request(jobId);
|
||||||
assertEquals(JobState.FAILED, jobStatsResponse.getResponse().results().get(0).getState());
|
GetJobsStatsAction.Response jobStatsResponse2 = client().execute(GetJobsStatsAction.INSTANCE, jobStatsRequest2).actionGet();
|
||||||
|
assertEquals(JobState.FAILED, jobStatsResponse2.getResponse().results().get(0).getState());
|
||||||
|
});
|
||||||
|
|
||||||
// It's impossible to reliably get the datafeed into a stopping state at the point when the ML node is removed from the cluster
|
// It's impossible to reliably get the datafeed into a stopping state at the point when the ML node is removed from the cluster
|
||||||
// using externally accessible actions. The only way this situation could occur in reality is through extremely unfortunate
|
// using externally accessible actions. The only way this situation could occur in reality is through extremely unfortunate
|
||||||
|
@ -248,11 +250,13 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
|
||||||
client().execute(UpdatePersistentTaskStatusAction.INSTANCE, updatePersistentTaskStatusRequest).actionGet();
|
client().execute(UpdatePersistentTaskStatusAction.INSTANCE, updatePersistentTaskStatusRequest).actionGet();
|
||||||
assertNotNull(updatePersistentTaskStatusResponse.getTask());
|
assertNotNull(updatePersistentTaskStatusResponse.getTask());
|
||||||
|
|
||||||
// Confirm the datafeed state is now stopping
|
// Confirm the datafeed state is now stopping - this may take a while to update in cluster state
|
||||||
|
assertBusy(() -> {
|
||||||
GetDatafeedsStatsAction.Request datafeedStatsRequest = new GetDatafeedsStatsAction.Request(datafeedId);
|
GetDatafeedsStatsAction.Request datafeedStatsRequest = new GetDatafeedsStatsAction.Request(datafeedId);
|
||||||
GetDatafeedsStatsAction.Response datafeedStatsResponse =
|
GetDatafeedsStatsAction.Response datafeedStatsResponse =
|
||||||
client().execute(GetDatafeedsStatsAction.INSTANCE, datafeedStatsRequest).actionGet();
|
client().execute(GetDatafeedsStatsAction.INSTANCE, datafeedStatsRequest).actionGet();
|
||||||
assertEquals(DatafeedState.STOPPING, datafeedStatsResponse.getResponse().results().get(0).getDatafeedState());
|
assertEquals(DatafeedState.STOPPING, datafeedStatsResponse.getResponse().results().get(0).getDatafeedState());
|
||||||
|
});
|
||||||
|
|
||||||
// Stop the node running the failed job/stopping datafeed
|
// Stop the node running the failed job/stopping datafeed
|
||||||
ensureGreen(); // replicas must be assigned, otherwise we could lose a whole index
|
ensureGreen(); // replicas must be assigned, otherwise we could lose a whole index
|
||||||
|
@ -265,10 +269,12 @@ public class MlDistributedFailureIT extends BaseMlIntegTestCase {
|
||||||
StopDatafeedAction.Response stopDatafeedResponse = client().execute(StopDatafeedAction.INSTANCE, stopDatafeedRequest).actionGet();
|
StopDatafeedAction.Response stopDatafeedResponse = client().execute(StopDatafeedAction.INSTANCE, stopDatafeedRequest).actionGet();
|
||||||
assertTrue(stopDatafeedResponse.isStopped());
|
assertTrue(stopDatafeedResponse.isStopped());
|
||||||
|
|
||||||
// Confirm the datafeed state is now stopped
|
// Confirm the datafeed state is now stopped - shouldn't need a busy check here as
|
||||||
datafeedStatsRequest = new GetDatafeedsStatsAction.Request(datafeedId);
|
// the stop endpoint shouldn't return until its effects are externally visible
|
||||||
datafeedStatsResponse = client().execute(GetDatafeedsStatsAction.INSTANCE, datafeedStatsRequest).actionGet();
|
GetDatafeedsStatsAction.Request datafeedStatsRequest2 = new GetDatafeedsStatsAction.Request(datafeedId);
|
||||||
assertEquals(DatafeedState.STOPPED, datafeedStatsResponse.getResponse().results().get(0).getDatafeedState());
|
GetDatafeedsStatsAction.Response datafeedStatsResponse2 =
|
||||||
|
client().execute(GetDatafeedsStatsAction.INSTANCE, datafeedStatsRequest2).actionGet();
|
||||||
|
assertEquals(DatafeedState.STOPPED, datafeedStatsResponse2.getResponse().results().get(0).getDatafeedState());
|
||||||
|
|
||||||
// We should be allowed to force stop the unassigned failed job
|
// We should be allowed to force stop the unassigned failed job
|
||||||
CloseJobAction.Request closeJobRequest = new CloseJobAction.Request(jobId);
|
CloseJobAction.Request closeJobRequest = new CloseJobAction.Request(jobId);
|
||||||
|
|
Loading…
Reference in New Issue