Fix shard follow task startup error handling (#39053)

Prior to this commit, if a fatal error occurred while fetching the
leader / follower GCP (global checkpoint), the shard follow task was removed.

This is unexpected, because if such an error occurs during the lifetime of a shard follow task, replication is stopped and the fatal error flag is set. This allows the CCR stats API to report the fatal exception that has occurred (instead of the user having to grep through the Elasticsearch logs).

This issue was found by a rare failure of the `FollowStatsIT#testFollowStatsApiIncludeShardFollowStatsWithRemovedFollowerIndex` test.

Closes #38779
This commit is contained in:
Martijn van Groningen 2019-02-19 08:53:20 +01:00
parent 199155f5fb
commit c8d59f6f0f
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
3 changed files with 7 additions and 4 deletions

View File

@ -452,11 +452,15 @@ public abstract class ShardFollowNodeTask extends AllocatedPersistentTask {
scheduler.accept(TimeValue.timeValueMillis(delay), task);
}
} else {
fatalException = ExceptionsHelper.convertToElastic(e);
LOGGER.warn("shard follow task encounter non-retryable error", e);
setFatalException(e);
}
}
/**
 * Records a non-retryable failure on this shard follow task.
 *
 * The given exception is converted with {@link ExceptionsHelper#convertToElastic}
 * and stored in {@code fatalException}; a warning carrying the original exception
 * is then logged.
 *
 * @param e the non-retryable exception that was encountered
 */
void setFatalException(Exception e) {
fatalException = ExceptionsHelper.convertToElastic(e);
LOGGER.warn("shard follow task encounter non-retryable error", e);
}
static long computeDelay(int currentRetry, long maxRetryDelayInMillis) {
// Cap currentRetry to avoid overflow when computing n variable
int maxCurrentRetry = Math.min(currentRetry, 24);

View File

@ -282,7 +282,7 @@ public class ShardFollowTasksExecutor extends PersistentTasksExecutor<ShardFollo
shardFollowNodeTask), e);
threadPool.schedule(() -> nodeOperation(task, params, state), params.getMaxRetryDelay(), Ccr.CCR_THREAD_POOL_NAME);
} else {
shardFollowNodeTask.markAsFailed(e);
shardFollowNodeTask.setFatalException(e);
}
};

View File

@ -149,7 +149,6 @@ public class FollowStatsIT extends CcrSingleNodeTestCase {
assertAcked(client().execute(PauseFollowAction.INSTANCE, new PauseFollowAction.Request("follower1")).actionGet());
}
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/38779")
public void testFollowStatsApiIncludeShardFollowStatsWithRemovedFollowerIndex() throws Exception {
final String leaderIndexSettings = getIndexSettings(1, 0,
singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));