mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-22 12:56:53 +00:00
Fix shard follow task startup error handling (#39053)
Prior to this commit, if a fatal error occurred while fetching the leader / follower GCP, the shard follow task was removed. This is unexpected, because if such an error occurs during the lifetime of a shard follow task, replication is stopped and the fatal error flag is set. This allows the ccr stats api to report the fatal exception that has occurred (instead of the user grepping through the elasticsearch logs). This issue was found by a rare failure of the `FollowStatsIT#testFollowStatsApiIncludeShardFollowStatsWithRemovedFollowerIndex` test. Closes #38779
This commit is contained in:
parent
199155f5fb
commit
c8d59f6f0f
@ -452,11 +452,15 @@ public abstract class ShardFollowNodeTask extends AllocatedPersistentTask {
|
||||
scheduler.accept(TimeValue.timeValueMillis(delay), task);
|
||||
}
|
||||
} else {
|
||||
fatalException = ExceptionsHelper.convertToElastic(e);
|
||||
LOGGER.warn("shard follow task encounter non-retryable error", e);
|
||||
setFatalException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Records {@code e} as this task's fatal exception and logs it at WARN level.
 * The exception is stored after conversion via {@code ExceptionsHelper.convertToElastic}.
 *
 * @param e the non-retryable error encountered by the shard follow task
 */
void setFatalException(Exception e) {
|
||||
fatalException = ExceptionsHelper.convertToElastic(e);
|
||||
LOGGER.warn("shard follow task encounter non-retryable error", e);
|
||||
}
|
||||
|
||||
static long computeDelay(int currentRetry, long maxRetryDelayInMillis) {
|
||||
// Cap currentRetry to avoid overflow when computing n variable
|
||||
int maxCurrentRetry = Math.min(currentRetry, 24);
|
||||
|
@ -282,7 +282,7 @@ public class ShardFollowTasksExecutor extends PersistentTasksExecutor<ShardFollo
|
||||
shardFollowNodeTask), e);
|
||||
threadPool.schedule(() -> nodeOperation(task, params, state), params.getMaxRetryDelay(), Ccr.CCR_THREAD_POOL_NAME);
|
||||
} else {
|
||||
shardFollowNodeTask.markAsFailed(e);
|
||||
shardFollowNodeTask.setFatalException(e);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -149,7 +149,6 @@ public class FollowStatsIT extends CcrSingleNodeTestCase {
|
||||
assertAcked(client().execute(PauseFollowAction.INSTANCE, new PauseFollowAction.Request("follower1")).actionGet());
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/38779")
|
||||
public void testFollowStatsApiIncludeShardFollowStatsWithRemovedFollowerIndex() throws Exception {
|
||||
final String leaderIndexSettings = getIndexSettings(1, 0,
|
||||
singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));
|
||||
|
Loading…
x
Reference in New Issue
Block a user