CCR: Add TransportService closed to retryable errors (#34722)
Both testFollowIndexAndCloseNode and testFailOverOnFollower failed because the FollowTask received a "TransportService is closed" exception in response to its requests, and that exception is currently treated as a fatal error. This behavior is undesirable: a node that is shutting down can throw that exception, and the task should retry in that case. This change adds the "TransportService is closed" error to the list of retryable errors. Closes #34694
This commit is contained in:
parent
90fd15bb56
commit
e242fd2e42
|
@ -28,6 +28,8 @@ import org.elasticsearch.index.translog.Translog;
|
|||
import org.elasticsearch.indices.IndexClosedException;
|
||||
import org.elasticsearch.persistent.AllocatedPersistentTask;
|
||||
import org.elasticsearch.tasks.TaskId;
|
||||
import org.elasticsearch.transport.NodeDisconnectedException;
|
||||
import org.elasticsearch.transport.NodeNotConnectedException;
|
||||
import org.elasticsearch.xpack.ccr.action.bulk.BulkShardOperationsResponse;
|
||||
import org.elasticsearch.xpack.core.ccr.ShardFollowNodeTaskStatus;
|
||||
|
||||
|
@ -371,6 +373,7 @@ public abstract class ShardFollowNodeTask extends AllocatedPersistentTask {
|
|||
scheduler.accept(TimeValue.timeValueMillis(delay), task);
|
||||
} else {
|
||||
fatalException = ExceptionsHelper.convertToElastic(e);
|
||||
LOGGER.warn("shard follow task encounter non-retryable error", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -399,7 +402,10 @@ public abstract class ShardFollowNodeTask extends AllocatedPersistentTask {
|
|||
actual instanceof AlreadyClosedException ||
|
||||
actual instanceof ElasticsearchSecurityException || // If user does not have sufficient privileges
|
||||
actual instanceof ClusterBlockException || // If leader index is closed or no elected master
|
||||
actual instanceof IndexClosedException; // If follow index is closed
|
||||
actual instanceof IndexClosedException || // If follow index is closed
|
||||
actual instanceof NodeDisconnectedException ||
|
||||
actual instanceof NodeNotConnectedException ||
|
||||
(actual.getMessage() != null && actual.getMessage().contains("TransportService is closed"));
|
||||
}
|
||||
|
||||
// These methods are protected for testing purposes:
|
||||
|
|
|
@ -270,7 +270,6 @@ public class IndexFollowingIT extends CcrIntegTestCase {
|
|||
assertMaxSeqNoOfUpdatesIsTransferred(resolveLeaderIndex("index1"), resolveFollowerIndex("index2"), numberOfShards);
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/34696")
|
||||
public void testFollowIndexAndCloseNode() throws Exception {
|
||||
getFollowerCluster().ensureAtLeastNumDataNodes(3);
|
||||
String leaderIndexSettings = getIndexSettings(3, 1, singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));
|
||||
|
@ -587,7 +586,6 @@ public class IndexFollowingIT extends CcrIntegTestCase {
|
|||
assertThat(followerClient().prepareSearch("index2").get().getHits().getTotalHits(), equalTo(2L));
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/34696")
|
||||
public void testFailOverOnFollower() throws Exception {
|
||||
int numberOfReplicas = between(1, 2);
|
||||
getFollowerCluster().startMasterOnlyNode();
|
||||
|
|
Loading…
Reference in New Issue