CCR: Add TransportService closed to retryable errors (#34722)

Both testFollowIndexAndCloseNode and testFailOverOnFollower failed
because the FollowTask received a "TransportService is closed"
exception, which is currently treated as a fatal error. This behavior
is not desirable: a node that is closing can throw that exception, and
we should retry in that case.

This change adds the "TransportService is closed" error to the list of
retryable errors.

Closes #34694
This commit is contained in:
Nhat Nguyen 2018-10-23 14:23:29 -04:00 committed by GitHub
parent 90fd15bb56
commit e242fd2e42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 7 additions and 3 deletions

View File

@ -28,6 +28,8 @@ import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.indices.IndexClosedException; import org.elasticsearch.indices.IndexClosedException;
import org.elasticsearch.persistent.AllocatedPersistentTask; import org.elasticsearch.persistent.AllocatedPersistentTask;
import org.elasticsearch.tasks.TaskId; import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.transport.NodeDisconnectedException;
import org.elasticsearch.transport.NodeNotConnectedException;
import org.elasticsearch.xpack.ccr.action.bulk.BulkShardOperationsResponse; import org.elasticsearch.xpack.ccr.action.bulk.BulkShardOperationsResponse;
import org.elasticsearch.xpack.core.ccr.ShardFollowNodeTaskStatus; import org.elasticsearch.xpack.core.ccr.ShardFollowNodeTaskStatus;
@ -371,6 +373,7 @@ public abstract class ShardFollowNodeTask extends AllocatedPersistentTask {
scheduler.accept(TimeValue.timeValueMillis(delay), task); scheduler.accept(TimeValue.timeValueMillis(delay), task);
} else { } else {
fatalException = ExceptionsHelper.convertToElastic(e); fatalException = ExceptionsHelper.convertToElastic(e);
LOGGER.warn("shard follow task encounter non-retryable error", e);
} }
} }
@ -399,7 +402,10 @@ public abstract class ShardFollowNodeTask extends AllocatedPersistentTask {
actual instanceof AlreadyClosedException || actual instanceof AlreadyClosedException ||
actual instanceof ElasticsearchSecurityException || // If user does not have sufficient privileges actual instanceof ElasticsearchSecurityException || // If user does not have sufficient privileges
actual instanceof ClusterBlockException || // If leader index is closed or no elected master actual instanceof ClusterBlockException || // If leader index is closed or no elected master
actual instanceof IndexClosedException; // If follow index is closed actual instanceof IndexClosedException || // If follow index is closed
actual instanceof NodeDisconnectedException ||
actual instanceof NodeNotConnectedException ||
(actual.getMessage() != null && actual.getMessage().contains("TransportService is closed"));
} }
// These methods are protected for testing purposes: // These methods are protected for testing purposes:

View File

@ -270,7 +270,6 @@ public class IndexFollowingIT extends CcrIntegTestCase {
assertMaxSeqNoOfUpdatesIsTransferred(resolveLeaderIndex("index1"), resolveFollowerIndex("index2"), numberOfShards); assertMaxSeqNoOfUpdatesIsTransferred(resolveLeaderIndex("index1"), resolveFollowerIndex("index2"), numberOfShards);
} }
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/34696")
public void testFollowIndexAndCloseNode() throws Exception { public void testFollowIndexAndCloseNode() throws Exception {
getFollowerCluster().ensureAtLeastNumDataNodes(3); getFollowerCluster().ensureAtLeastNumDataNodes(3);
String leaderIndexSettings = getIndexSettings(3, 1, singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true")); String leaderIndexSettings = getIndexSettings(3, 1, singletonMap(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), "true"));
@ -587,7 +586,6 @@ public class IndexFollowingIT extends CcrIntegTestCase {
assertThat(followerClient().prepareSearch("index2").get().getHits().getTotalHits(), equalTo(2L)); assertThat(followerClient().prepareSearch("index2").get().getHits().getTotalHits(), equalTo(2L));
} }
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/34696")
public void testFailOverOnFollower() throws Exception { public void testFailOverOnFollower() throws Exception {
int numberOfReplicas = between(1, 2); int numberOfReplicas = between(1, 2);
getFollowerCluster().startMasterOnlyNode(); getFollowerCluster().startMasterOnlyNode();