From af1ff52e70117dbfff8a893fe01f75e7a1024c8d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 24 Jan 2020 20:33:13 +0100 Subject: [PATCH] Fix TransportMasterNodeAction not Retrying NodeClosedException (#51325) (#51437) Added node closed exception to the retryable remote exceptions as it's possible to run into this exception instead of a connect exception when the master node is just shutting down but still responding to requests. --- .../master/TransportMasterNodeAction.java | 4 +++- .../master/TransportMasterNodeActionTests.java | 4 +++- .../elasticsearch/cluster/ClusterHealthIT.java | 16 ++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/support/master/TransportMasterNodeAction.java b/server/src/main/java/org/elasticsearch/action/support/master/TransportMasterNodeAction.java index 154359ff993..e3beeeafc58 100644 --- a/server/src/main/java/org/elasticsearch/action/support/master/TransportMasterNodeAction.java +++ b/server/src/main/java/org/elasticsearch/action/support/master/TransportMasterNodeAction.java @@ -46,6 +46,7 @@ import org.elasticsearch.node.NodeClosedException; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.RemoteTransportException; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportService; @@ -180,7 +181,8 @@ public abstract class TransportMasterNodeAction> responseFutures = new ArrayList<>(); + // Run a few health requests concurrently with master failovers against a data-node to make sure master failover is handled + // without exceptions + for (int i = 0; i < 20; ++i) { + responseFutures.add(client(node).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).execute()); + internalCluster().restartNode(internalCluster().getMasterName(), InternalTestCluster.EMPTY_CALLBACK); + } 
+ for (ActionFuture responseFuture : responseFutures) { + assertSame(responseFuture.get().getStatus(), ClusterHealthStatus.GREEN); + } + } }