Deprecate and ignore join timeout (#60872)

There is no point in timing out a join attempt any more once a cluster is entirely in 7.x. Timing out and retrying with the same master is pointless, and an in-flight join attempt to one master no longer blocks attempts to join other masters. This commit deprecates this unnecessary setting and removes its effect from the joining process. Relates #60873 which removes this setting in master.
2020-08-10 13:57:41 +01:00 · 2020-08-10 13:57:41 +01:00 · f44c28b595
parent 235e5ed3ea
commit f44c28b595
5 changed files with 19 additions and 30 deletions
--- a/docs/reference/modules/discovery/discovery-settings.asciidoc
+++ b/docs/reference/modules/discovery/discovery-settings.asciidoc
@ -43,9 +43,9 @@ compatibility. Support for the old name will be removed in a future version.
    Specifies whether {es} should form a multiple-node cluster. By default, {es}
    discovers other nodes when forming a cluster and allows other nodes to join
    the cluster later. If `discovery.type` is set to `single-node`, {es} forms a
-    single-node cluster and suppresses the timeouts set by
+    single-node cluster and suppresses the timeout set by
-    `cluster.publish.timeout` and `cluster.join.timeout`. For more information
+    `cluster.publish.timeout`. For more information about when you might use
-    about when you might use this setting, see <<single-node-discovery>>.
+    this setting, see <<single-node-discovery>>.
 `cluster.initial_master_nodes`::
@ -196,9 +196,9 @@ or may become unstable or intolerant of certain failures.
 `cluster.join.timeout`::
-    Sets how long a node will wait after sending a request to join a cluster
+    deprecated[7.10, Has no effect in 7.x clusters] Sets how long a node will
-    before it considers the request to have failed and retries, unless
+    wait after sending a request to join a version 6.8 master before it
-    `discovery.type` is set to `single-node`. Defaults to `60s`.
+    considers the request to have failed and retries. Defaults to `60s`.
 `cluster.max_voting_config_exclusions`::
--- a/docs/reference/setup/add-nodes.asciidoc
+++ b/docs/reference/setup/add-nodes.asciidoc
@ -67,9 +67,7 @@ to the voting configuration if it is appropriate to do so.
 During master election or when joining an existing formed cluster, a node
 sends a join request to the master in order to be officially added to the
-cluster. You can use the `cluster.join.timeout` setting to configure how long a
+cluster.
-node waits after sending a request to join a cluster. Its default value is `30s`.
-See <<modules-discovery-settings>>.
 [discrete]
 [[modules-discovery-removing-nodes]]
--- a/server/src/internalClusterTest/java/org/elasticsearch/action/support/master/IndexingMasterFailoverIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/action/support/master/IndexingMasterFailoverIT.java
@ -52,15 +52,11 @@ public class IndexingMasterFailoverIT extends ESIntegTestCase {
    public void testMasterFailoverDuringIndexingWithMappingChanges() throws Throwable {
        logger.info("--> start 4 nodes, 3 master, 1 data");
-        final Settings sharedSettings = Settings.builder()
-                .put("cluster.join.timeout", "10s")  // still long to induce failures but not too long so test won't time out
-                .build();
        internalCluster().setBootstrapMasterNodeIndex(2);
-        internalCluster().startMasterOnlyNodes(3, sharedSettings);
+        internalCluster().startMasterOnlyNodes(3, Settings.EMPTY);
-        String dataNode = internalCluster().startDataOnlyNode(sharedSettings);
+        String dataNode = internalCluster().startDataOnlyNode(Settings.EMPTY);
        logger.info("--> wait for all nodes to join the cluster");
        ensureStableCluster(4);
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/JoinHelper.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/JoinHelper.java
@ -34,7 +34,6 @@ import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.routing.RerouteService;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.cluster.service.MasterService;
-import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.io.stream.StreamInput;
@ -43,7 +42,6 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.discovery.zen.MembershipAction;
 import org.elasticsearch.discovery.zen.ZenDiscovery;
-import org.elasticsearch.discovery.DiscoveryModule;
 import org.elasticsearch.monitor.NodeHealthService;
 import org.elasticsearch.monitor.StatusInfo;
 import org.elasticsearch.threadpool.ThreadPool;
@ -83,22 +81,21 @@ public class JoinHelper {
    public static final String VALIDATE_JOIN_ACTION_NAME = "internal:cluster/coordination/join/validate";
    public static final String START_JOIN_ACTION_NAME = "internal:cluster/coordination/start_join";
-    // the timeout for each join attempt
+    // the timeout for Zen1 join attempts
    public static final Setting<TimeValue> JOIN_TIMEOUT_SETTING =
        Setting.timeSetting("cluster.join.timeout",
-            TimeValue.timeValueMillis(60000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
+            TimeValue.timeValueMillis(60000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope, Setting.Property.Deprecated);
    private final MasterService masterService;
    private final TransportService transportService;
    private final JoinTaskExecutor joinTaskExecutor;
-    @Nullable // if using single-node discovery
+    private final TimeValue joinTimeout; // only used for Zen1 joining
-    private final TimeValue joinTimeout;
    private final NodeHealthService nodeHealthService;
    private final Set<Tuple<DiscoveryNode, JoinRequest>> pendingOutgoingJoins = Collections.synchronizedSet(new HashSet<>());
-    private AtomicReference<FailedJoinAttempt> lastFailedJoinAttempt = new AtomicReference<>();
+    private final AtomicReference<FailedJoinAttempt> lastFailedJoinAttempt = new AtomicReference<>();
    JoinHelper(Settings settings, AllocationService allocationService, MasterService masterService,
               TransportService transportService, LongSupplier currentTermSupplier, Supplier<ClusterState> currentStateSupplier,
@ -108,7 +105,7 @@ public class JoinHelper {
        this.masterService = masterService;
        this.transportService = transportService;
        this.nodeHealthService = nodeHealthService;
-        this.joinTimeout = DiscoveryModule.isSingleNodeDiscovery(settings) ? null : JOIN_TIMEOUT_SETTING.get(settings);
+        this.joinTimeout = JOIN_TIMEOUT_SETTING.get(settings);
        this.joinTaskExecutor = new JoinTaskExecutor(settings, allocationService, logger, rerouteService) {
            @Override
@ -286,15 +283,17 @@ public class JoinHelper {
            logger.debug("attempting to join {} with {}", destination, joinRequest);
            final String actionName;
            final TransportRequest transportRequest;
+            final TransportRequestOptions transportRequestOptions;
            if (Coordinator.isZen1Node(destination)) {
                actionName = MembershipAction.DISCOVERY_JOIN_ACTION_NAME;
                transportRequest = new MembershipAction.JoinRequest(transportService.getLocalNode());
+                transportRequestOptions = TransportRequestOptions.builder().withTimeout(joinTimeout).build();
            } else {
                actionName = JOIN_ACTION_NAME;
                transportRequest = joinRequest;
+                transportRequestOptions = TransportRequestOptions.EMPTY;
            }
-            transportService.sendRequest(destination, actionName, transportRequest,
+            transportService.sendRequest(destination, actionName, transportRequest, transportRequestOptions,
-                TransportRequestOptions.builder().withTimeout(joinTimeout).build(),
                new TransportResponseHandler<Empty>() {
                    @Override
                    public Empty read(StreamInput in) {
@ -363,9 +362,7 @@ public class JoinHelper {
        } else {
            actionName = VALIDATE_JOIN_ACTION_NAME;
        }
-        transportService.sendRequest(node, actionName,
+        transportService.sendRequest(node, actionName, new ValidateJoinRequest(state),
-            new ValidateJoinRequest(state),
-            TransportRequestOptions.builder().withTimeout(joinTimeout).build(),
            new ActionListenerResponseHandler<>(listener, i -> Empty.INSTANCE, ThreadPool.Names.GENERIC));
    }
--- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java
+++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java
@ -24,7 +24,6 @@ import org.elasticsearch.cluster.block.ClusterBlock;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.coordination.Coordinator;
 import org.elasticsearch.cluster.coordination.FollowersChecker;
-import org.elasticsearch.cluster.coordination.JoinHelper;
 import org.elasticsearch.cluster.coordination.LeaderChecker;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.common.Nullable;
@ -126,7 +125,6 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase {
            .put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
            .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
            .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
-            .put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but to long so test won't time out
            .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly
            .put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
            // value and the time of disruption and does not recover immediately