From 11f41c4e7d07d5ad1fd850cfd774e4315ef6238c Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 19 Apr 2019 16:02:35 +0100 Subject: [PATCH] Omit non-masters in ClusterFormationFailureHelper (#41344) Today the `ClusterFormationFailureHelper` says `... discovery will continue using ... from last-known cluster state` and lists all the nodes in the last-known cluster state. In fact we ignore the master-ineligible nodes in the last-known cluster state during discovery. This commit fixes this by listing only the master-eligible nodes from the cluster state in this message. --- .../ClusterFormationFailureHelper.java | 4 +- .../ClusterFormationFailureHelperTests.java | 42 +++++++++++++++---- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java index aaae94d0297..a707a9ae980 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java @@ -133,8 +133,8 @@ public class ClusterFormationFailureHelper { } String getDescription() { - final List clusterStateNodes - = StreamSupport.stream(clusterState.nodes().spliterator(), false).map(DiscoveryNode::toString).collect(Collectors.toList()); + final List clusterStateNodes = StreamSupport.stream(clusterState.nodes().getMasterNodes().values().spliterator(), false) + .map(n -> n.value.toString()).collect(Collectors.toList()); final String discoveryWillContinueDescription = String.format(Locale.ROOT, "discovery will continue using %s from hosts providers and %s from last-known cluster state; " + diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java index 8b08c9c3fc0..16740b0761f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.test.ESTestCase; import java.util.Arrays; +import java.util.HashSet; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -43,6 +44,7 @@ import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INI import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.isOneOf; public class ClusterFormationFailureHelperTests extends ESTestCase { public void testScheduling() { @@ -72,7 +74,7 @@ public class ClusterFormationFailureHelperTests extends ESTestCase { warningCount.incrementAndGet(); return new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 0L); }, - deterministicTaskQueue.getThreadPool(), () -> logLastFailedJoinAttemptWarningCount.incrementAndGet()); + deterministicTaskQueue.getThreadPool(), logLastFailedJoinAttemptWarningCount::incrementAndGet); deterministicTaskQueue.runAllTasks(); assertThat("should not schedule anything yet", warningCount.get(), is(0L)); @@ -139,19 +141,18 @@ public class ClusterFormationFailureHelperTests extends ESTestCase { .version(12L).nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build(); assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 15L).getDescription(), - is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers and [" + localNode + - "] from last-known cluster state; node term 15, last-accepted version 12 in term 0")); + is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers " + + "and [] from last-known cluster state; node term 15, last-accepted version 12 in term 0")); final TransportAddress otherAddress = buildNewFakeTransportAddress(); assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 16L).getDescription(), is("master not discovered yet: have discovered []; discovery will continue using [" + otherAddress + - "] from hosts providers and [" + localNode + - "] from last-known cluster state; node term 16, last-accepted version 12 in term 0")); + "] from hosts providers and [] from last-known cluster state; node term 16, last-accepted version 12 in term 0")); final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT); assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 17L).getDescription(), - is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers and [" - + localNode + "] from last-known cluster state; node term 17, last-accepted version 12 in term 0")); + is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers " + + "and [] from last-known cluster state; node term 17, last-accepted version 12 in term 0")); } public void testDescriptionBeforeBootstrapping() { @@ -349,5 +350,32 @@ public class ClusterFormationFailureHelperTests extends ESTestCase { "have discovered [] which is not a quorum; " + "discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state; node term 0, last-accepted version 0 in term 0")); + + final DiscoveryNode otherMasterNode = new DiscoveryNode("other-master", buildNewFakeTransportAddress(), Version.CURRENT); + final DiscoveryNode otherNonMasterNode = new DiscoveryNode("other-non-master", buildNewFakeTransportAddress(), emptyMap(), + new HashSet<>(randomSubsetOf(Arrays.stream(DiscoveryNode.Role.values()) + .filter(r -> r != DiscoveryNode.Role.MASTER).collect(Collectors.toList()))), + Version.CURRENT); + + String[] configNodeIds = new String[]{"n1", "n2"}; + final ClusterState stateWithOtherNodes = ClusterState.builder(ClusterName.DEFAULT) + .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).add(otherMasterNode).add(otherNonMasterNode)) + .metaData(MetaData.builder().coordinationMetaData(CoordinationMetaData.builder() + .lastAcceptedConfiguration(config(configNodeIds)) + .lastCommittedConfiguration(config(configNodeIds)).build())).build(); + + assertThat(new ClusterFormationState(Settings.EMPTY, stateWithOtherNodes, emptyList(), emptyList(), 0L).getDescription(), isOneOf( + + // nodes from last-known cluster state could be in either order + + "master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " + + "have discovered [] which is not a quorum; " + + "discovery will continue using [] from hosts providers and [" + localNode + ", " + otherMasterNode + + "] from last-known cluster state; node term 0, last-accepted version 0 in term 0", + + "master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " + + "have discovered [] which is not a quorum; " + + "discovery will continue using [] from hosts providers and [" + otherMasterNode + ", " + localNode + + "] from last-known cluster state; node term 0, last-accepted version 0 in term 0")); } }