Omit non-masters in ClusterFormationFailureHelper (#41344)

Today the `ClusterFormationFailureHelper` says `... discovery will continue
using ... from last-known cluster state` and lists all the nodes in the
last-known cluster state. In fact we ignore the master-ineligible nodes in the
last-known cluster state during discovery. This commit fixes this by listing
only the master-eligible nodes from the cluster state in this message.
This commit is contained in:
David Turner 2019-04-19 16:02:35 +01:00 committed by Yannick Welsch
parent 480453aa24
commit 11f41c4e7d
2 changed files with 37 additions and 9 deletions

View File

@ -133,8 +133,8 @@ public class ClusterFormationFailureHelper {
}
String getDescription() {
final List<String> clusterStateNodes
= StreamSupport.stream(clusterState.nodes().spliterator(), false).map(DiscoveryNode::toString).collect(Collectors.toList());
final List<String> clusterStateNodes = StreamSupport.stream(clusterState.nodes().getMasterNodes().values().spliterator(), false)
.map(n -> n.value.toString()).collect(Collectors.toList());
final String discoveryWillContinueDescription = String.format(Locale.ROOT,
"discovery will continue using %s from hosts providers and %s from last-known cluster state; " +

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.test.ESTestCase;
import java.util.Arrays;
import java.util.HashSet;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
@ -43,6 +44,7 @@ import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INI
import static org.elasticsearch.node.Node.NODE_NAME_SETTING;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.isOneOf;
public class ClusterFormationFailureHelperTests extends ESTestCase {
public void testScheduling() {
@ -72,7 +74,7 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
warningCount.incrementAndGet();
return new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 0L);
},
deterministicTaskQueue.getThreadPool(), () -> logLastFailedJoinAttemptWarningCount.incrementAndGet());
deterministicTaskQueue.getThreadPool(), logLastFailedJoinAttemptWarningCount::incrementAndGet);
deterministicTaskQueue.runAllTasks();
assertThat("should not schedule anything yet", warningCount.get(), is(0L));
@ -139,19 +141,18 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
.version(12L).nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 15L).getDescription(),
is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 15, last-accepted version 12 in term 0"));
is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers " +
"and [] from last-known cluster state; node term 15, last-accepted version 12 in term 0"));
final TransportAddress otherAddress = buildNewFakeTransportAddress();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 16L).getDescription(),
is("master not discovered yet: have discovered []; discovery will continue using [" + otherAddress +
"] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 16, last-accepted version 12 in term 0"));
"] from hosts providers and [] from last-known cluster state; node term 16, last-accepted version 12 in term 0"));
final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT);
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 17L).getDescription(),
is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers and ["
+ localNode + "] from last-known cluster state; node term 17, last-accepted version 12 in term 0"));
is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers " +
"and [] from last-known cluster state; node term 17, last-accepted version 12 in term 0"));
}
public void testDescriptionBeforeBootstrapping() {
@ -349,5 +350,32 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
"have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
final DiscoveryNode otherMasterNode = new DiscoveryNode("other-master", buildNewFakeTransportAddress(), Version.CURRENT);
final DiscoveryNode otherNonMasterNode = new DiscoveryNode("other-non-master", buildNewFakeTransportAddress(), emptyMap(),
new HashSet<>(randomSubsetOf(Arrays.stream(DiscoveryNode.Role.values())
.filter(r -> r != DiscoveryNode.Role.MASTER).collect(Collectors.toList()))),
Version.CURRENT);
String[] configNodeIds = new String[]{"n1", "n2"};
final ClusterState stateWithOtherNodes = ClusterState.builder(ClusterName.DEFAULT)
.nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).add(otherMasterNode).add(otherNonMasterNode))
.metaData(MetaData.builder().coordinationMetaData(CoordinationMetaData.builder()
.lastAcceptedConfiguration(config(configNodeIds))
.lastCommittedConfiguration(config(configNodeIds)).build())).build();
assertThat(new ClusterFormationState(Settings.EMPTY, stateWithOtherNodes, emptyList(), emptyList(), 0L).getDescription(), isOneOf(
// nodes from last-known cluster state could be in either order
"master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " +
"have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + ", " + otherMasterNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0",
"master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " +
"have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + otherMasterNode + ", " + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
}
}