Report terms and version if cluster does not form (#37473)

Adds the node's current term and the term and version of the last-accepted
cluster state to the message reported by the `ClusterFormationFailureHelper`,
since these values may be of importance when tracking down a cluster formation
failure.
This commit is contained in:
David Turner 2019-01-15 17:32:08 +00:00 committed by GitHub
parent 7c11b05c28
commit a2a40c50a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 47 deletions

View File

@ -117,13 +117,15 @@ public class ClusterFormationFailureHelper {
private final ClusterState clusterState; private final ClusterState clusterState;
private final List<TransportAddress> resolvedAddresses; private final List<TransportAddress> resolvedAddresses;
private final List<DiscoveryNode> foundPeers; private final List<DiscoveryNode> foundPeers;
private final long currentTerm;
ClusterFormationState(Settings settings, ClusterState clusterState, List<TransportAddress> resolvedAddresses, ClusterFormationState(Settings settings, ClusterState clusterState, List<TransportAddress> resolvedAddresses,
List<DiscoveryNode> foundPeers) { List<DiscoveryNode> foundPeers, long currentTerm) {
this.settings = settings; this.settings = settings;
this.clusterState = clusterState; this.clusterState = clusterState;
this.resolvedAddresses = resolvedAddresses; this.resolvedAddresses = resolvedAddresses;
this.foundPeers = foundPeers; this.foundPeers = foundPeers;
this.currentTerm = currentTerm;
} }
String getDescription() { String getDescription() {
@ -131,8 +133,9 @@ public class ClusterFormationFailureHelper {
= StreamSupport.stream(clusterState.nodes().spliterator(), false).map(DiscoveryNode::toString).collect(Collectors.toList()); = StreamSupport.stream(clusterState.nodes().spliterator(), false).map(DiscoveryNode::toString).collect(Collectors.toList());
final String discoveryWillContinueDescription = String.format(Locale.ROOT, final String discoveryWillContinueDescription = String.format(Locale.ROOT,
"discovery will continue using %s from hosts providers and %s from last-known cluster state", "discovery will continue using %s from hosts providers and %s from last-known cluster state; " +
resolvedAddresses, clusterStateNodes); "node term %d, last-accepted version %d in term %d",
resolvedAddresses, clusterStateNodes, currentTerm, clusterState.version(), clusterState.term());
final String discoveryStateIgnoringQuorum = String.format(Locale.ROOT, "have discovered %s; %s", final String discoveryStateIgnoringQuorum = String.format(Locale.ROOT, "have discovered %s; %s",
foundPeers, discoveryWillContinueDescription); foundPeers, discoveryWillContinueDescription);

View File

@ -182,7 +182,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
private ClusterFormationState getClusterFormationState() { private ClusterFormationState getClusterFormationState() {
return new ClusterFormationState(settings, getStateForMasterService(), peerFinder.getLastResolvedAddresses(), return new ClusterFormationState(settings, getStateForMasterService(), peerFinder.getLastResolvedAddresses(),
StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false).collect(Collectors.toList())); StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false).collect(Collectors.toList()), getCurrentTerm());
} }
private Runnable getOnLeaderFailure() { private Runnable getOnLeaderFailure() {

View File

@ -68,7 +68,7 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
final ClusterFormationFailureHelper clusterFormationFailureHelper = new ClusterFormationFailureHelper(settingsBuilder.build(), final ClusterFormationFailureHelper clusterFormationFailureHelper = new ClusterFormationFailureHelper(settingsBuilder.build(),
() -> { () -> {
warningCount.incrementAndGet(); warningCount.incrementAndGet();
return new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()); return new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 0L);
}, },
deterministicTaskQueue.getThreadPool()); deterministicTaskQueue.getThreadPool());
@ -131,51 +131,57 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
public void testDescriptionOnMasterIneligibleNodes() { public void testDescriptionOnMasterIneligibleNodes() {
final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
.nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build(); .version(12L).nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 15L).getDescription(),
is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers and [" + localNode + is("master not discovered yet: have discovered []; discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state")); "] from last-known cluster state; node term 15, last-accepted version 12 in term 0"));
final TransportAddress otherAddress = buildNewFakeTransportAddress(); final TransportAddress otherAddress = buildNewFakeTransportAddress();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 16L).getDescription(),
is("master not discovered yet: have discovered []; discovery will continue using [" + otherAddress + is("master not discovered yet: have discovered []; discovery will continue using [" + otherAddress +
"] from hosts providers and [" + localNode + "] from last-known cluster state")); "] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 16, last-accepted version 12 in term 0"));
final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT);
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode)).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 17L).getDescription(),
is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers and [" is("master not discovered yet: have discovered [" + otherNode + "]; discovery will continue using [] from hosts providers and ["
+ localNode + "] from last-known cluster state")); + localNode + "] from last-known cluster state; node term 17, last-accepted version 12 in term 0"));
} }
public void testDescriptionBeforeBootstrapping() { public void testDescriptionBeforeBootstrapping() {
final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), Version.CURRENT);
final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
.version(7L)
.metaData(MetaData.builder().coordinationMetaData(CoordinationMetaData.builder().term(4L).build()))
.nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build(); .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId())).build();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 1L).getDescription(),
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " + is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
"[cluster.initial_master_nodes] is empty on this node: have discovered []; " + "[cluster.initial_master_nodes] is empty on this node: have discovered []; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 1, last-accepted version 7 in term 4"));
final TransportAddress otherAddress = buildNewFakeTransportAddress(); final TransportAddress otherAddress = buildNewFakeTransportAddress();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 2L).getDescription(),
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " + is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
"[cluster.initial_master_nodes] is empty on this node: have discovered []; " + "[cluster.initial_master_nodes] is empty on this node: have discovered []; " +
"discovery will continue using [" + otherAddress + "] from hosts providers and [" + localNode + "discovery will continue using [" + otherAddress + "] from hosts providers and [" + localNode +
"] from last-known cluster state")); "] from last-known cluster state; node term 2, last-accepted version 7 in term 4"));
final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT);
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode)).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 3L).getDescription(),
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " + is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
"[cluster.initial_master_nodes] is empty on this node: have discovered [" + otherNode + "]; " + "[cluster.initial_master_nodes] is empty on this node: have discovered [" + otherNode + "]; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 3, last-accepted version 7 in term 4"));
assertThat(new ClusterFormationState(Settings.builder().putList(INITIAL_MASTER_NODES_SETTING.getKey(), "other").build(), assertThat(new ClusterFormationState(Settings.builder().putList(INITIAL_MASTER_NODES_SETTING.getKey(), "other").build(),
clusterState, emptyList(), emptyList()).getDescription(), clusterState, emptyList(), emptyList(), 4L).getDescription(),
is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " + is("master not discovered yet, this node has not previously joined a bootstrapped (v7+) cluster, and " +
"this node must discover master-eligible nodes [other] to bootstrap a cluster: have discovered []; " + "this node must discover master-eligible nodes [other] to bootstrap a cluster: have discovered []; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 4, last-accepted version 7 in term 4"));
} }
private static VotingConfiguration config(String[] nodeIds) { private static VotingConfiguration config(String[] nodeIds) {
@ -199,75 +205,87 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
final ClusterState clusterState = state(localNode, "otherNode"); final ClusterState clusterState = state(localNode, "otherNode");
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [otherNode], " + is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
final TransportAddress otherAddress = buildNewFakeTransportAddress(); final TransportAddress otherAddress = buildNewFakeTransportAddress();
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [otherNode], " + is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [" + otherAddress + "] from hosts providers and [" + localNode + "discovery will continue using [" + otherAddress + "] from hosts providers and [" + localNode +
"] from last-known cluster state")); "] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
final DiscoveryNode otherNode = new DiscoveryNode("otherNode", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode otherNode = new DiscoveryNode("otherNode", buildNewFakeTransportAddress(), Version.CURRENT);
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode)).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [otherNode], " + is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
"have discovered [" + otherNode + "] which is a quorum; " + "have discovered [" + otherNode + "] which is a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
final DiscoveryNode yetAnotherNode = new DiscoveryNode("yetAnotherNode", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode yetAnotherNode = new DiscoveryNode("yetAnotherNode", buildNewFakeTransportAddress(), Version.CURRENT);
assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(yetAnotherNode)).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, clusterState, emptyList(), singletonList(yetAnotherNode), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [otherNode], " + is("master not discovered or elected yet, an election requires a node with id [otherNode], " +
"have discovered [" + yetAnotherNode + "] which is not a quorum; " + "have discovered [" + yetAnotherNode + "] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2"), emptyList(), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2"), emptyList(), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " + is("master not discovered or elected yet, an election requires two nodes with ids [n1, n2], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3"), emptyList(), emptyList()).getDescription(), assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3"), emptyList(), emptyList(), 0L)
.getDescription(),
is("master not discovered or elected yet, an election requires at least 2 nodes with ids from [n1, n2, n3], " + is("master not discovered or elected yet, an election requires at least 2 nodes with ids from [n1, n2, n3], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4"), emptyList(), emptyList()) assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4"), emptyList(), emptyList(), 0L)
.getDescription(), .getDescription(),
is("master not discovered or elected yet, an election requires at least 3 nodes with ids from [n1, n2, n3, n4], " + is("master not discovered or elected yet, an election requires at least 3 nodes with ids from [n1, n2, n3, n4], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4", "n5"), emptyList(), emptyList()) assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, "n1", "n2", "n3", "n4", "n5"), emptyList(), emptyList(), 0L)
.getDescription(), .getDescription(),
is("master not discovered or elected yet, an election requires at least 3 nodes with ids from [n1, n2, n3, n4, n5], " + is("master not discovered or elected yet, an election requires at least 3 nodes with ids from [n1, n2, n3, n4, n5], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n1"}), assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n1"}), emptyList(),
emptyList(), emptyList()).getDescription(), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [n1], " + is("master not discovered or elected yet, an election requires a node with id [n1], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2"}), assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2"}), emptyList(),
emptyList(), emptyList()).getDescription(), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [n1] and a node with id [n2], " + is("master not discovered or elected yet, an election requires a node with id [n1] and a node with id [n2], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3"}), assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3"}), emptyList(),
emptyList(), emptyList()).getDescription(), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [n1] and two nodes with ids [n2, n3], " + is("master not discovered or elected yet, an election requires a node with id [n1] and two nodes with ids [n2, n3], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3", "n4"}), assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, new String[]{"n1"}, new String[]{"n2", "n3", "n4"}),
emptyList(), emptyList()).getDescription(), emptyList(), emptyList(), 0L).getDescription(),
is("master not discovered or elected yet, an election requires a node with id [n1] and " + is("master not discovered or elected yet, an election requires a node with id [n1] and " +
"at least 2 nodes with ids from [n2, n3, n4], " + "at least 2 nodes with ids from [n2, n3, n4], " +
"have discovered [] which is not a quorum; " + "have discovered [] which is not a quorum; " +
"discovery will continue using [] from hosts providers and [" + localNode + "] from last-known cluster state")); "discovery will continue using [] from hosts providers and [" + localNode +
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
} }
} }