Strengthen validateClusterFormed check (#49248)
Strengthens the validateClusterFormed check that is used by the test infrastructure to make sure that nodes are properly connected and know about each other. Is used in situations where the cluster is scaled up and down, and where there previously was a network disruption that has been healed. Closes #49243
This commit is contained in:
parent
d9835f7fb4
commit
420825c3b5
|
@ -161,9 +161,9 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcke
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.hamcrest.Matchers.greaterThan;
|
import static org.hamcrest.Matchers.greaterThan;
|
||||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||||
import static org.junit.Assert.assertEquals;
|
|
||||||
import static org.hamcrest.Matchers.not;
|
import static org.hamcrest.Matchers.not;
|
||||||
import static org.hamcrest.Matchers.nullValue;
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
@ -1239,29 +1239,35 @@ public final class InternalTestCluster extends TestCluster {
|
||||||
|
|
||||||
/** ensure a cluster is formed with all published nodes. */
|
/** ensure a cluster is formed with all published nodes. */
|
||||||
public synchronized void validateClusterFormed() {
|
public synchronized void validateClusterFormed() {
|
||||||
String name = randomFrom(random, getNodeNames());
|
final Set<DiscoveryNode> expectedNodes = new HashSet<>();
|
||||||
validateClusterFormed(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** ensure a cluster is formed with all published nodes, but do so by using the client of the specified node */
|
|
||||||
private synchronized void validateClusterFormed(String viaNode) {
|
|
||||||
Set<DiscoveryNode> expectedNodes = new HashSet<>();
|
|
||||||
for (NodeAndClient nodeAndClient : nodes.values()) {
|
for (NodeAndClient nodeAndClient : nodes.values()) {
|
||||||
expectedNodes.add(getInstanceFromNode(ClusterService.class, nodeAndClient.node()).localNode());
|
expectedNodes.add(getInstanceFromNode(ClusterService.class, nodeAndClient.node()).localNode());
|
||||||
}
|
}
|
||||||
logger.trace("validating cluster formed via [{}], expecting {}", viaNode, expectedNodes);
|
logger.trace("validating cluster formed, expecting {}", expectedNodes);
|
||||||
final Client client = client(viaNode);
|
|
||||||
try {
|
try {
|
||||||
assertBusy(() -> {
|
assertBusy(() -> {
|
||||||
DiscoveryNodes discoveryNodes = client.admin().cluster().prepareState().get().getState().nodes();
|
final List<ClusterState> states = nodes.values().stream()
|
||||||
assertEquals(expectedNodes.size(), discoveryNodes.getSize());
|
.map(node -> getInstanceFromNode(ClusterService.class, node.node()))
|
||||||
|
.map(ClusterService::state)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
final String debugString = ", expected nodes: " + expectedNodes + " and actual cluster states " + states;
|
||||||
|
// all nodes have a master
|
||||||
|
assertTrue("Missing master" + debugString, states.stream().allMatch(cs -> cs.nodes().getMasterNodeId() != null));
|
||||||
|
// all nodes have the same master (in same term)
|
||||||
|
assertEquals("Not all masters in same term" + debugString, 1,
|
||||||
|
states.stream().mapToLong(ClusterState::term).distinct().count());
|
||||||
|
// all nodes know about all other nodes
|
||||||
|
states.forEach(cs -> {
|
||||||
|
DiscoveryNodes discoveryNodes = cs.nodes();
|
||||||
|
assertEquals("Node size mismatch" + debugString, expectedNodes.size(), discoveryNodes.getSize());
|
||||||
for (DiscoveryNode expectedNode : expectedNodes) {
|
for (DiscoveryNode expectedNode : expectedNodes) {
|
||||||
assertTrue("Expected node to exist: " + expectedNode, discoveryNodes.nodeExists(expectedNode));
|
assertTrue("Expected node to exist: " + expectedNode + debugString, discoveryNodes.nodeExists(expectedNode));
|
||||||
}
|
}
|
||||||
|
});
|
||||||
}, 30, TimeUnit.SECONDS);
|
}, 30, TimeUnit.SECONDS);
|
||||||
} catch (AssertionError ae) {
|
} catch (AssertionError ae) {
|
||||||
throw new IllegalStateException("cluster failed to form with expected nodes " + expectedNodes + " and actual nodes " +
|
throw new IllegalStateException("cluster failed to form", ae);
|
||||||
client.admin().cluster().prepareState().get().getState().nodes());
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new IllegalStateException(e);
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
|
@ -1821,8 +1827,7 @@ public final class InternalTestCluster extends TestCluster {
|
||||||
|
|
||||||
if (callback.validateClusterForming() || excludedNodeIds.isEmpty() == false) {
|
if (callback.validateClusterForming() || excludedNodeIds.isEmpty() == false) {
|
||||||
// we have to validate cluster size to ensure that the restarted node has rejoined the cluster if it was master-eligible;
|
// we have to validate cluster size to ensure that the restarted node has rejoined the cluster if it was master-eligible;
|
||||||
// we have to do this via the node that was just restarted as it may be that the master didn't yet process the fact that it left
|
validateClusterFormed();
|
||||||
validateClusterFormed(nodeAndClient.name);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (excludedNodeIds.isEmpty() == false) {
|
if (excludedNodeIds.isEmpty() == false) {
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
package org.elasticsearch.test.disruption;
|
package org.elasticsearch.test.disruption;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||||
|
|
||||||
import org.apache.logging.log4j.LogManager;
|
import org.apache.logging.log4j.LogManager;
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.elasticsearch.cluster.ClusterState;
|
import org.elasticsearch.cluster.ClusterState;
|
||||||
|
@ -41,8 +40,6 @@ import java.util.Set;
|
||||||
import java.util.concurrent.CountDownLatch;
|
import java.util.concurrent.CountDownLatch;
|
||||||
import java.util.function.BiConsumer;
|
import java.util.function.BiConsumer;
|
||||||
|
|
||||||
import static org.junit.Assert.assertFalse;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Network disruptions are modeled using two components:
|
* Network disruptions are modeled using two components:
|
||||||
* 1) the {@link DisruptedLinks} represents the links in the network that are to be disrupted
|
* 1) the {@link DisruptedLinks} represents the links in the network that are to be disrupted
|
||||||
|
@ -119,10 +116,7 @@ public class NetworkDisruption implements ServiceDisruptionScheme {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void ensureNodeCount(InternalTestCluster cluster) {
|
protected void ensureNodeCount(InternalTestCluster cluster) {
|
||||||
assertFalse("cluster failed to form after disruption was healed", cluster.client().admin().cluster().prepareHealth()
|
cluster.validateClusterFormed();
|
||||||
.setWaitForNodes(String.valueOf(cluster.size()))
|
|
||||||
.setWaitForNoRelocatingShards(true)
|
|
||||||
.get().isTimedOut());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue