Unmute, fix, refactor and zen2ify NetworkDisruptionIT (#38351)
This commit unmutes NetworkDisruptionIT. It makes the changes necessary for Zen2: it avoids the use of autoMinMasterNodes and selects the cluster size such that there is no need to call AddVotingExclusion. This commit also introduces a single method, prepareDisruptedCluster, to be used by both test methods. Unfortunately, NetworkDisruption is broken, and testNetworkPartitionRemovalRestoresConnections "is fixed" by introducing assertBusy (see #38348). Relates #36205 Relates #38348 (cherry picked from commit 97707c7f892636e5b75c3df546b067414acb27cd)
This commit is contained in:
parent
d35d9d1886
commit
42602478b8
|
@ -19,7 +19,9 @@
|
|||
|
||||
package org.elasticsearch.test.disruption;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
|
||||
import org.elasticsearch.cluster.NodeConnectionsService;
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.elasticsearch.test.InternalTestCluster;
|
||||
|
@ -35,52 +37,67 @@ import java.util.Set;
|
|||
|
||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/36205")
|
||||
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoMinMasterNodes = false)
|
||||
public class NetworkDisruptionIT extends ESIntegTestCase {
|
||||
@Override
|
||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||
return Arrays.asList(MockTransportService.TestPlugin.class);
|
||||
}
|
||||
|
||||
public void testNetworkPartitionWithNodeShutdown() throws IOException {
|
||||
internalCluster().ensureAtLeastNumDataNodes(2);
|
||||
String[] nodeNames = internalCluster().getNodeNames();
|
||||
NetworkDisruption networkDisruption =
|
||||
new NetworkDisruption(new TwoPartitions(nodeNames[0], nodeNames[1]), new NetworkDisruption.NetworkUnresponsive());
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
networkDisruption.startDisrupting();
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeNames[0]));
|
||||
internalCluster().clearDisruptionScheme();
|
||||
}
|
||||
private static final Settings DISRUPTION_TUNED_SETTINGS = Settings.builder()
|
||||
.put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "2s")
|
||||
.build();
|
||||
|
||||
public void testNetworkPartitionRemovalRestoresConnections() throws IOException {
|
||||
Set<String> nodes = new HashSet<>();
|
||||
nodes.addAll(Arrays.asList(internalCluster().getNodeNames()));
|
||||
nodes.remove(internalCluster().getMasterName());
|
||||
if (nodes.size() <= 2) {
|
||||
internalCluster().ensureAtLeastNumDataNodes(3 - nodes.size());
|
||||
nodes.addAll(Arrays.asList(internalCluster().getNodeNames()));
|
||||
nodes.remove(internalCluster().getMasterName());
|
||||
}
|
||||
Set<String> side1 = new HashSet<>(randomSubsetOf(randomIntBetween(1, nodes.size() - 1), nodes));
|
||||
/**
|
||||
* Creates a mixed-node cluster of 3 to 5 nodes and splits it into 2 parts.
|
||||
* The first part is guaranteed to have at least the majority of the nodes,
|
||||
* so that a master can be elected on this side.
|
||||
*/
|
||||
private Tuple<Set<String>, Set<String>> prepareDisruptedCluster() {
|
||||
int numOfNodes = randomIntBetween(3, 5);
|
||||
internalCluster().setBootstrapMasterNodeIndex(numOfNodes - 1);
|
||||
Set<String> nodes = new HashSet<>(internalCluster().startNodes(numOfNodes, DISRUPTION_TUNED_SETTINGS));
|
||||
ensureGreen();
|
||||
assertThat(nodes.size(), greaterThanOrEqualTo(3));
|
||||
int majority = nodes.size() / 2 + 1;
|
||||
Set<String> side1 = new HashSet<>(randomSubsetOf(randomIntBetween(majority, nodes.size() - 1), nodes));
|
||||
assertThat(side1.size(), greaterThanOrEqualTo(majority));
|
||||
Set<String> side2 = new HashSet<>(nodes);
|
||||
side2.removeAll(side1);
|
||||
assertThat(side2.size(), greaterThanOrEqualTo(1));
|
||||
NetworkDisruption networkDisruption = new NetworkDisruption(new TwoPartitions(side1, side2),
|
||||
new NetworkDisruption.NetworkDisconnect());
|
||||
new NetworkDisruption.NetworkDisconnect());
|
||||
internalCluster().setDisruptionScheme(networkDisruption);
|
||||
networkDisruption.startDisrupting();
|
||||
// sends some requests
|
||||
client(randomFrom(side1)).admin().cluster().prepareNodesInfo().get();
|
||||
client(randomFrom(side2)).admin().cluster().prepareNodesInfo().get();
|
||||
|
||||
return Tuple.tuple(side1, side2);
|
||||
}
|
||||
|
||||
public void testClearDisruptionSchemeWhenNodeIsDown() throws IOException {
|
||||
Tuple<Set<String>, Set<String>> sides = prepareDisruptedCluster();
|
||||
|
||||
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(randomFrom(sides.v2())));
|
||||
internalCluster().clearDisruptionScheme();
|
||||
// check all connections are restore
|
||||
}
|
||||
|
||||
public void testNetworkPartitionRemovalRestoresConnections() throws Exception {
|
||||
Tuple<Set<String>, Set<String>> sides = prepareDisruptedCluster();
|
||||
Set<String> side1 = sides.v1();
|
||||
Set<String> side2 = sides.v2();
|
||||
|
||||
// sends some requests to the majority side part
|
||||
client(randomFrom(side1)).admin().cluster().prepareNodesInfo().get();
|
||||
internalCluster().clearDisruptionScheme();
|
||||
// check all connections are restored
|
||||
for (String nodeA : side1) {
|
||||
for (String nodeB : side2) {
|
||||
TransportService serviceA = internalCluster().getInstance(TransportService.class, nodeA);
|
||||
TransportService serviceB = internalCluster().getInstance(TransportService.class, nodeB);
|
||||
assertTrue(nodeA + " is not connected to " + nodeB, serviceA.nodeConnected(serviceB.getLocalNode()));
|
||||
assertTrue(nodeB + " is not connected to " + nodeA, serviceB.nodeConnected(serviceA.getLocalNode()));
|
||||
// TODO assertBusy should not be here, see https://github.com/elastic/elasticsearch/issues/38348
|
||||
assertBusy(() -> {
|
||||
assertTrue(nodeA + " is not connected to " + nodeB, serviceA.nodeConnected(serviceB.getLocalNode()));
|
||||
assertTrue(nodeB + " is not connected to " + nodeA, serviceB.nodeConnected(serviceA.getLocalNode()));
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue