Unmute, fix, refactor and zen2ify NetworkDisruptionIT (#38351)

This commit unmutes NetworkDisruptionIT.

It makes changes necessary for Zen2 - avoids usage of
autoMinMasterNodes and selects cluster size, such that there is no
need to call AddVotingExclusion.
This commit also refactors the test, introducing a single method
prepareDisruptedCluster to be used by both test methods.
Unfortunately, NetworkDisruption is broken, so
testNetworkPartitionRemovalRestoresConnections is "fixed" for now by
introducing assertBusy (see #38348).

Relates #36205
Relates #38348

(cherry picked from commit 97707c7f892636e5b75c3df546b067414acb27cd)
This commit is contained in:
Andrey Ershov 2019-03-18 18:37:20 +01:00
parent d35d9d1886
commit 42602478b8
1 changed files with 46 additions and 29 deletions

View File

@ -19,7 +19,9 @@
package org.elasticsearch.test.disruption;
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
import org.elasticsearch.cluster.NodeConnectionsService;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;
@ -35,52 +37,67 @@ import java.util.Set;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/36205")
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoMinMasterNodes = false)
public class NetworkDisruptionIT extends ESIntegTestCase {
/**
 * Returns the plugin classes used by this test: only {@code MockTransportService.TestPlugin}.
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    final Class<? extends Plugin> mockTransportPlugin = MockTransportService.TestPlugin.class;
    return Arrays.asList(mockTransportPlugin);
}
/**
 * Stops a node while a network disruption separating it from a second node is active,
 * and only afterwards clears the disruption scheme.
 */
public void testNetworkPartitionWithNodeShutdown() throws IOException {
    internalCluster().ensureAtLeastNumDataNodes(2);
    final String[] names = internalCluster().getNodeNames();
    final String firstNode = names[0];
    final String secondNode = names[1];

    // Put the first two nodes on opposite sides, using the NetworkUnresponsive link type.
    final TwoPartitions partitions = new TwoPartitions(firstNode, secondNode);
    final NetworkDisruption disruption =
        new NetworkDisruption(partitions, new NetworkDisruption.NetworkUnresponsive());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();

    // Shut down one side of the partition before the disruption is removed.
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(firstNode));
    internalCluster().clearDisruptionScheme();
}
/**
 * Node settings passed to the nodes started by {@code prepareDisruptedCluster()}:
 * sets the node reconnect interval to "2s".
 */
private static final Settings DISRUPTION_TUNED_SETTINGS =
    Settings.builder().put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "2s").build();
public void testNetworkPartitionRemovalRestoresConnections() throws IOException {
Set<String> nodes = new HashSet<>();
nodes.addAll(Arrays.asList(internalCluster().getNodeNames()));
nodes.remove(internalCluster().getMasterName());
if (nodes.size() <= 2) {
internalCluster().ensureAtLeastNumDataNodes(3 - nodes.size());
nodes.addAll(Arrays.asList(internalCluster().getNodeNames()));
nodes.remove(internalCluster().getMasterName());
}
Set<String> side1 = new HashSet<>(randomSubsetOf(randomIntBetween(1, nodes.size() - 1), nodes));
/**
* Creates 3 to 5 mixed-node cluster and splits it into 2 parts.
* The first part is guaranteed to have at least the majority of the nodes,
* so that master could be elected on this side.
*/
private Tuple<Set<String>, Set<String>> prepareDisruptedCluster() {
int numOfNodes = randomIntBetween(3, 5);
internalCluster().setBootstrapMasterNodeIndex(numOfNodes - 1);
Set<String> nodes = new HashSet<>(internalCluster().startNodes(numOfNodes, DISRUPTION_TUNED_SETTINGS));
ensureGreen();
assertThat(nodes.size(), greaterThanOrEqualTo(3));
int majority = nodes.size() / 2 + 1;
Set<String> side1 = new HashSet<>(randomSubsetOf(randomIntBetween(majority, nodes.size() - 1), nodes));
assertThat(side1.size(), greaterThanOrEqualTo(majority));
Set<String> side2 = new HashSet<>(nodes);
side2.removeAll(side1);
assertThat(side2.size(), greaterThanOrEqualTo(1));
NetworkDisruption networkDisruption = new NetworkDisruption(new TwoPartitions(side1, side2),
new NetworkDisruption.NetworkDisconnect());
new NetworkDisruption.NetworkDisconnect());
internalCluster().setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
// sends some requests
client(randomFrom(side1)).admin().cluster().prepareNodesInfo().get();
client(randomFrom(side2)).admin().cluster().prepareNodesInfo().get();
return Tuple.tuple(side1, side2);
}
/**
 * Clears the disruption scheme after one of the partitioned nodes has been stopped.
 * The method makes no assertions of its own; it only exercises the stop-then-clear sequence.
 */
public void testClearDisruptionSchemeWhenNodeIsDown() throws IOException {
    // v2() is the second partition returned by prepareDisruptedCluster()
    final Set<String> secondSide = prepareDisruptedCluster().v2();

    // stop a random node from that partition while the disruption is still active
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(randomFrom(secondSide)));

    internalCluster().clearDisruptionScheme();
}
/**
 * Verifies that clearing the disruption scheme restores the transport connections,
 * in both directions, between every pair of nodes taken from opposite partitions.
 *
 * @throws Exception if a request fails or the connections are not restored
 */
public void testNetworkPartitionRemovalRestoresConnections() throws Exception {
    Tuple<Set<String>, Set<String>> sides = prepareDisruptedCluster();
    Set<String> side1 = sides.v1();
    Set<String> side2 = sides.v2();
    // sends some requests to the majority side part
    client(randomFrom(side1)).admin().cluster().prepareNodesInfo().get();
    internalCluster().clearDisruptionScheme();
    // check all connections are restored
    for (String nodeA : side1) {
        // the lookup for nodeA does not depend on nodeB, so do it once per outer iteration
        TransportService serviceA = internalCluster().getInstance(TransportService.class, nodeA);
        for (String nodeB : side2) {
            TransportService serviceB = internalCluster().getInstance(TransportService.class, nodeB);
            // TODO assertBusy should not be here, see https://github.com/elastic/elasticsearch/issues/38348
            // The connectivity checks live only inside assertBusy: asserting eagerly first would fail
            // before the retry gets a chance to observe the restored connection.
            assertBusy(() -> {
                assertTrue(nodeA + " is not connected to " + nodeB, serviceA.nodeConnected(serviceB.getLocalNode()));
                assertTrue(nodeB + " is not connected to " + nodeA, serviceB.nodeConnected(serviceA.getLocalNode()));
            });
        }
    }
}