Added test with AwaitsFix annotation that simulates a split brain.
This commit is contained in:
parent
7a26b49298
commit
7bf3ffe73c
|
@ -0,0 +1,145 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.discovery;
|
||||||
|
|
||||||
|
import com.google.common.base.Predicate;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
|
||||||
|
import org.elasticsearch.client.Client;
|
||||||
|
import org.elasticsearch.cluster.ClusterState;
|
||||||
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||||
|
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
||||||
|
import org.elasticsearch.common.Priority;
|
||||||
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||||
|
import org.elasticsearch.test.transport.MockTransportService;
|
||||||
|
import org.elasticsearch.transport.TransportModule;
|
||||||
|
import org.elasticsearch.transport.TransportService;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
|
||||||
|
import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
|
||||||
|
import static org.hamcrest.Matchers.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*/
|
||||||
|
@ClusterScope(scope= Scope.SUITE, numNodes=0)
|
||||||
|
public class DiscoveryWithNetworkFailuresTests extends ElasticsearchIntegrationTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/2488")
|
||||||
|
public void failWithMinimumMasterNodesConfigured() throws Exception {
|
||||||
|
final Settings settings = ImmutableSettings.settingsBuilder()
|
||||||
|
.put("discovery.zen.minimum_master_nodes", 2)
|
||||||
|
.put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly
|
||||||
|
.put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName())
|
||||||
|
.build();
|
||||||
|
List<String>nodes = cluster().startNodesAsync(3, settings).get();
|
||||||
|
|
||||||
|
// Wait until a green status has been reaches and 3 nodes are part of the cluster
|
||||||
|
List<String> nodesList = Arrays.asList(nodes.toArray(new String[3]));
|
||||||
|
ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
|
||||||
|
.setWaitForEvents(Priority.LANGUID)
|
||||||
|
.setWaitForNodes("3")
|
||||||
|
.get();
|
||||||
|
assertThat(clusterHealthResponse.isTimedOut(), is(false));
|
||||||
|
|
||||||
|
// Figure out what is the elected master node
|
||||||
|
DiscoveryNode masterDiscoNode = null;
|
||||||
|
for (String node : nodesList) {
|
||||||
|
ClusterState state = cluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
|
||||||
|
assertThat(state.nodes().size(), equalTo(3));
|
||||||
|
if (masterDiscoNode == null) {
|
||||||
|
masterDiscoNode = state.nodes().masterNode();
|
||||||
|
} else {
|
||||||
|
assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert masterDiscoNode != null;
|
||||||
|
logger.info("---> legit elected master node=" + masterDiscoNode);
|
||||||
|
final Client masterClient = cluster().masterClient();
|
||||||
|
|
||||||
|
// Everything is stable now, it is now time to simulate evil...
|
||||||
|
|
||||||
|
// Pick a node that isn't the elected master.
|
||||||
|
String unluckyNode = null;
|
||||||
|
for (String node : nodesList) {
|
||||||
|
if (!node.equals(masterDiscoNode.getName())) {
|
||||||
|
unluckyNode = node;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert unluckyNode != null;
|
||||||
|
|
||||||
|
// Simulate a network issue between the unlucky node and elected master node in both directions.
|
||||||
|
addFailToSendNoConnectRule(masterDiscoNode.getName(), unluckyNode);
|
||||||
|
addFailToSendNoConnectRule(unluckyNode, masterDiscoNode.getName());
|
||||||
|
try {
|
||||||
|
// Wait until elected master has removed that the unlucky node...
|
||||||
|
awaitBusy(new Predicate<Object>() {
|
||||||
|
@Override
|
||||||
|
public boolean apply(Object input) {
|
||||||
|
return masterClient.admin().cluster().prepareState().setLocal(true).get().getState().nodes().size() == 2;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// The unlucky node must report *no* master node, since it can't connect to master and in fact it should
|
||||||
|
// continuously ping until network failures have been resolved.
|
||||||
|
Client isolatedNodeClient = cluster().client(unluckyNode);
|
||||||
|
ClusterState localClusterState = isolatedNodeClient.admin().cluster().prepareState().setLocal(true).get().getState();
|
||||||
|
DiscoveryNodes localDiscoveryNodes = localClusterState.nodes();
|
||||||
|
assertThat(localDiscoveryNodes.masterNode(), nullValue());
|
||||||
|
} finally {
|
||||||
|
// stop simulating network failures, from this point on the unlucky node is able to rejoin
|
||||||
|
// We also need to do this even if assertions fail, since otherwise the test framework can't work properly
|
||||||
|
clearNoConnectRule(masterDiscoNode.getName(), unluckyNode);
|
||||||
|
clearNoConnectRule(unluckyNode, masterDiscoNode.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait until the master node sees all 3 nodes again.
|
||||||
|
clusterHealthResponse = masterClient.admin().cluster().prepareHealth()
|
||||||
|
.setWaitForEvents(Priority.LANGUID)
|
||||||
|
.setWaitForNodes("3")
|
||||||
|
.get();
|
||||||
|
assertThat(clusterHealthResponse.isTimedOut(), is(false));
|
||||||
|
|
||||||
|
for (String node : nodesList) {
|
||||||
|
ClusterState state = cluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
|
||||||
|
assertThat(state.nodes().size(), equalTo(3));
|
||||||
|
// The elected master shouldn't have changed, since the unlucky node never could have elected himself as
|
||||||
|
// master since m_m_n of 2 could never be satisfied.
|
||||||
|
assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addFailToSendNoConnectRule(String fromNode, String toNode) {
|
||||||
|
TransportService mockTransportService = cluster().getInstance(TransportService.class, fromNode);
|
||||||
|
((MockTransportService) mockTransportService).addFailToSendNoConnectRule(cluster().getInstance(Discovery.class, toNode).localNode());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void clearNoConnectRule(String fromNode, String toNode) {
|
||||||
|
TransportService mockTransportService = cluster().getInstance(TransportService.class, fromNode);
|
||||||
|
((MockTransportService) mockTransportService).clearRule(cluster().getInstance(Discovery.class, toNode).localNode());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue