From 02d4d8b4099b071b0f846a26b7c02adeb19a2e90 Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Tue, 15 Jan 2019 13:09:48 -0500 Subject: [PATCH] MinimumMasterNodesIT changed for Zen2 (#37428) There were 5 tests in MinimumMasterNodesIT. 2 of them were removed; the other 3 were changed and renamed. 1) testSimpleMinimumMasterNodes -> testTwoNodesNoMasterBlock. The flow of this test is left intact, but in order to make it work on Zen2, additional work for cluster bootstrapping and voting exclusions is needed. 2) testDynamicUpdateMinimumMasterNodes -> removed; nothing in Zen2 corresponds to dynamically changing the minimum master nodes setting. 3) testCanNotBringClusterDown -> removed; it also relies on changing minimum master nodes dynamically. 4) testMultipleNodesShutdownNonMasterNodes -> testThreeNodesNoMasterBlock. Previously this test checked that a no-master block would appear if min_master_nodes=3, 4 nodes are started, and then 2 nodes are brought down. Zen2 dynamically adapts to the number of nodes in the cluster, so it's possible that there will still be a master in a 2-node cluster. For Zen2, we start 3 nodes and shut down 2 of them (without voting exclusions), which results in a no-master block. 5) testCanNotPublishWithoutMinMastNodes -> testCannotCommitStateThreeNodes. The test flow is not changed, but previously there was no check that the nodes in the larger side of the network partition elect a master before the partition is healed. Without that check the test does not work on Zen2, because the persistent setting addition is accepted on the old master, and if it is elected master again, this setting will appear in the cluster state. Also, I have a feeling that we need to rename this class, but could not come up with a good name. 
--- .../cluster/MinimumMasterNodesIT.java | 226 +++++++----------- 1 file changed, 93 insertions(+), 133 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesIT.java b/server/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesIT.java index ea00d0e6480..c16d9b11e0d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesIT.java +++ b/server/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesIT.java @@ -19,9 +19,15 @@ package org.elasticsearch.cluster; +import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; +import org.elasticsearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; +import org.elasticsearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.settings.Settings; @@ -30,40 +36,41 @@ import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateExceptio import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.discovery.zen.ZenDiscovery; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.node.Node; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; -import org.elasticsearch.test.discovery.TestZenDiscovery; +import org.elasticsearch.test.InternalTestCluster; import 
org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions; import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.MockTransportService; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Predicate; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.isOneOf; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; @ClusterScope(scope = Scope.TEST, numDataNodes = 0, autoMinMasterNodes = false) @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE,org.elasticsearch.discovery.zen:TRACE") public class MinimumMasterNodesIT extends ESIntegTestCase { + private int bootstrapNodeId; + @Override protected Collection> nodePlugins() { final HashSet> classes = new HashSet<>(super.nodePlugins()); @@ -72,13 +79,27 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { } @Override - protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder().put(super.nodeSettings(nodeOrdinal)) - .put(TestZenDiscovery.USE_ZEN2.getKey(), false) // Zen2 does not have minimum_master_nodes - .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build(); + protected List 
addExtraClusterBootstrapSettings(List allNodesSettings) { + if (internalCluster().size() + allNodesSettings.size() == bootstrapNodeId) { + List nodeNames = new ArrayList<>(); + Collections.addAll(nodeNames, internalCluster().getNodeNames()); + allNodesSettings.forEach(settings -> nodeNames.add(Node.NODE_NAME_SETTING.get(settings))); + + List otherNodesSettings = allNodesSettings.subList(0, allNodesSettings.size() - 1); + Settings lastNodeSettings = allNodesSettings.get(allNodesSettings.size()-1); + List newSettings = new ArrayList<>(); + newSettings.addAll(otherNodesSettings); + newSettings.add(Settings.builder().put(lastNodeSettings) + .putList(ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING.getKey(), nodeNames) + .build()); + return newSettings; + } + return allNodesSettings; } - public void testSimpleMinimumMasterNodes() throws Exception { + public void testTwoNodesNoMasterBlock() throws Exception { + //bootstrap cluster once second node is started + bootstrapNodeId = 2; Settings settings = Settings.builder() .put("discovery.zen.minimum_master_nodes", 2) @@ -87,7 +108,7 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { .build(); logger.info("--> start first node"); - internalCluster().startNode(settings); + String node1Name = internalCluster().startNode(settings); logger.info("--> should be blocked, no master..."); ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); @@ -95,7 +116,7 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { assertThat(state.nodes().getSize(), equalTo(1)); // verify that we still see the local node in the cluster state logger.info("--> start second node, cluster should be formed"); - internalCluster().startNode(settings); + String node2Name = internalCluster().startNode(settings); ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth() .setWaitForEvents(Priority.LANGUID).setWaitForNodes("2").execute().actionGet(); @@ 
-122,13 +143,19 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { // flush for simpler debugging flushAndRefresh(); - logger.info("--> verify we the data back"); + logger.info("--> verify we get the data back"); for (int i = 0; i < 10; i++) { assertThat(client().prepareSearch().setSize(0).setQuery(QueryBuilders.matchAllQuery()) .execute().actionGet().getHits().getTotalHits().value, equalTo(100L)); } - internalCluster().stopCurrentMasterNode(); + String masterNode = internalCluster().getMasterName(); + String otherNode = node1Name.equals(masterNode) ? node2Name : node1Name; + logger.info("--> add voting config exclusion for non-master node, to be sure it's not elected"); + client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(new String[]{otherNode})).get(); + logger.info("--> stop master node, no master block should appear"); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(masterNode)); + awaitBusy(() -> { ClusterState clusterState = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); return clusterState.blocks().hasGlobalBlockWithId(DiscoverySettings.NO_MASTER_BLOCK_ID); @@ -140,7 +167,7 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { assertThat(state.nodes().getMasterNode(), equalTo(null)); logger.info("--> starting the previous master node again..."); - internalCluster().startNode(settings); + node2Name = internalCluster().startNode(settings); clusterHealthResponse = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID) .setWaitForYellowStatus().setWaitForNodes("2").execute().actionGet(); @@ -157,12 +184,23 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { ensureGreen(); - logger.info("--> verify we the data back after cluster reform"); + logger.info("--> verify we get the data back after cluster reform"); for (int i = 0; i < 10; i++) { 
assertHitCount(client().prepareSearch().setSize(0).setQuery(QueryBuilders.matchAllQuery()).execute().actionGet(), 100); } - internalCluster().stopRandomNonMasterNode(); + logger.info("--> clearing voting config exclusions"); + ClearVotingConfigExclusionsRequest clearRequest = new ClearVotingConfigExclusionsRequest(); + clearRequest.setWaitForRemoval(false); + client().execute(ClearVotingConfigExclusionsAction.INSTANCE, clearRequest).get(); + + masterNode = internalCluster().getMasterName(); + otherNode = node1Name.equals(masterNode) ? node2Name : node1Name; + logger.info("--> add voting config exclusion for master node, to be sure it's not elected"); + client().execute(AddVotingConfigExclusionsAction.INSTANCE, new AddVotingConfigExclusionsRequest(new String[]{masterNode})).get(); + logger.info("--> stop non-master node, no master block should appear"); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(otherNode)); + assertBusy(() -> { ClusterState state1 = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); assertThat(state1.blocks().hasGlobalBlockWithId(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true)); @@ -194,7 +232,10 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { } } - public void testMultipleNodesShutdownNonMasterNodes() throws Exception { + public void testThreeNodesNoMasterBlock() throws Exception { + //bootstrap cluster once 3rd node is started + bootstrapNodeId = 3; + Settings settings = Settings.builder() .put("discovery.zen.minimum_master_nodes", 3) .put(ZenDiscovery.PING_TIMEOUT_SETTING.getKey(), "1s") @@ -213,16 +254,16 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { } }); - logger.info("--> start two more nodes"); - internalCluster().startNodes(2, settings); + logger.info("--> start one more node"); + internalCluster().startNode(settings); ensureGreen(); ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth() - 
.setWaitForEvents(Priority.LANGUID).setWaitForNodes("4").execute().actionGet(); + .setWaitForEvents(Priority.LANGUID).setWaitForNodes("3").execute().actionGet(); assertThat(clusterHealthResponse.isTimedOut(), equalTo(false)); state = client().admin().cluster().prepareState().execute().actionGet().getState(); - assertThat(state.nodes().getSize(), equalTo(4)); + assertThat(state.nodes().getSize(), equalTo(3)); createIndex("test"); NumShards numShards = getNumShards("test"); @@ -238,7 +279,7 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { client().admin().indices().prepareFlush().execute().actionGet(); refresh(); - logger.info("--> verify we the data back"); + logger.info("--> verify we get the data back"); for (int i = 0; i < 10; i++) { assertHitCount(client().prepareSearch().setSize(0).setQuery(QueryBuilders.matchAllQuery()).execute().actionGet(), 100); } @@ -246,20 +287,21 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { internalCluster().stopRandomNonMasterNode(); internalCluster().stopRandomNonMasterNode(); - logger.info("--> verify that there is no master anymore on remaining nodes"); + logger.info("--> verify that there is no master anymore on remaining node"); // spin here to wait till the state is set - assertNoMasterBlockOnAllNodes(); + assertBusy(() -> { + ClusterState st = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); + assertThat(st.blocks().hasGlobalBlockWithId(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true)); + }); logger.info("--> start back the 2 nodes "); - String[] newNodes = internalCluster().startNodes(2, settings).stream().toArray(String[]::new); + internalCluster().startNodes(2, settings); internalCluster().validateClusterFormed(); ensureGreen(); state = client().admin().cluster().prepareState().execute().actionGet().getState(); - assertThat(state.nodes().getSize(), equalTo(4)); - // we prefer to elect up and running nodes - assertThat(state.nodes().getMasterNodeId(), 
not(isOneOf(newNodes))); + assertThat(state.nodes().getSize(), equalTo(3)); logger.info("--> verify we the data back"); for (int i = 0; i < 10; i++) { @@ -267,113 +309,18 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { } } - public void testDynamicUpdateMinimumMasterNodes() throws Exception { - Settings settingsWithMinMaster1 = Settings.builder() - .put(ZenDiscovery.PING_TIMEOUT_SETTING.getKey(), "400ms") - .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 1) - .build(); + public void testCannotCommitStateThreeNodes() throws Exception { + //bootstrap cluster once 3rd node is started + bootstrapNodeId = 3; - Settings settingsWithMinMaster2 = Settings.builder() - .put(settingsWithMinMaster1).put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2) - .build(); - - logger.info("--> start two nodes and wait for them to form a cluster"); - internalCluster().startNodes(settingsWithMinMaster1, settingsWithMinMaster2); - ensureClusterSizeConsistency(); - - logger.info("--> setting minimum master node to 2"); - setMinimumMasterNodes(2); - - // make sure it has been processed on all nodes (master node spawns a secondary cluster state update task) - for (Client client : internalCluster().getClients()) { - assertThat(client.admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setLocal(true).get().isTimedOut(), - equalTo(false)); - } - - logger.info("--> stopping a node"); - internalCluster().stopRandomDataNode(); - logger.info("--> verifying min master node has effect"); - assertNoMasterBlockOnAllNodes(); - - logger.info("--> bringing another node up"); - internalCluster().startNode(settingsWithMinMaster2); - ensureClusterSizeConsistency(); - } - - private void assertNoMasterBlockOnAllNodes() throws InterruptedException { - Predicate hasNoMasterBlock = client -> { - ClusterState state = client.admin().cluster().prepareState().setLocal(true).execute().actionGet().getState(); - return 
state.blocks().hasGlobalBlockWithId(DiscoverySettings.NO_MASTER_BLOCK_ID); - }; - assertTrue(awaitBusy( - () -> { - boolean success = true; - for (Client client : internalCluster().getClients()) { - boolean clientHasNoMasterBlock = hasNoMasterBlock.test(client); - if (logger.isDebugEnabled()) { - logger.debug("Checking for NO_MASTER_BLOCK on client: {} NO_MASTER_BLOCK: [{}]", - client, clientHasNoMasterBlock); - } - success &= clientHasNoMasterBlock; - } - return success; - }, - 20, - TimeUnit.SECONDS - ) - ); - } - - public void testCanNotBringClusterDown() throws ExecutionException, InterruptedException { - int nodeCount = scaledRandomIntBetween(1, 5); - Settings.Builder settings = Settings.builder() - .put(ZenDiscovery.PING_TIMEOUT_SETTING.getKey(), "200ms") - .put("discovery.initial_state_timeout", "500ms"); - - // set an initial value which is at least quorum to avoid split brains during initial startup - int initialMinMasterNodes = randomIntBetween(nodeCount / 2 + 1, nodeCount); - settings.put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), initialMinMasterNodes); - - - logger.info("--> starting [{}] nodes. 
min_master_nodes set to [{}]", nodeCount, initialMinMasterNodes); - internalCluster().startNodes(nodeCount, settings.build()); - - logger.info("--> waiting for nodes to join"); - assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes(Integer.toString(nodeCount)).get().isTimedOut()); - - int updateCount = randomIntBetween(1, nodeCount); - - logger.info("--> updating [{}] to [{}]", - ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), updateCount); - assertAcked(client().admin().cluster().prepareUpdateSettings() - .setPersistentSettings(Settings.builder() - .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), updateCount))); - - logger.info("--> verifying no node left and master is up"); - assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes(Integer.toString(nodeCount)).get().isTimedOut()); - - updateCount = nodeCount + randomIntBetween(1, 2000); - logger.info("--> trying to updating [{}] to [{}]", - ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), updateCount); - try { - client().admin().cluster().prepareUpdateSettings() - .setPersistentSettings(Settings.builder() - .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), updateCount)); - } catch (IllegalArgumentException ex) { - assertEquals(ex.getMessage(), - "cannot set discovery.zen.minimum_master_nodes to more than the current master nodes count [" +updateCount+ "]"); - } - - logger.info("--> verifying no node left and master is up"); - assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes(Integer.toString(nodeCount)).get().isTimedOut()); - } - - public void testCanNotPublishWithoutMinMastNodes() throws Exception { Settings settings = Settings.builder() .put(ZenDiscovery.PING_TIMEOUT_SETTING.getKey(), "200ms") + .put("discovery.initial_state_timeout", "500ms") .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2) 
.put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "100ms") // speed things up .build(); + + internalCluster().startNodes(3, settings); ensureGreen(); // ensure cluster state is recovered before we disrupt things @@ -381,8 +328,8 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { Set otherNodes = new HashSet<>(Arrays.asList(internalCluster().getNodeNames())); otherNodes.remove(master); NetworkDisruption partition = new NetworkDisruption( - new TwoPartitions(Collections.singleton(master), otherNodes), - new NetworkDisruption.NetworkDisconnect()); + new TwoPartitions(Collections.singleton(master), otherNodes), + new NetworkDisruption.NetworkDisconnect()); internalCluster().setDisruptionScheme(partition); final CountDownLatch latch = new CountDownLatch(1); @@ -416,8 +363,21 @@ public class MinimumMasterNodesIT extends ESIntegTestCase { latch.await(); assertThat(failure.get(), instanceOf(FailedToCommitClusterStateException.class)); + + logger.debug("--> check that there is no master in minor partition"); assertBusy(() -> assertThat(masterClusterService.state().nodes().getMasterNode(), nullValue())); + // let major partition to elect new master, to ensure that old master is not elected once partition is restored, + // otherwise persistent setting (which is a part of accepted state on old master) will be propagated to other nodes + logger.debug("--> wait for master to be elected in major partition"); + assertBusy(() -> { + DiscoveryNode masterNode = + internalCluster().client(randomFrom(otherNodes)) + .admin().cluster().prepareState().execute().actionGet().getState().nodes().getMasterNode(); + assertThat(masterNode, notNullValue()); + assertThat(masterNode.getName(), not(equalTo(master))); + }); + partition.stopDisrupting(); logger.debug("--> waiting for cluster to heal");