From 25404cbe3d004d9e3c8e1583a3b5d8016c0439b7 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 12 Aug 2020 16:37:32 +0200 Subject: [PATCH] Provide option to allow writes when master is down (#60605) Elasticsearch currently blocks writes by default when a master is unavailable. The cluster.no_master_block setting allows a user to change this behavior to also block reads when a master is unavailable. This PR introduces a way to now also still allow writes when a master is offline. Writes will continue to work as long as routing table changes are not needed (as those require the master for consistency), or if dynamic mapping updates are not required (as again, these require the master for consistency). Eventually we should switch the default of cluster.no_master_block to this new mode. --- .../discovery/discovery-settings.asciidoc | 14 ++- .../elasticsearch/cluster/NoMasterNodeIT.java | 87 +++++++++++++++++++ .../discovery/MasterDisruptionIT.java | 3 +- .../coordination/NoMasterBlockService.java | 6 +- .../coordination/CoordinatorTests.java | 5 ++ .../NoMasterBlockServiceTests.java | 9 ++ .../test/InternalTestCluster.java | 4 + 7 files changed, 122 insertions(+), 6 deletions(-) diff --git a/docs/reference/modules/discovery/discovery-settings.asciidoc b/docs/reference/modules/discovery/discovery-settings.asciidoc index 16aef32ef2d..f2156df886c 100644 --- a/docs/reference/modules/discovery/discovery-settings.asciidoc +++ b/docs/reference/modules/discovery/discovery-settings.asciidoc @@ -37,9 +37,9 @@ compatibility. Support for the old name will be removed in a future version. This setting was previously known as `discovery.zen.hosts_provider`. Its old name is deprecated but continues to work in order to preserve backwards compatibility. Support for the old name will be removed in a future version. - + `discovery.type`:: - + Specifies whether {es} should form a multiple-node cluster. 
By default, {es} discovers other nodes when forming a cluster and allows other nodes to join the cluster later. If `discovery.type` is set to `single-node`, {es} forms a @@ -52,7 +52,7 @@ compatibility. Support for the old name will be removed in a future version. Sets the initial set of master-eligible nodes in a brand-new cluster. By default this list is empty, meaning that this node expects to join a cluster that has already been bootstrapped. See <>. - + [discrete] ==== Expert settings @@ -220,7 +220,7 @@ or may become unstable or intolerant of certain failures. [[no-master-block]]`cluster.no_master_block`:: Specifies which operations are rejected when there is no active master in a -cluster. This setting has two valid values: +cluster. This setting has three valid values: + -- `all`::: All operations on the node (both read and write operations) are rejected. @@ -232,6 +232,12 @@ based on the last known cluster configuration. This situation may result in partial reads of stale data as this node may be isolated from the rest of the cluster. +`metadata_write`::: Only metadata write operations (e.g. mapping updates, +routing table changes) are rejected but regular indexing operations continue +to work. Read and write operations succeed, based on the last known cluster +configuration. This situation may result in partial reads of stale data as +this node may be isolated from the rest of the cluster. 
+ [NOTE] =============================== * The `cluster.no_master_block` setting doesn't apply to nodes-based APIs diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/NoMasterNodeIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/NoMasterNodeIT.java index 0fe92962fdc..457a131a904 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/NoMasterNodeIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/NoMasterNodeIT.java @@ -20,6 +20,9 @@ package org.elasticsearch.cluster; import org.elasticsearch.action.ActionRequestBuilder; +import org.elasticsearch.action.UnavailableShardsException; +import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.get.GetResponse; @@ -27,9 +30,11 @@ import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.AutoCreateIndex; import org.elasticsearch.client.Client; import org.elasticsearch.client.Requests; +import org.elasticsearch.cluster.action.index.MappingUpdatedAction; import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.coordination.NoMasterBlockService; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentFactory; @@ -49,6 +54,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.stream.Collectors; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertExists; import static 
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; @@ -261,4 +267,85 @@ public class NoMasterNodeIT extends ESIntegTestCase { internalCluster().clearDisruptionScheme(true); } + + public void testNoMasterActionsMetadataWriteMasterBlock() throws Exception { + Settings settings = Settings.builder() + .put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write") + .put(MappingUpdatedAction.INDICES_MAPPING_DYNAMIC_TIMEOUT_SETTING.getKey(), "100ms") + .build(); + + final List nodes = internalCluster().startNodes(3, settings); + + prepareCreate("test1").setSettings( + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)).get(); + client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get(); + client().prepareIndex("test1", "type1").setId("1").setSource("field", "value1").get(); + refresh(); + + ensureGreen("test1"); + + ClusterStateResponse clusterState = client().admin().cluster().prepareState().get(); + logger.info("Cluster state:\n{}", clusterState.getState()); + + final List nodesWithShards = clusterState.getState().routingTable().index("test1").shard(0).activeShards().stream() + .map(shardRouting -> shardRouting.currentNodeId()).map(nodeId -> clusterState.getState().nodes().resolveNode(nodeId)) + .map(DiscoveryNode::getName).collect(Collectors.toList()); + + client().execute(AddVotingConfigExclusionsAction.INSTANCE, + new AddVotingConfigExclusionsRequest(nodesWithShards.toArray(new String[0]))).get(); + ensureGreen("test1"); + + String partitionedNode = nodes.stream().filter(n -> nodesWithShards.contains(n) == false).findFirst().get(); + + final NetworkDisruption disruptionScheme + = new NetworkDisruption(new NetworkDisruption.TwoPartitions(Collections.singleton(partitionedNode), + new HashSet<>(nodesWithShards)), NetworkDisruption.DISCONNECT); + internalCluster().setDisruptionScheme(disruptionScheme); + disruptionScheme.startDisrupting(); + + 
assertBusy(() -> { + for (String node : nodesWithShards) { + ClusterState state = client(node).admin().cluster().prepareState().setLocal(true).get().getState(); + assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID)); + } + }); + + GetResponse getResponse = client(randomFrom(nodesWithShards)).prepareGet("test1", "type1", "1").get(); + assertExists(getResponse); + + expectThrows(Exception.class, () -> client(partitionedNode).prepareGet("test1", "type1", "1").get()); + + SearchResponse countResponse = client(randomFrom(nodesWithShards)).prepareSearch("test1") + .setAllowPartialSearchResults(true).setSize(0).get(); + assertHitCount(countResponse, 1L); + + expectThrows(Exception.class, () -> client(partitionedNode).prepareSearch("test1") + .setAllowPartialSearchResults(true).setSize(0).get()); + + TimeValue timeout = TimeValue.timeValueMillis(200); + client(randomFrom(nodesWithShards)).prepareUpdate("test1", "type1", "1") + .setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get(); + + expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareUpdate("test1", "type1", "1") + .setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get()); + + client(randomFrom(nodesWithShards)).prepareIndex("test1", "type1").setId("1") + .setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get(); + + // dynamic mapping updates fail + expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test1", "type1") + .setId("1") + .setSource(XContentFactory.jsonBuilder().startObject().field("new_field", "value").endObject()) + .setTimeout(timeout).get()); + + // dynamic index creation fails + expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test2", "type1") + .setId("1") + 
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get()); + + expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareIndex("test1", "type1").setId("1") + .setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get()); + + internalCluster().clearDisruptionScheme(true); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/discovery/MasterDisruptionIT.java b/server/src/internalClusterTest/java/org/elasticsearch/discovery/MasterDisruptionIT.java index ce876ed0f97..d4e4bc94643 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/discovery/MasterDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/discovery/MasterDisruptionIT.java @@ -165,7 +165,8 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase { * Verify that the proper block is applied when nodes lose their master */ public void testVerifyApiBlocksDuringPartition() throws Exception { - internalCluster().startNodes(3); + internalCluster().startNodes(3, Settings.builder() + .putNull(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey()).build()); // Makes sure that the get request can be executed on each node locally: assertAcked(prepareCreate("test").setSettings(Settings.builder() diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/NoMasterBlockService.java b/server/src/main/java/org/elasticsearch/cluster/coordination/NoMasterBlockService.java index 2944c3bb232..6baed9e7c7a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/NoMasterBlockService.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/NoMasterBlockService.java @@ -34,6 +34,8 @@ public class NoMasterBlockService { RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.WRITE, ClusterBlockLevel.METADATA_WRITE)); public static final ClusterBlock NO_MASTER_BLOCK_ALL = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", 
true, true, false, RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL); + public static final ClusterBlock NO_MASTER_BLOCK_METADATA_WRITES = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, false, false, + RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.METADATA_WRITE)); public static final Setting LEGACY_NO_MASTER_BLOCK_SETTING = new Setting<>("discovery.zen.no_master_block", "write", NoMasterBlockService::parseNoMasterBlock, @@ -58,8 +60,10 @@ public class NoMasterBlockService { return NO_MASTER_BLOCK_ALL; case "write": return NO_MASTER_BLOCK_WRITES; + case "metadata_write": + return NO_MASTER_BLOCK_METADATA_WRITES; default: - throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write]"); + throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write, metadata_write]"); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java index 8bf0872b94d..d5522c90d88 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java @@ -73,6 +73,7 @@ import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_ import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING; import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL; +import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES; import 
static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION; @@ -1047,6 +1048,10 @@ public class CoordinatorTests extends AbstractCoordinatorTestCase { testAppliesNoMasterBlock("all", NO_MASTER_BLOCK_ALL); } + public void testAppliesNoMasterBlockMetadataWritesIfConfigured() { + testAppliesNoMasterBlock("metadata_write", NO_MASTER_BLOCK_METADATA_WRITES); + } + private void testAppliesNoMasterBlock(String noMasterBlockSetting, ClusterBlock expectedBlock) { try (Cluster cluster = new Cluster(3)) { cluster.runRandomly(); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/NoMasterBlockServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/NoMasterBlockServiceTests.java index 990fd8a0146..bcc2803959d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/NoMasterBlockServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/NoMasterBlockServiceTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL; +import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES; import static org.elasticsearch.cluster.coordination.NoMasterBlockService.LEGACY_NO_MASTER_BLOCK_SETTING; @@ -71,6 +72,11 @@ public class NoMasterBlockServiceTests extends ESTestCase { assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_ALL)); } + public void testBlocksMetadataWritesIfConfiguredBySetting() { + createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build()); + assertThat(noMasterBlockService.getNoMasterBlock(), 
sameInstance(NO_MASTER_BLOCK_METADATA_WRITES)); + } + public void testRejectsInvalidSetting() { expectThrows(IllegalArgumentException.class, () -> createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "unknown").build())); @@ -88,6 +94,9 @@ public class NoMasterBlockServiceTests extends ESTestCase { clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "write").build()); assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_WRITES)); + + clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build()); + assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_METADATA_WRITES)); } public void testIgnoresUpdatesToLegacySetting() { diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index 321a3435c70..0aa10619bed 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -37,6 +37,7 @@ import org.elasticsearch.action.admin.cluster.configuration.ClearVotingConfigExc import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags.Flag; +import org.elasticsearch.cluster.coordination.NoMasterBlockService; import org.elasticsearch.index.IndexingPressure; import org.elasticsearch.action.support.replication.TransportReplicationAction; import org.elasticsearch.client.Client; @@ -407,6 +408,9 @@ public final class InternalTestCluster extends TestCluster { RandomNumbers.randomIntBetween(random, 1, 5)); builder.put(RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(), RandomNumbers.randomIntBetween(random, 1, 4)); + // TODO: currently we 
only randomize "cluster.no_master_block" between "write" and "metadata_write", as "all" is fragile + and fails shards when a master abdicates, which breaks many tests. + builder.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), randomFrom(random, "write", "metadata_write")); defaultSettings = builder.build(); executor = EsExecutors.newScaling("internal_test_cluster_executor", 0, Integer.MAX_VALUE, 0, TimeUnit.SECONDS, EsExecutors.daemonThreadFactory("test_" + clusterName), new ThreadContext(Settings.EMPTY));