Provide option to allow writes when master is down (#60605)

Elasticsearch currently blocks writes by default when a master is unavailable. The cluster.no_master_block setting allows
a user to change this behavior to also block reads when a master is unavailable. This PR introduces a way to now also still
allow writes when a master is offline. Writes will continue to work as long as routing table changes are not needed (as
those require the master for consistency), or if dynamic mapping updates are not required (as again, these require the
master for consistency).

Eventually we should switch the default of cluster.no_master_block to this new mode.
This commit is contained in:
Yannick Welsch 2020-08-12 16:37:32 +02:00
parent 6644f2283d
commit 25404cbe3d
7 changed files with 122 additions and 6 deletions

View File

@ -37,9 +37,9 @@ compatibility. Support for the old name will be removed in a future version.
This setting was previously known as `discovery.zen.hosts_provider`. Its old
name is deprecated but continues to work in order to preserve backwards
compatibility. Support for the old name will be removed in a future version.
`discovery.type`::
Specifies whether {es} should form a multiple-node cluster. By default, {es}
discovers other nodes when forming a cluster and allows other nodes to join
the cluster later. If `discovery.type` is set to `single-node`, {es} forms a
@ -52,7 +52,7 @@ compatibility. Support for the old name will be removed in a future version.
Sets the initial set of master-eligible nodes in a brand-new cluster. By
default this list is empty, meaning that this node expects to join a cluster
that has already been bootstrapped. See <<initial_master_nodes>>.
[discrete]
==== Expert settings
@ -220,7 +220,7 @@ or may become unstable or intolerant of certain failures.
[[no-master-block]]`cluster.no_master_block`::
Specifies which operations are rejected when there is no active master in a
cluster. This setting has two valid values:
cluster. This setting has three valid values:
+
--
`all`::: All operations on the node (both read and write operations) are rejected.
@ -232,6 +232,12 @@ based on the last known cluster configuration. This situation may result in
partial reads of stale data as this node may be isolated from the rest of the
cluster.
`metadata_write`::: Only metadata write operations (e.g. mapping updates,
routing table changes) are rejected but regular indexing operations continue
to work. Read and write operations succeed, based on the last known cluster
configuration. This situation may result in partial reads of stale data as
this node may be isolated from the rest of the cluster.
[NOTE]
===============================
* The `cluster.no_master_block` setting doesn't apply to nodes-based APIs

View File

@ -20,6 +20,9 @@
package org.elasticsearch.cluster;
import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.UnavailableShardsException;
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction;
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
@ -27,9 +30,11 @@ import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.AutoCreateIndex;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -49,6 +54,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertExists;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@ -261,4 +267,85 @@ public class NoMasterNodeIT extends ESIntegTestCase {
internalCluster().clearDisruptionScheme(true);
}
public void testNoMasterActionsMetadataWriteMasterBlock() throws Exception {
Settings settings = Settings.builder()
.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write")
.put(MappingUpdatedAction.INDICES_MAPPING_DYNAMIC_TIMEOUT_SETTING.getKey(), "100ms")
.build();
final List<String> nodes = internalCluster().startNodes(3, settings);
prepareCreate("test1").setSettings(
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)).get();
client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get();
client().prepareIndex("test1", "type1").setId("1").setSource("field", "value1").get();
refresh();
ensureGreen("test1");
ClusterStateResponse clusterState = client().admin().cluster().prepareState().get();
logger.info("Cluster state:\n{}", clusterState.getState());
final List<String> nodesWithShards = clusterState.getState().routingTable().index("test1").shard(0).activeShards().stream()
.map(shardRouting -> shardRouting.currentNodeId()).map(nodeId -> clusterState.getState().nodes().resolveNode(nodeId))
.map(DiscoveryNode::getName).collect(Collectors.toList());
client().execute(AddVotingConfigExclusionsAction.INSTANCE,
new AddVotingConfigExclusionsRequest(nodesWithShards.toArray(new String[0]))).get();
ensureGreen("test1");
String partitionedNode = nodes.stream().filter(n -> nodesWithShards.contains(n) == false).findFirst().get();
final NetworkDisruption disruptionScheme
= new NetworkDisruption(new NetworkDisruption.TwoPartitions(Collections.singleton(partitionedNode),
new HashSet<>(nodesWithShards)), NetworkDisruption.DISCONNECT);
internalCluster().setDisruptionScheme(disruptionScheme);
disruptionScheme.startDisrupting();
assertBusy(() -> {
for (String node : nodesWithShards) {
ClusterState state = client(node).admin().cluster().prepareState().setLocal(true).get().getState();
assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));
}
});
GetResponse getResponse = client(randomFrom(nodesWithShards)).prepareGet("test1", "type1", "1").get();
assertExists(getResponse);
expectThrows(Exception.class, () -> client(partitionedNode).prepareGet("test1", "type1", "1").get());
SearchResponse countResponse = client(randomFrom(nodesWithShards)).prepareSearch("test1")
.setAllowPartialSearchResults(true).setSize(0).get();
assertHitCount(countResponse, 1L);
expectThrows(Exception.class, () -> client(partitionedNode).prepareSearch("test1")
.setAllowPartialSearchResults(true).setSize(0).get());
TimeValue timeout = TimeValue.timeValueMillis(200);
client(randomFrom(nodesWithShards)).prepareUpdate("test1", "type1", "1")
.setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get();
expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareUpdate("test1", "type1", "1")
.setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get());
client(randomFrom(nodesWithShards)).prepareIndex("test1", "type1").setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get();
// dynamic mapping updates fail
expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test1", "type1")
.setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().field("new_field", "value").endObject())
.setTimeout(timeout).get());
// dynamic index creation fails
expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test2", "type1")
.setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get());
expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareIndex("test1", "type1").setId("1")
.setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get());
internalCluster().clearDisruptionScheme(true);
}
}

View File

@ -165,7 +165,8 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
* Verify that the proper block is applied when nodes lose their master
*/
public void testVerifyApiBlocksDuringPartition() throws Exception {
internalCluster().startNodes(3);
internalCluster().startNodes(3, Settings.builder()
.putNull(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey()).build());
// Makes sure that the get request can be executed on each node locally:
assertAcked(prepareCreate("test").setSettings(Settings.builder()

View File

@ -34,6 +34,8 @@ public class NoMasterBlockService {
RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.WRITE, ClusterBlockLevel.METADATA_WRITE));
public static final ClusterBlock NO_MASTER_BLOCK_ALL = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, true, false,
RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL);
public static final ClusterBlock NO_MASTER_BLOCK_METADATA_WRITES = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, false, false,
RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.METADATA_WRITE));
public static final Setting<ClusterBlock> LEGACY_NO_MASTER_BLOCK_SETTING =
new Setting<>("discovery.zen.no_master_block", "write", NoMasterBlockService::parseNoMasterBlock,
@ -58,8 +60,10 @@ public class NoMasterBlockService {
return NO_MASTER_BLOCK_ALL;
case "write":
return NO_MASTER_BLOCK_WRITES;
case "metadata_write":
return NO_MASTER_BLOCK_METADATA_WRITES;
default:
throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write]");
throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write, metadata_write]");
}
}

View File

@ -73,6 +73,7 @@ import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_
import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING;
import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION;
@ -1047,6 +1048,10 @@ public class CoordinatorTests extends AbstractCoordinatorTestCase {
testAppliesNoMasterBlock("all", NO_MASTER_BLOCK_ALL);
}
public void testAppliesNoMasterBlockMetadataWritesIfConfigured() {
testAppliesNoMasterBlock("metadata_write", NO_MASTER_BLOCK_METADATA_WRITES);
}
private void testAppliesNoMasterBlock(String noMasterBlockSetting, ClusterBlock expectedBlock) {
try (Cluster cluster = new Cluster(3)) {
cluster.runRandomly();

View File

@ -23,6 +23,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESTestCase;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.LEGACY_NO_MASTER_BLOCK_SETTING;
@ -71,6 +72,11 @@ public class NoMasterBlockServiceTests extends ESTestCase {
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_ALL));
}
public void testBlocksMetadataWritesIfConfiguredBySetting() {
createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build());
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_METADATA_WRITES));
}
public void testRejectsInvalidSetting() {
expectThrows(IllegalArgumentException.class, () ->
createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "unknown").build()));
@ -88,6 +94,9 @@ public class NoMasterBlockServiceTests extends ESTestCase {
clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "write").build());
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_WRITES));
clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build());
assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_METADATA_WRITES));
}
public void testIgnoresUpdatesToLegacySetting() {

View File

@ -37,6 +37,7 @@ import org.elasticsearch.action.admin.cluster.configuration.ClearVotingConfigExc
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags.Flag;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.index.IndexingPressure;
import org.elasticsearch.action.support.replication.TransportReplicationAction;
import org.elasticsearch.client.Client;
@ -407,6 +408,9 @@ public final class InternalTestCluster extends TestCluster {
RandomNumbers.randomIntBetween(random, 1, 5));
builder.put(RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(),
RandomNumbers.randomIntBetween(random, 1, 4));
// TODO: currently we only randomize "cluster.no_master_block" between "write" and "metadata_write", as "all" is fragile
// and fails shards when a master abdicates, which breaks many tests.
builder.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), randomFrom(random,"write", "metadata_write"));
defaultSettings = builder.build();
executor = EsExecutors.newScaling("internal_test_cluster_executor", 0, Integer.MAX_VALUE, 0, TimeUnit.SECONDS,
EsExecutors.daemonThreadFactory("test_" + clusterName), new ThreadContext(Settings.EMPTY));