Provide option to allow writes when master is down (#60605)
Elasticsearch currently blocks writes by default when the master is unavailable. The `cluster.no_master_block` setting allows a user to change this behavior so that reads are also blocked when the master is unavailable. This PR introduces a third mode that still allows writes while the master is offline. Writes continue to work as long as no routing table changes are needed and no dynamic mapping updates are required, since both of these require the master for consistency. Eventually we should switch the default of `cluster.no_master_block` to this new mode.
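For illustration (a minimal sketch, not part of this change): the new mode is selected per node via the `cluster.no_master_block` setting, mirroring the node settings used in the integration test below.

```java
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.common.settings.Settings;

// Sketch: node settings selecting the new metadata_write mode.
// NoMasterBlockService.NO_MASTER_BLOCK_SETTING resolves to "cluster.no_master_block".
Settings settings = Settings.builder()
    .put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write")
    .build();
```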
This commit is contained in:
parent 6644f2283d
commit 25404cbe3d

@@ -220,7 +220,7 @@ or may become unstable or intolerant of certain failures.
[[no-master-block]]`cluster.no_master_block`::
Specifies which operations are rejected when there is no active master in a
-cluster. This setting has two valid values:
+cluster. This setting has three valid values:
+
--
`all`::: All operations on the node (both read and write operations) are rejected.
@@ -232,6 +232,12 @@ based on the last known cluster configuration. This situation may result in
partial reads of stale data as this node may be isolated from the rest of the
cluster.

`metadata_write`::: Only metadata write operations (e.g. mapping updates,
routing table changes) are rejected but regular indexing operations continue
to work. Read and write operations succeed, based on the last known cluster
configuration. This situation may result in partial reads of stale data as
this node may be isolated from the rest of the cluster.

[NOTE]
===============================
* The `cluster.no_master_block` setting doesn't apply to nodes-based APIs
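The setting is dynamic (the dynamic-update test further below exercises changing it at runtime), so it can also be switched on a live cluster. A hedged sketch using the Java client's cluster-settings builder, assuming the standard `prepareUpdateSettings`/`setPersistentSettings` API:

```java
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;

// Sketch: dynamically switch a running cluster to the metadata_write mode.
// The method and client wiring are illustrative; only the setting name and value come from this change.
void allowRegularWritesWithoutMaster(Client client) {
    client.admin().cluster().prepareUpdateSettings()
        .setPersistentSettings(Settings.builder().put("cluster.no_master_block", "metadata_write"))
        .get();
}
```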
@@ -20,6 +20,9 @@
package org.elasticsearch.cluster;

import org.elasticsearch.action.ActionRequestBuilder;
import org.elasticsearch.action.UnavailableShardsException;
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction;
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
@@ -27,9 +30,11 @@ import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.AutoCreateIndex;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentFactory;
@@ -49,6 +54,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.stream.Collectors;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertExists;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@@ -261,4 +267,85 @@ public class NoMasterNodeIT extends ESIntegTestCase {
        internalCluster().clearDisruptionScheme(true);
    }

    public void testNoMasterActionsMetadataWriteMasterBlock() throws Exception {
        Settings settings = Settings.builder()
            .put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write")
            .put(MappingUpdatedAction.INDICES_MAPPING_DYNAMIC_TIMEOUT_SETTING.getKey(), "100ms")
            .build();

        final List<String> nodes = internalCluster().startNodes(3, settings);

        prepareCreate("test1").setSettings(
            Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)).get();
        client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get();
        client().prepareIndex("test1", "type1").setId("1").setSource("field", "value1").get();
        refresh();

        ensureGreen("test1");

        ClusterStateResponse clusterState = client().admin().cluster().prepareState().get();
        logger.info("Cluster state:\n{}", clusterState.getState());

        final List<String> nodesWithShards = clusterState.getState().routingTable().index("test1").shard(0).activeShards().stream()
            .map(shardRouting -> shardRouting.currentNodeId()).map(nodeId -> clusterState.getState().nodes().resolveNode(nodeId))
            .map(DiscoveryNode::getName).collect(Collectors.toList());

        client().execute(AddVotingConfigExclusionsAction.INSTANCE,
            new AddVotingConfigExclusionsRequest(nodesWithShards.toArray(new String[0]))).get();
        ensureGreen("test1");

        String partitionedNode = nodes.stream().filter(n -> nodesWithShards.contains(n) == false).findFirst().get();

        final NetworkDisruption disruptionScheme
            = new NetworkDisruption(new NetworkDisruption.TwoPartitions(Collections.singleton(partitionedNode),
            new HashSet<>(nodesWithShards)), NetworkDisruption.DISCONNECT);
        internalCluster().setDisruptionScheme(disruptionScheme);
        disruptionScheme.startDisrupting();

        assertBusy(() -> {
            for (String node : nodesWithShards) {
                ClusterState state = client(node).admin().cluster().prepareState().setLocal(true).get().getState();
                assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));
            }
        });

        GetResponse getResponse = client(randomFrom(nodesWithShards)).prepareGet("test1", "type1", "1").get();
        assertExists(getResponse);

        expectThrows(Exception.class, () -> client(partitionedNode).prepareGet("test1", "type1", "1").get());

        SearchResponse countResponse = client(randomFrom(nodesWithShards)).prepareSearch("test1")
            .setAllowPartialSearchResults(true).setSize(0).get();
        assertHitCount(countResponse, 1L);

        expectThrows(Exception.class, () -> client(partitionedNode).prepareSearch("test1")
            .setAllowPartialSearchResults(true).setSize(0).get());

        TimeValue timeout = TimeValue.timeValueMillis(200);
        client(randomFrom(nodesWithShards)).prepareUpdate("test1", "type1", "1")
            .setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get();

        expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareUpdate("test1", "type1", "1")
            .setDoc(Requests.INDEX_CONTENT_TYPE, "field", "value2").setTimeout(timeout).get());

        client(randomFrom(nodesWithShards)).prepareIndex("test1", "type1").setId("1")
            .setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get();

        // dynamic mapping updates fail
        expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test1", "type1")
            .setId("1")
            .setSource(XContentFactory.jsonBuilder().startObject().field("new_field", "value").endObject())
            .setTimeout(timeout).get());

        // dynamic index creation fails
        expectThrows(MasterNotDiscoveredException.class, () -> client(randomFrom(nodesWithShards)).prepareIndex("test2", "type1")
            .setId("1")
            .setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get());

        expectThrows(UnavailableShardsException.class, () -> client(partitionedNode).prepareIndex("test1", "type1").setId("1")
            .setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get());

        internalCluster().clearDisruptionScheme(true);
    }
}
@@ -165,7 +165,8 @@ public class MasterDisruptionIT extends AbstractDisruptionTestCase {
     * Verify that the proper block is applied when nodes lose their master
     */
    public void testVerifyApiBlocksDuringPartition() throws Exception {
-       internalCluster().startNodes(3);
+       internalCluster().startNodes(3, Settings.builder()
+           .putNull(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey()).build());

        // Makes sure that the get request can be executed on each node locally:
        assertAcked(prepareCreate("test").setSettings(Settings.builder()
@@ -34,6 +34,8 @@ public class NoMasterBlockService {
        RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.WRITE, ClusterBlockLevel.METADATA_WRITE));
    public static final ClusterBlock NO_MASTER_BLOCK_ALL = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, true, false,
        RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL);
    public static final ClusterBlock NO_MASTER_BLOCK_METADATA_WRITES = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, false, false,
        RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.METADATA_WRITE));

    public static final Setting<ClusterBlock> LEGACY_NO_MASTER_BLOCK_SETTING =
        new Setting<>("discovery.zen.no_master_block", "write", NoMasterBlockService::parseNoMasterBlock,
@@ -58,8 +60,10 @@ public class NoMasterBlockService {
                return NO_MASTER_BLOCK_ALL;
            case "write":
                return NO_MASTER_BLOCK_WRITES;
            case "metadata_write":
                return NO_MASTER_BLOCK_METADATA_WRITES;
            default:
-               throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write]");
+               throw new IllegalArgumentException("invalid no-master block [" + value + "], must be one of [all, write, metadata_write]");
        }
    }
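For orientation (not part of the diff): the new `NO_MASTER_BLOCK_METADATA_WRITES` block covers only the metadata-write level, whereas the pre-existing `write` block also covers document writes. A small sketch, assuming `ClusterBlock.contains(ClusterBlockLevel)` reports the levels a block applies to:

```java
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;

final class NoMasterBlockLevelsSketch {
    // Sketch: metadata_write rejects metadata writes only; the "write" block also rejects document writes.
    static boolean metadataWriteBlockIsNarrower() {
        return NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES.contains(ClusterBlockLevel.METADATA_WRITE)
            && NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES.contains(ClusterBlockLevel.WRITE) == false
            && NoMasterBlockService.NO_MASTER_BLOCK_WRITES.contains(ClusterBlockLevel.WRITE);
    }
}
```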
@@ -73,6 +73,7 @@ import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_
import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING;
import static org.elasticsearch.cluster.coordination.LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.cluster.coordination.Reconfigurator.CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION;
@@ -1047,6 +1048,10 @@ public class CoordinatorTests extends AbstractCoordinatorTestCase {
        testAppliesNoMasterBlock("all", NO_MASTER_BLOCK_ALL);
    }

    public void testAppliesNoMasterBlockMetadataWritesIfConfigured() {
        testAppliesNoMasterBlock("metadata_write", NO_MASTER_BLOCK_METADATA_WRITES);
    }

    private void testAppliesNoMasterBlock(String noMasterBlockSetting, ClusterBlock expectedBlock) {
        try (Cluster cluster = new Cluster(3)) {
            cluster.runRandomly();
@@ -23,6 +23,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESTestCase;

import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ALL;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_METADATA_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_SETTING;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_WRITES;
import static org.elasticsearch.cluster.coordination.NoMasterBlockService.LEGACY_NO_MASTER_BLOCK_SETTING;
@@ -71,6 +72,11 @@ public class NoMasterBlockServiceTests extends ESTestCase {
        assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_ALL));
    }

    public void testBlocksMetadataWritesIfConfiguredBySetting() {
        createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build());
        assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_METADATA_WRITES));
    }

    public void testRejectsInvalidSetting() {
        expectThrows(IllegalArgumentException.class, () ->
            createService(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "unknown").build()));
@@ -88,6 +94,9 @@ public class NoMasterBlockServiceTests extends ESTestCase {
        clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "write").build());
        assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_WRITES));

        clusterSettings.applySettings(Settings.builder().put(NO_MASTER_BLOCK_SETTING.getKey(), "metadata_write").build());
        assertThat(noMasterBlockService.getNoMasterBlock(), sameInstance(NO_MASTER_BLOCK_METADATA_WRITES));
    }

    public void testIgnoresUpdatesToLegacySetting() {
@@ -37,6 +37,7 @@ import org.elasticsearch.action.admin.cluster.configuration.ClearVotingConfigExc
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags.Flag;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.index.IndexingPressure;
import org.elasticsearch.action.support.replication.TransportReplicationAction;
import org.elasticsearch.client.Client;
@@ -407,6 +408,9 @@ public final class InternalTestCluster extends TestCluster {
            RandomNumbers.randomIntBetween(random, 1, 5));
        builder.put(RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(),
            RandomNumbers.randomIntBetween(random, 1, 4));
        // TODO: currently we only randomize "cluster.no_master_block" between "write" and "metadata_write", as "all" is fragile
        // and fails shards when a master abdicates, which breaks many tests.
        builder.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), randomFrom(random,"write", "metadata_write"));
        defaultSettings = builder.build();
        executor = EsExecutors.newScaling("internal_test_cluster_executor", 0, Integer.MAX_VALUE, 0, TimeUnit.SECONDS,
            EsExecutors.daemonThreadFactory("test_" + clusterName), new ThreadContext(Settings.EMPTY));