[TEST] Randomly add and remove no_master blocks in IndicesClusterStateServiceRandomUpdatesTests

Checks that IndicesClusterStateService stays consistent with incoming cluster states that contain no_master blocks (especially
discovery.zen.no_master_block=all which disables state persistence). In particular this checks that active shards which have no in-memory data
structures on a node are failed.
This commit is contained in:
Yannick Welsch 2017-04-18 14:19:28 +02:00
parent ac41fb2c4a
commit 0b2cb68f6f
2 changed files with 69 additions and 36 deletions

View File

@ -91,7 +91,11 @@ public abstract class AbstractIndicesClusterStateServiceTestCase extends ESTestC
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.getNodes().getLocalNodeId());
if (localRoutingNode != null) {
if (enableRandomFailures == false) {
assertThat("failed shard cache should be empty", failedShardsCache.values(), empty());
// initializing a shard should succeed when enableRandomFailures is disabled
// active shards can be failed if state persistence was disabled in an earlier CS update
if (failedShardsCache.values().stream().anyMatch(ShardRouting::initializing)) {
fail("failed shard cache should not contain initializing shard routing: " + failedShardsCache.values());
}
}
// check that all shards in local routing nodes have been allocated
for (ShardRouting shardRouting : localRoutingNode) {
@ -100,41 +104,50 @@ public abstract class AbstractIndicesClusterStateServiceTestCase extends ESTestC
MockIndexShard shard = indicesService.getShardOrNull(shardRouting.shardId());
ShardRouting failedShard = failedShardsCache.get(shardRouting.shardId());
if (enableRandomFailures) {
if (shard == null && failedShard == null) {
fail("Shard with id " + shardRouting + " expected but missing in indicesService and failedShardsCache");
if (state.blocks().disableStatePersistence()) {
if (shard != null) {
fail("Shard with id " + shardRouting + " should be removed from indicesService due to disabled state persistence");
}
} else {
if (failedShard != null && failedShard.isSameAllocation(shardRouting) == false) {
fail("Shard cache has not been properly cleaned for " + failedShard);
}
} else {
if (shard == null) {
fail("Shard with id " + shardRouting + " expected but missing in indicesService");
if (shard == null && failedShard == null) {
// shard must either be there or there must be a failure
fail("Shard with id " + shardRouting + " expected but missing in indicesService and failedShardsCache");
}
}
if (shard != null) {
AllocatedIndex<? extends Shard> indexService = indicesService.indexService(index);
assertTrue("Index " + index + " expected but missing in indicesService", indexService != null);
// index metadata has been updated
assertThat(indexService.getIndexSettings().getIndexMetaData(), equalTo(indexMetaData));
// shard has been created
if (enableRandomFailures == false || failedShard == null) {
assertTrue("Shard with id " + shardRouting + " expected but missing in indexService", shard != null);
// shard has latest shard routing
assertThat(shard.routingEntry(), equalTo(shardRouting));
if (enableRandomFailures == false) {
if (shard == null && shardRouting.initializing() && failedShard == shardRouting) {
// initializing a shard should succeed when enableRandomFailures is disabled
fail("Shard with id " + shardRouting + " expected but missing in indicesService " + failedShard);
}
}
if (shard.routingEntry().primary() && shard.routingEntry().active()) {
IndexShardRoutingTable shardRoutingTable = state.routingTable().shardRoutingTable(shard.shardId());
Set<String> activeIds = shardRoutingTable.activeShards().stream()
.map(r -> r.allocationId().getId()).collect(Collectors.toSet());
Set<String> initializingIds = shardRoutingTable.getAllInitializingShards().stream()
.map(r -> r.allocationId().getId()).collect(Collectors.toSet());
assertThat(shard.routingEntry() + " isn't updated with active aIDs", shard.activeAllocationIds, equalTo(activeIds));
assertThat(shard.routingEntry() + " isn't updated with init aIDs", shard.initializingAllocationIds,
equalTo(initializingIds));
if (shard != null) {
AllocatedIndex<? extends Shard> indexService = indicesService.indexService(index);
assertTrue("Index " + index + " expected but missing in indicesService", indexService != null);
// index metadata has been updated
assertThat(indexService.getIndexSettings().getIndexMetaData(), equalTo(indexMetaData));
// shard has been created
if (enableRandomFailures == false || failedShard == null) {
assertTrue("Shard with id " + shardRouting + " expected but missing in indexService", shard != null);
// shard has latest shard routing
assertThat(shard.routingEntry(), equalTo(shardRouting));
}
if (shard.routingEntry().primary() && shard.routingEntry().active()) {
IndexShardRoutingTable shardRoutingTable = state.routingTable().shardRoutingTable(shard.shardId());
Set<String> activeIds = shardRoutingTable.activeShards().stream()
.map(r -> r.allocationId().getId()).collect(Collectors.toSet());
Set<String> initializingIds = shardRoutingTable.getAllInitializingShards().stream()
.map(r -> r.allocationId().getId()).collect(Collectors.toSet());
assertThat(shard.routingEntry() + " isn't updated with active aIDs", shard.activeAllocationIds,
equalTo(activeIds));
assertThat(shard.routingEntry() + " isn't updated with init aIDs", shard.initializingAllocationIds,
equalTo(initializingIds));
}
}
}
}
@ -142,6 +155,10 @@ public abstract class AbstractIndicesClusterStateServiceTestCase extends ESTestC
// all other shards / indices have been cleaned up
for (AllocatedIndex<? extends Shard> indexService : indicesService) {
if (state.blocks().disableStatePersistence()) {
fail("Index service " + indexService.index() + " should be removed from indicesService due to disabled state persistence");
}
assertTrue(state.metaData().getIndexSafe(indexService.index()) != null);
boolean shardsFound = false;
@ -158,13 +175,9 @@ public abstract class AbstractIndicesClusterStateServiceTestCase extends ESTestC
}
if (shardsFound == false) {
if (enableRandomFailures) {
// check if we have shards of that index in failedShardsCache
// if yes, we might not have cleaned the index as failedShardsCache can be populated by another thread
assertFalse(failedShardsCache.keySet().stream().noneMatch(shardId -> shardId.getIndex().equals(indexService.index())));
} else {
fail("index service for index " + indexService.index() + " has no shards");
}
// check if we have shards of that index in failedShardsCache
// if yes, we might not have cleaned the index as failedShardsCache can be populated by another thread
assertFalse(failedShardsCache.keySet().stream().noneMatch(shardId -> shardId.getIndex().equals(indexService.index())));
}
}

View File

@ -32,6 +32,8 @@ import org.elasticsearch.action.support.replication.ClusterStateCreationUtils;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.action.shard.ShardStateAction;
import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
@ -44,6 +46,7 @@ import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.index.Index;
import org.elasticsearch.indices.recovery.PeerRecoveryTargetService;
import org.elasticsearch.repositories.RepositoriesService;
@ -231,6 +234,23 @@ public class IndicesClusterStateServiceRandomUpdatesTests extends AbstractIndice
public ClusterState randomlyUpdateClusterState(ClusterState state,
Map<DiscoveryNode, IndicesClusterStateService> clusterStateServiceMap,
Supplier<MockIndicesService> indicesServiceSupplier) {
// randomly remove no_master blocks
if (randomBoolean() && state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)) {
state = ClusterState.builder(state).blocks(
ClusterBlocks.builder().blocks(state.blocks()).removeGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)).build();
}
// randomly add no_master blocks
if (rarely() && state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID) == false) {
ClusterBlock block = randomBoolean() ? DiscoverySettings.NO_MASTER_BLOCK_ALL : DiscoverySettings.NO_MASTER_BLOCK_WRITES;
state = ClusterState.builder(state).blocks(ClusterBlocks.builder().blocks(state.blocks()).addGlobalBlock(block)).build();
}
// if no_master block is in place, make no other cluster state changes
if (state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)) {
return state;
}
// randomly create new indices (until we have 200 max)
for (int i = 0; i < randomInt(5); i++) {
if (state.metaData().indices().size() > 200) {