From 35e4f24467ce279772a9e28d8cc8703b25dc0ce5 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Fri, 12 Aug 2016 16:42:10 +0200 Subject: [PATCH] Remove dead code that promotes replica relocation target to primary (#19973) If a primary fails, an active replica is promoted to primary. Once we do the promotion, however, we are sure that the active replica is not relocating anymore. The reason is that when the primary fails, we first remove/cancel all initializing replicas (also if they are relocation targets). This is the only safe thing to do anyhow, because promoting relocating replica to primary would also mean that the replica recovery of the replica relocation target is suddenly promoted to primary relocation, which the recovery code treats in a different way. --- .../cluster/routing/RoutingNodes.java | 27 ++---- .../allocation/AllocationCommandsTests.java | 89 ++++++++++--------- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java index 1b410ce781e..eb9a18228f3 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java @@ -546,24 +546,15 @@ public class RoutingNodes implements Iterable { assert failedShard.active(); if (failedShard.primary()) { // promote active replica to primary if active replica exists - ShardRouting candidate = activeReplica(failedShard.shardId()); - if (candidate == null) { + ShardRouting activeReplica = activeReplica(failedShard.shardId()); + if (activeReplica == null) { moveToUnassigned(failedShard, unassignedInfo); } else { + // if the activeReplica was relocating before this call to failShard, its relocation was cancelled above when we + // failed initializing replica shards (and moved replica relocation source back to started) + assert activeReplica.started() : "replica relocation should have been cancelled: " + activeReplica; movePrimaryToUnassignedAndDemoteToReplica(failedShard, unassignedInfo); - ShardRouting primarySwappedCandidate = promoteAssignedReplicaShardToPrimary(candidate); - if (primarySwappedCandidate.relocatingNodeId() != null) { - // its also relocating, make sure to move the other routing to primary - RoutingNode node = node(primarySwappedCandidate.relocatingNodeId()); - if (node != null) { - for (ShardRouting shardRouting : node) { - if (shardRouting.shardId().equals(primarySwappedCandidate.shardId()) && !shardRouting.primary()) { - promoteAssignedReplicaShardToPrimary(shardRouting); - break; - } - } - } - } + ShardRouting primarySwappedCandidate = promoteActiveReplicaShardToPrimary(activeReplica); if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings())) { reinitShadowPrimary(primarySwappedCandidate); } @@ -621,8 +612,8 @@ public class RoutingNodes implements Iterable { * @param replicaShard the replica shard to be promoted to primary * @return the resulting primary shard */ - private ShardRouting promoteAssignedReplicaShardToPrimary(ShardRouting replicaShard) { - assert replicaShard.unassigned() == false : "unassigned shard cannot be promoted to primary: " + replicaShard; + private ShardRouting promoteActiveReplicaShardToPrimary(ShardRouting replicaShard) { + assert replicaShard.active() : "non-active shard cannot be promoted to primary: " + replicaShard; assert replicaShard.primary() == false : "primary shard cannot be promoted to primary: " + replicaShard; ShardRouting primaryShard = replicaShard.moveToPrimary(); updateAssigned(replicaShard, primaryShard); @@ -729,7 +720,7 @@ public class RoutingNodes implements Iterable { /** * Moves assigned primary to unassigned and demotes it to a replica. - * Used in conjunction with {@link #promoteAssignedReplicaShardToPrimary} when an active replica is promoted to primary. + * Used in conjunction with {@link #promoteActiveReplicaShardToPrimary} when an active replica is promoted to primary. */ private ShardRouting movePrimaryToUnassignedAndDemoteToReplica(ShardRouting shard, UnassignedInfo unassignedInfo) { assert shard.unassigned() == false : "only assigned shards can be moved to unassigned (" + shard + ")"; diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java index ed91d98e532..354b18d0b2a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java @@ -374,50 +374,59 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); - logger.info("--> cancel the move of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); + if (randomBoolean()) { + logger.info("--> cancel the primary allocation (with allow_primary set to true)"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(rerouteResult.changed(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).iterator().next().primary(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); + } else { + logger.info("--> cancel the move of the replica shard"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); - logger.info("--> move the replica shard again"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(RELOCATING).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); + logger.info("--> move the replica shard again"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(RELOCATING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); - logger.info("--> cancel the source replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); - assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).get(0).relocatingNodeId(), nullValue()); + logger.info("--> cancel the source replica shard"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).get(0).relocatingNodeId(), nullValue()); - logger.info("--> start the former target replica shard"); - rerouteResult = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).size(), equalTo(1)); + logger.info("--> start the former target replica shard"); + rerouteResult = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).size(), equalTo(1)); - - logger.info("--> cancel the primary allocation (with allow_primary set to true)"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(rerouteResult.changed(), equalTo(true)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).iterator().next().primary(), equalTo(true)); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + logger.info("--> cancel the primary allocation (with allow_primary set to true)"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(rerouteResult.changed(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).iterator().next().primary(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + } } public void testSerialization() throws Exception {