diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java index 1b410ce781e..eb9a18228f3 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java @@ -546,24 +546,15 @@ public class RoutingNodes implements Iterable { assert failedShard.active(); if (failedShard.primary()) { // promote active replica to primary if active replica exists - ShardRouting candidate = activeReplica(failedShard.shardId()); - if (candidate == null) { + ShardRouting activeReplica = activeReplica(failedShard.shardId()); + if (activeReplica == null) { moveToUnassigned(failedShard, unassignedInfo); } else { + // if the activeReplica was relocating before this call to failShard, its relocation was cancelled above when we + // failed initializing replica shards (and moved replica relocation source back to started) + assert activeReplica.started() : "replica relocation should have been cancelled: " + activeReplica; movePrimaryToUnassignedAndDemoteToReplica(failedShard, unassignedInfo); - ShardRouting primarySwappedCandidate = promoteAssignedReplicaShardToPrimary(candidate); - if (primarySwappedCandidate.relocatingNodeId() != null) { - // its also relocating, make sure to move the other routing to primary - RoutingNode node = node(primarySwappedCandidate.relocatingNodeId()); - if (node != null) { - for (ShardRouting shardRouting : node) { - if (shardRouting.shardId().equals(primarySwappedCandidate.shardId()) && !shardRouting.primary()) { - promoteAssignedReplicaShardToPrimary(shardRouting); - break; - } - } - } - } + ShardRouting primarySwappedCandidate = promoteActiveReplicaShardToPrimary(activeReplica); if (IndexMetaData.isIndexUsingShadowReplicas(indexMetaData.getSettings())) { reinitShadowPrimary(primarySwappedCandidate); } @@ -621,8 +612,8 @@ public class RoutingNodes implements Iterable { * @param replicaShard the replica shard to be promoted to primary * @return the resulting primary shard */ - private ShardRouting promoteAssignedReplicaShardToPrimary(ShardRouting replicaShard) { - assert replicaShard.unassigned() == false : "unassigned shard cannot be promoted to primary: " + replicaShard; + private ShardRouting promoteActiveReplicaShardToPrimary(ShardRouting replicaShard) { + assert replicaShard.active() : "non-active shard cannot be promoted to primary: " + replicaShard; assert replicaShard.primary() == false : "primary shard cannot be promoted to primary: " + replicaShard; ShardRouting primaryShard = replicaShard.moveToPrimary(); updateAssigned(replicaShard, primaryShard); @@ -729,7 +720,7 @@ public class RoutingNodes implements Iterable { /** * Moves assigned primary to unassigned and demotes it to a replica. - * Used in conjunction with {@link #promoteAssignedReplicaShardToPrimary} when an active replica is promoted to primary. + * Used in conjunction with {@link #promoteActiveReplicaShardToPrimary} when an active replica is promoted to primary. */ private ShardRouting movePrimaryToUnassignedAndDemoteToReplica(ShardRouting shard, UnassignedInfo unassignedInfo) { assert shard.unassigned() == false : "only assigned shards can be moved to unassigned (" + shard + ")"; diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java index ed91d98e532..354b18d0b2a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java @@ -374,50 +374,59 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); - logger.info("--> cancel the move of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); + if (randomBoolean()) { + logger.info("--> cancel the primary allocation (with allow_primary set to true)"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(rerouteResult.changed(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).iterator().next().primary(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); + } else { + logger.info("--> cancel the move of the replica shard"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); - logger.info("--> move the replica shard again"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(RELOCATING).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); + logger.info("--> move the replica shard again"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(RELOCATING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); - logger.info("--> cancel the source replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); - assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).get(0).relocatingNodeId(), nullValue()); + logger.info("--> cancel the source replica shard"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).get(0).relocatingNodeId(), nullValue()); - logger.info("--> start the former target replica shard"); - rerouteResult = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).size(), equalTo(1)); + logger.info("--> start the former target replica shard"); + rerouteResult = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING)); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).size(), equalTo(1)); - - logger.info("--> cancel the primary allocation (with allow_primary set to true)"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); - clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); - assertThat(rerouteResult.changed(), equalTo(true)); - assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).iterator().next().primary(), equalTo(true)); - assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); - assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + logger.info("--> cancel the primary allocation (with allow_primary set to true)"); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); + clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); + assertThat(rerouteResult.changed(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(STARTED).iterator().next().primary(), equalTo(true)); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(0)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(0)); + } } public void testSerialization() throws Exception {