Fix issue when relocation source and target routings are failed in the same batch update

PR #19715 made AllocationService less lenient, requiring ShardRouting instances that are passed to its applyStartedShards and
applyFailedShards methods to exist in the routing table. As primary shard failures also fail initializing replica shards,
concurrent replica shard failures that are handled in the same cluster state update might no longer reference existing replica
entries in the routing table. To solve this, PR #19715 ordered the failures so that replica failures are handled before
primary failures. There are other failures that influence more than one routing entry, however. When we have a failed shard entry
for both a relocation source and target, then, depending on the order, either one or the other might point to an outdated shard
entry. As finding a good order is more difficult than applying the failures, this commit re-adds parts of the ShardRouting
re-resolve logic so that the applyFailedShards method can properly treat shard failure batches.
This commit is contained in:
Yannick Welsch 2016-08-08 11:23:32 +02:00
parent 97dfa2ba40
commit 180eff14dd

View File

@ -235,18 +235,24 @@ public class AllocationService extends AbstractComponent {
FailedRerouteAllocation allocation = new FailedRerouteAllocation(allocationDeciders, routingNodes, clusterState, failedShards,
clusterInfoService.getClusterInfo(), currentNanoTime);
// as failing primaries also fail associated replicas, we fail replicas first here to avoid re-resolving replica ShardRouting
List<FailedRerouteAllocation.FailedShard> orderedFailedShards = new ArrayList<>(failedShards);
orderedFailedShards.sort(Comparator.comparing(failedShard -> failedShard.routingEntry.primary()));
for (FailedRerouteAllocation.FailedShard failedShardEntry : orderedFailedShards) {
ShardRouting failedShard = failedShardEntry.routingEntry;
final int failedAllocations = failedShard.unassignedInfo() != null ? failedShard.unassignedInfo().getNumFailedAllocations() : 0;
UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShardEntry.message,
failedShardEntry.failure, failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false,
AllocationStatus.NO_ATTEMPT);
allocation.addIgnoreShardForNode(failedShard.shardId(), failedShard.currentNodeId());
applyFailedShard(allocation, failedShard, unassignedInfo);
for (FailedRerouteAllocation.FailedShard failedShardEntry : failedShards) {
ShardRouting shardToFail = failedShardEntry.routingEntry;
allocation.addIgnoreShardForNode(shardToFail.shardId(), shardToFail.currentNodeId());
// failing a primary also fails initializing replica shards, re-resolve ShardRouting
ShardRouting failedShard = routingNodes.getByAllocationId(shardToFail.shardId(), shardToFail.allocationId().getId());
if (failedShard != null) {
if (failedShard != shardToFail) {
logger.trace("{} shard routing modified in an earlier iteration (previous: {}, current: {})",
shardToFail.shardId(), shardToFail, failedShard);
}
int failedAllocations = failedShard.unassignedInfo() != null ? failedShard.unassignedInfo().getNumFailedAllocations() : 0;
UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShardEntry.message,
failedShardEntry.failure, failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false,
AllocationStatus.NO_ATTEMPT);
applyFailedShard(allocation, failedShard, unassignedInfo);
} else {
logger.trace("{} shard routing failed in an earlier iteration (routing: {})", shardToFail.shardId(), shardToFail);
}
}
gatewayAllocator.applyFailedShards(allocation);