Don't break allocation if resize source index is missing (#29311)

DiskThresholdDecider currently assumes that the source index of a resize operation (e.g. shrink)
is available, and throws an IndexNotFoundException otherwise, thereby breaking any kind of shard
allocation. This can be quite harmful if the source index is deleted during a shrink, or if the source
index is unavailable during state recovery.

While this behavior has been partly fixed in 6.1 and above (due to #26931), that fix relies on the
order in which AllocationDeciders are executed: ResizeAllocationDecider has to return NO first, so
that DiskThresholdDecider never runs. This assumption does not always hold, for example in the
allocation explain API.

This change adds a more complete fix by making DiskThresholdDecider itself tolerate a missing source index, and also fixes the problem on 5.6.
This commit is contained in:
Yannick Welsch 2018-04-03 11:51:06 +02:00 committed by GitHub
parent 989e465964
commit 2dc546ccec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 5 deletions

View File

@ -409,11 +409,14 @@ public class DiskThresholdDecider extends AllocationDecider {
// the worst case // the worst case
long targetShardSize = 0; long targetShardSize = 0;
final Index mergeSourceIndex = metaData.getResizeSourceIndex(); final Index mergeSourceIndex = metaData.getResizeSourceIndex();
final IndexMetaData sourceIndexMeta = allocation.metaData().getIndexSafe(mergeSourceIndex); final IndexMetaData sourceIndexMeta = allocation.metaData().index(mergeSourceIndex);
final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(), sourceIndexMeta, metaData.getNumberOfShards()); if (sourceIndexMeta != null) {
for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) { final Set<ShardId> shardIds = IndexMetaData.selectRecoverFromShards(shard.id(),
if (shardIds.contains(shardRoutingTable.shardId())) { sourceIndexMeta, metaData.getNumberOfShards());
targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0); for (IndexShardRoutingTable shardRoutingTable : allocation.routingTable().index(mergeSourceIndex.getName())) {
if (shardIds.contains(shardRoutingTable.shardId())) {
targetShardSize += info.getShardSize(shardRoutingTable.primaryShard(), 0);
}
} }
} }
return targetShardSize == 0 ? defaultValue : targetShardSize; return targetShardSize == 0 ? defaultValue : targetShardSize;

View File

@ -342,6 +342,20 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase {
target2 = ShardRouting.newUnassigned(new ShardId(new Index("target2", "9101112"), 1), target2 = ShardRouting.newUnassigned(new ShardId(new Index("target2", "9101112"), 1),
true, LocalShardsRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo")); true, LocalShardsRecoverySource.INSTANCE, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "foo"));
assertEquals(1000L, DiskThresholdDecider.getExpectedShardSize(target2, allocation, 0)); assertEquals(1000L, DiskThresholdDecider.getExpectedShardSize(target2, allocation, 0));
// check that the DiskThresholdDecider still works even if the source index has been deleted
ClusterState clusterStateWithMissingSourceIndex = ClusterState.builder(clusterState)
.metaData(MetaData.builder(metaData).remove("test"))
.routingTable(RoutingTable.builder(clusterState.routingTable()).remove("test").build())
.build();
allocationService.reroute(clusterState, "foo");
RoutingAllocation allocationWithMissingSourceIndex = new RoutingAllocation(null,
clusterStateWithMissingSourceIndex.getRoutingNodes(), clusterStateWithMissingSourceIndex, info, 0);
assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target, allocationWithMissingSourceIndex, 42L));
assertEquals(42L, DiskThresholdDecider.getExpectedShardSize(target2, allocationWithMissingSourceIndex, 42L));
} }
} }