From 35e705877be8e18946bb110a6f3e38a58905a793 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 20 May 2016 20:37:45 +0200 Subject: [PATCH] Limit retries of failed allocations per index (#18467) Today if a shard fails during initialization phase due to misconfiguration, broken disks, missing analyzers, not installed plugins etc. elasticsaerch keeps on trying to initialize or rather allocate that shard. Yet, in the worst case scenario this ends in an endless allocation loop. To prevent this loop and all it's sideeffects like spamming log files over and over again this commit adds an allocation decider that stops allocating a shard that failed more than N times in a row to allocate. The number or retries can be configured via `index.allocation.max_retry` and it's default is set to `5`. Once the setting is updated shards with less failures than the number set per index will be allowed to allocate again. Internally we maintain a counter on the UnassignedInfo that is reset to `0` once the shards has been started. Relates to #18417 --- ...ansportClusterAllocationExplainAction.java | 2 +- .../reroute/ClusterRerouteRequest.java | 33 ++- .../reroute/ClusterRerouteRequestBuilder.java | 9 + .../TransportClusterRerouteAction.java | 86 ++++--- .../elasticsearch/cluster/ClusterModule.java | 2 + .../cluster/routing/UnassignedInfo.java | 32 ++- .../routing/allocation/AllocationService.java | 22 +- .../allocation/FailedRerouteAllocation.java | 2 +- .../routing/allocation/RoutingAllocation.java | 9 +- .../allocation/StartedRerouteAllocation.java | 2 +- ...AllocateEmptyPrimaryAllocationCommand.java | 2 +- .../decider/MaxRetryAllocationDecider.java | 83 +++++++ .../common/settings/IndexScopedSettings.java | 3 +- .../gateway/ReplicaShardAllocator.java | 2 +- .../reroute/RestClusterRerouteAction.java | 1 + .../cluster/reroute/ClusterRerouteTests.java | 181 +++++++++++++++ .../cluster/routing/UnassignedInfoTests.java | 16 +- .../allocation/AllocationCommandsTests.java | 46 ++-- .../allocation/AwarenessAllocationTests.java | 2 +- .../allocation/DeadNodesAllocationTests.java | 8 +- .../ExpectedShardSizeAllocationTests.java | 2 +- .../allocation/FailedShardsRoutingTests.java | 8 +- .../MaxRetryAllocationDeciderTests.java | 210 ++++++++++++++++++ .../NodeVersionAllocationDeciderTests.java | 4 +- .../allocation/ThrottlingAllocationTests.java | 6 +- .../decider/DiskThresholdDeciderTests.java | 14 +- .../DiskThresholdDeciderUnitTests.java | 4 +- .../gateway/PrimaryShardAllocatorTests.java | 18 +- .../gateway/ReplicaShardAllocatorTests.java | 4 +- .../indices/state/RareClusterStateIT.java | 2 +- .../SharedClusterSnapshotRestoreIT.java | 2 +- docs/reference/cluster/reroute.asciidoc | 13 ++ .../rest-api-spec/api/cluster.reroute.json | 4 + 33 files changed, 711 insertions(+), 123 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java create mode 100644 core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java create mode 100644 core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java index 28b62083d42..f986a5679a9 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java @@ -250,7 +250,7 @@ public class TransportClusterAllocationExplainAction final ActionListener listener) { final RoutingNodes routingNodes = state.getRoutingNodes(); final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, state, - clusterInfoService.getClusterInfo(), System.nanoTime()); + clusterInfoService.getClusterInfo(), System.nanoTime(), false); ShardRouting foundShard = null; if (request.useAnyUnassignedShard()) { diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java index a241f01ea28..dcc45ab21b9 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java @@ -38,9 +38,10 @@ import java.io.IOException; * Request to submit cluster reroute allocation commands */ public class ClusterRerouteRequest extends AcknowledgedRequest { - AllocationCommands commands = new AllocationCommands(); - boolean dryRun; - boolean explain; + private AllocationCommands commands = new AllocationCommands(); + private boolean dryRun; + private boolean explain; + private boolean retryFailed; public ClusterRerouteRequest() { } @@ -81,6 +82,15 @@ public class ClusterRerouteRequest extends AcknowledgedRequestfalse). If true, the + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. + */ + public ClusterRerouteRequest setRetryFailed(boolean retryFailed) { + this.retryFailed = retryFailed; + return this; + } + /** * Returns the current explain flag */ @@ -88,6 +98,14 @@ public class ClusterRerouteRequest extends AcknowledgedRequestfalse). If true, the + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. + */ + public ClusterRerouteRequestBuilder setRetryFailed(boolean retryFailed) { + request.setRetryFailed(retryFailed); + return this; + } + /** * Sets the commands for the request to execute. */ diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java index e6116dbfbc4..b0b676f6e2e 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java @@ -33,6 +33,7 @@ import org.elasticsearch.cluster.routing.allocation.RoutingExplanations; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -68,38 +69,55 @@ public class TransportClusterRerouteAction extends TransportMasterNodeAction listener) { - clusterService.submitStateUpdateTask("cluster_reroute (api)", new AckedClusterStateUpdateTask(Priority.IMMEDIATE, request, listener) { - - private volatile ClusterState clusterStateToSend; - private volatile RoutingExplanations explanations; - - @Override - protected ClusterRerouteResponse newResponse(boolean acknowledged) { - return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); - } - - @Override - public void onAckTimeout() { - listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); - } - - @Override - public void onFailure(String source, Throwable t) { - logger.debug("failed to perform [{}]", t, source); - super.onFailure(source, t); - } - - @Override - public ClusterState execute(ClusterState currentState) { - RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain()); - ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); - clusterStateToSend = newState; - explanations = routingResult.explanations(); - if (request.dryRun) { - return currentState; - } - return newState; - } - }); + clusterService.submitStateUpdateTask("cluster_reroute (api)", new ClusterRerouteResponseAckedClusterStateUpdateTask(logger, + allocationService, request, listener)); } -} \ No newline at end of file + + static class ClusterRerouteResponseAckedClusterStateUpdateTask extends AckedClusterStateUpdateTask { + + private final ClusterRerouteRequest request; + private final ActionListener listener; + private final ESLogger logger; + private final AllocationService allocationService; + private volatile ClusterState clusterStateToSend; + private volatile RoutingExplanations explanations; + + ClusterRerouteResponseAckedClusterStateUpdateTask(ESLogger logger, AllocationService allocationService, ClusterRerouteRequest request, + ActionListener listener) { + super(Priority.IMMEDIATE, request, listener); + this.request = request; + this.listener = listener; + this.logger = logger; + this.allocationService = allocationService; + } + + @Override + protected ClusterRerouteResponse newResponse(boolean acknowledged) { + return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); + } + + @Override + public void onAckTimeout() { + listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); + } + + @Override + public void onFailure(String source, Throwable t) { + logger.debug("failed to perform [{}]", t, source); + super.onFailure(source, t); + } + + @Override + public ClusterState execute(ClusterState currentState) { + RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.getCommands(), request.explain(), + request.isRetryFailed()); + ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); + clusterStateToSend = newState; + explanations = routingResult.explanations(); + if (request.dryRun()) { + return currentState; + } + return newState; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 47dd2ce9ae6..a02e399ac0c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -49,6 +49,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDeci import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; @@ -79,6 +80,7 @@ public class ClusterModule extends AbstractModule { new Setting<>("cluster.routing.allocation.type", BALANCED_ALLOCATOR, Function.identity(), Property.NodeScope); public static final List> DEFAULT_ALLOCATION_DECIDERS = Collections.unmodifiableList(Arrays.asList( + MaxRetryAllocationDecider.class, SameShardAllocationDecider.class, FilterAllocationDecider.class, ReplicaAfterPrimaryActiveAllocationDecider.class, diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 2670363364d..bc44cd1701c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -48,7 +48,6 @@ public final class UnassignedInfo implements ToXContent, Writeable { public static final Setting INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING = Setting.timeSetting("index.unassigned.node_left.delayed_timeout", DEFAULT_DELAYED_NODE_LEFT_TIMEOUT, Property.Dynamic, Property.IndexScope); - /** * Reason why the shard is in unassigned state. *

@@ -103,7 +102,11 @@ public final class UnassignedInfo implements ToXContent, Writeable { /** * A better replica location is identified and causes the existing replica allocation to be cancelled. */ - REALLOCATED_REPLICA; + REALLOCATED_REPLICA, + /** + * Unassigned as a result of a failed primary while the replica was initializing. + */ + PRIMARY_FAILED; } private final Reason reason; @@ -112,6 +115,7 @@ public final class UnassignedInfo implements ToXContent, Writeable { private final long lastComputedLeftDelayNanos; // how long to delay shard allocation, not serialized (always positive, 0 means no delay) private final String message; private final Throwable failure; + private final int failedAllocations; /** * creates an UnassingedInfo object based **current** time @@ -120,7 +124,7 @@ public final class UnassignedInfo implements ToXContent, Writeable { * @param message more information about cause. **/ public UnassignedInfo(Reason reason, String message) { - this(reason, message, null, System.nanoTime(), System.currentTimeMillis()); + this(reason, message, null, reason == Reason.ALLOCATION_FAILED ? 1 : 0, System.nanoTime(), System.currentTimeMillis()); } /** @@ -130,13 +134,16 @@ public final class UnassignedInfo implements ToXContent, Writeable { * @param unassignedTimeNanos the time to use as the base for any delayed re-assignment calculation * @param unassignedTimeMillis the time of unassignment used to display to in our reporting. */ - public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, long unassignedTimeNanos, long unassignedTimeMillis) { + public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, int failedAllocations, long unassignedTimeNanos, long unassignedTimeMillis) { this.reason = reason; this.unassignedTimeMillis = unassignedTimeMillis; this.unassignedTimeNanos = unassignedTimeNanos; this.lastComputedLeftDelayNanos = 0L; this.message = message; this.failure = failure; + this.failedAllocations = failedAllocations; + assert (failedAllocations > 0) == (reason == Reason.ALLOCATION_FAILED): + "failedAllocations: " + failedAllocations + " for reason " + reason; assert !(message == null && failure != null) : "provide a message if a failure exception is provided"; } @@ -147,17 +154,19 @@ public final class UnassignedInfo implements ToXContent, Writeable { this.lastComputedLeftDelayNanos = newComputedLeftDelayNanos; this.message = unassignedInfo.message; this.failure = unassignedInfo.failure; + this.failedAllocations = unassignedInfo.failedAllocations; } public UnassignedInfo(StreamInput in) throws IOException { this.reason = Reason.values()[(int) in.readByte()]; this.unassignedTimeMillis = in.readLong(); // As System.nanoTime() cannot be compared across different JVMs, reset it to now. - // This means that in master failover situations, elapsed delay time is forgotten. + // This means that in master fail-over situations, elapsed delay time is forgotten. this.unassignedTimeNanos = System.nanoTime(); this.lastComputedLeftDelayNanos = 0L; this.message = in.readOptionalString(); this.failure = in.readThrowable(); + this.failedAllocations = in.readVInt(); } public void writeTo(StreamOutput out) throws IOException { @@ -166,12 +175,18 @@ public final class UnassignedInfo implements ToXContent, Writeable { // Do not serialize unassignedTimeNanos as System.nanoTime() cannot be compared across different JVMs out.writeOptionalString(message); out.writeThrowable(failure); + out.writeVInt(failedAllocations); } public UnassignedInfo readFrom(StreamInput in) throws IOException { return new UnassignedInfo(in); } + /** + * Returns the number of previously failed allocations of this shard. + */ + public int getNumFailedAllocations() { return failedAllocations; } + /** * The reason why the shard is unassigned. */ @@ -325,7 +340,11 @@ public final class UnassignedInfo implements ToXContent, Writeable { StringBuilder sb = new StringBuilder(); sb.append("[reason=").append(reason).append("]"); sb.append(", at[").append(DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)).append("]"); + if (failedAllocations > 0) { + sb.append(", failed_attempts[").append(failedAllocations).append("]"); + } String details = getDetails(); + if (details != null) { sb.append(", details[").append(details).append("]"); } @@ -342,6 +361,9 @@ public final class UnassignedInfo implements ToXContent, Writeable { builder.startObject("unassigned_info"); builder.field("reason", reason); builder.field("at", DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)); + if (failedAllocations > 0) { + builder.field("failed_attempts", failedAllocations); + } String details = getDetails(); if (details != null) { builder.field("details", details); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index e1bbbb7f4ab..d59113675d8 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -222,8 +222,10 @@ public class AllocationService extends AbstractComponent { List orderedFailedShards = new ArrayList<>(failedShards); orderedFailedShards.sort(Comparator.comparing(failedShard -> failedShard.shard.primary())); for (FailedRerouteAllocation.FailedShard failedShard : orderedFailedShards) { + UnassignedInfo unassignedInfo = failedShard.shard.unassignedInfo(); + final int failedAllocations = unassignedInfo != null ? unassignedInfo.getNumFailedAllocations() : 0; changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure, - System.nanoTime(), System.currentTimeMillis())); + failedAllocations + 1, System.nanoTime(), System.currentTimeMillis())); } if (!changed) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -257,16 +259,13 @@ public class AllocationService extends AbstractComponent { .collect(Collectors.joining(", ")); } - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands) { - return reroute(clusterState, commands, false); - } - - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain) { + public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain, boolean retryFailed) { RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // we don't shuffle the unassigned shards here, to try and get as close as possible to // a consistent result of the effect the commands have on the routing // this allows systems to dry run the commands, see the resulting cluster state, and act on it - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), retryFailed); // don't short circuit deciders, we want a full explanation allocation.debugDecision(true); // we ignore disable allocation, because commands are explicit @@ -305,7 +304,8 @@ public class AllocationService extends AbstractComponent { RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // shuffle the unassigned nodes, just so we won't have things like poison failed shards routingNodes.unassigned().shuffle(); - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), false); allocation.debugDecision(debug); if (!reroute(allocation)) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -437,7 +437,7 @@ public class AllocationService extends AbstractComponent { // now, go over all the shards routing on the node, and fail them for (ShardRouting shardRouting : node.copyShards()) { UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]", null, - allocation.getCurrentNanoTime(), System.currentTimeMillis()); + 0, allocation.getCurrentNanoTime(), System.currentTimeMillis()); applyFailedShard(allocation, shardRouting, false, unassignedInfo); } // its a dead node, remove it, note, its important to remove it *after* we apply failed shard @@ -457,8 +457,8 @@ public class AllocationService extends AbstractComponent { boolean changed = false; for (ShardRouting routing : replicas) { changed |= applyFailedShard(allocation, routing, false, - new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, "primary failed while replica initializing", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing", + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); } return changed; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java index a13862fed26..ef2e42eed76 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java @@ -58,7 +58,7 @@ public class FailedRerouteAllocation extends RoutingAllocation { private final List failedShards; public FailedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List failedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.failedShards = failedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 60ca3a8d5fd..0df8074e14c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -134,6 +134,8 @@ public class RoutingAllocation { private boolean ignoreDisable = false; + private final boolean retryFailed; + private boolean debugDecision = false; private boolean hasPendingAsyncFetch = false; @@ -148,7 +150,7 @@ public class RoutingAllocation { * @param clusterState cluster state before rerouting * @param currentNanoTime the nano time to use for all delay allocation calculation (typically {@link System#nanoTime()}) */ - public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime) { + public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime, boolean retryFailed) { this.deciders = deciders; this.routingNodes = routingNodes; this.metaData = clusterState.metaData(); @@ -156,6 +158,7 @@ public class RoutingAllocation { this.customs = clusterState.customs(); this.clusterInfo = clusterInfo; this.currentNanoTime = currentNanoTime; + this.retryFailed = retryFailed; } /** returns the nano time captured at the beginning of the allocation. used to make sure all time based decisions are aligned */ @@ -297,4 +300,8 @@ public class RoutingAllocation { public void setHasPendingAsyncFetch() { this.hasPendingAsyncFetch = true; } + + public boolean isRetryFailed() { + return retryFailed; + } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java index e9570edd9c3..0f55ab4fda1 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java @@ -36,7 +36,7 @@ public class StartedRerouteAllocation extends RoutingAllocation { private final List startedShards; public StartedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List startedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.startedShards = startedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java index d4191292cfc..c80afde3086 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java @@ -125,7 +125,7 @@ public class AllocateEmptyPrimaryAllocationCommand extends BasePrimaryAllocation // we need to move the unassigned info back to treat it as if it was index creation unassignedInfoToUpdate = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "force empty allocation from previous reason " + shardRouting.unassignedInfo().getReason() + ", " + shardRouting.unassignedInfo().getMessage(), - shardRouting.unassignedInfo().getFailure(), System.nanoTime(), System.currentTimeMillis()); + shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis()); } initializeUnassignedShard(allocation, routingNodes, routingNode, shardRouting, unassignedInfoToUpdate); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java new file mode 100644 index 00000000000..6a8a0ccc5fa --- /dev/null +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -0,0 +1,83 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; + +/** + * An allocation decider that prevents shards from being allocated on any node if the shards allocation has been retried N times without + * success. This means if a shard has been INITIALIZING N times in a row without being moved to STARTED the shard will be ignored until + * the setting for index.allocation.max_retry is raised. The default value is 5. + * Note: This allocation decider also allows allocation of repeatedly failing shards when the /_cluster/reroute?retry_failed=true + * API is manually invoked. This allows single retries without raising the limits. + * + * @see RoutingAllocation#isRetryFailed() + */ +public class MaxRetryAllocationDecider extends AllocationDecider { + + public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting("index.allocation.max_retries", 5, 0, + Setting.Property.Dynamic, Setting.Property.IndexScope); + + public static final String NAME = "max_retry"; + + /** + * Initializes a new {@link MaxRetryAllocationDecider} + * + * @param settings {@link Settings} used by this {@link AllocationDecider} + */ + @Inject + public MaxRetryAllocationDecider(Settings settings) { + super(settings); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { + UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); + if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { + final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); + final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings()); + if (allocation.isRetryFailed()) { // manual allocation - retry + // if we are called via the _reroute API we ignore the failure counter and try to allocate + // this improves the usability since people don't need to raise the limits to issue retries since a simple _reroute call is + // enough to manually retry. + return allocation.decision(Decision.YES, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - retrying once on manual allocation"); + } else if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { + return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - manually call [/_cluster/reroute?retry_failed=true] to retry"); + } + } + return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, allocation); + } +} diff --git a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9ebf6853166..203d1db76b3 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -21,6 +21,7 @@ package org.elasticsearch.common.settings; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.gateway.PrimaryShardAllocator; @@ -40,7 +41,6 @@ import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.FsDirectoryService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.index.store.Store; -import org.elasticsearch.index.IndexWarmer; import org.elasticsearch.indices.IndicesRequestCache; import java.util.Arrays; @@ -59,6 +59,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { public static final Predicate INDEX_SETTINGS_KEY_PREDICATE = (s) -> s.startsWith(IndexMetaData.INDEX_SETTING_PREFIX); public static final Set> BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY, IndexSettings.INDEX_TTL_DISABLE_PURGE_SETTING, IndexStore.INDEX_STORE_THROTTLE_TYPE_SETTING, IndexStore.INDEX_STORE_THROTTLE_MAX_BYTES_PER_SEC_SETTING, diff --git a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java index d2e3d7f42cf..8b6e425c26a 100644 --- a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java +++ b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java @@ -108,7 +108,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent { currentNode, nodeWithHighestMatch); it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); changed = true; } } diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java index 8a4afd89ac4..b34c6726c09 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java @@ -64,6 +64,7 @@ public class RestClusterRerouteAction extends BaseRestHandler { public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) throws Exception { final ClusterRerouteRequest clusterRerouteRequest = Requests.clusterRerouteRequest(); clusterRerouteRequest.dryRun(request.paramAsBoolean("dry_run", clusterRerouteRequest.dryRun())); + clusterRerouteRequest.setRetryFailed(request.paramAsBoolean("retry_failed", clusterRerouteRequest.isRetryFailed())); clusterRerouteRequest.explain(request.paramAsBoolean("explain", clusterRerouteRequest.explain())); clusterRerouteRequest.timeout(request.paramAsTime("timeout", clusterRerouteRequest.timeout())); clusterRerouteRequest.masterNodeTimeout(request.paramAsTime("master_timeout", clusterRerouteRequest.masterNodeTimeout())); diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java new file mode 100644 index 00000000000..927f572487c --- /dev/null +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java @@ -0,0 +1,181 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.action.admin.cluster.reroute; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.network.NetworkModule; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class ClusterRerouteTests extends ESAllocationTestCase { + + public void testSerializeRequest() throws IOException { + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.setRetryFailed(randomBoolean()); + req.dryRun(randomBoolean()); + req.explain(randomBoolean()); + req.commands(new AllocateEmptyPrimaryAllocationCommand("foo", 1, "bar", randomBoolean())); + req.timeout(TimeValue.timeValueMillis(randomIntBetween(0, 100))); + BytesStreamOutput out = new BytesStreamOutput(); + req.writeTo(out); + BytesReference bytes = out.bytes(); + NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(); + new NetworkModule(null, Settings.EMPTY, true, namedWriteableRegistry); + StreamInput wrap = new NamedWriteableAwareStreamInput(StreamInput.wrap(bytes.toBytes()), + namedWriteableRegistry); + ClusterRerouteRequest deserializedReq = new ClusterRerouteRequest(); + deserializedReq.readFrom(wrap); + + assertEquals(req.isRetryFailed(), deserializedReq.isRetryFailed()); + assertEquals(req.dryRun(), deserializedReq.dryRun()); + assertEquals(req.explain(), deserializedReq.explain()); + assertEquals(req.timeout(), deserializedReq.timeout()); + assertEquals(1, deserializedReq.getCommands().commands().size()); // allocation commands have their own tests + assertEquals(req.getCommands().commands().size(), deserializedReq.getCommands().commands().size()); + } + + public void testClusterStateUpdateTask() { + AllocationService allocationService = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + ClusterState clusterState = createInitialClusterState(allocationService); + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.dryRun(true); + AtomicReference responseRef = new AtomicReference<>(); + ActionListener responseActionListener = new ActionListener() { + @Override + public void onResponse(ClusterRerouteResponse clusterRerouteResponse) { + responseRef.set(clusterRerouteResponse); + } + + @Override + public void onFailure(Throwable e) { + + } + }; + TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task = + new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(logger, allocationService, req, + responseActionListener ); + ClusterState execute = task.execute(clusterState); + assertSame(execute, clusterState); // dry-run + task.onAllNodesAcked(null); + assertNotSame(responseRef.get().getState(), execute); + + req.dryRun(false);// now we allocate + + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries; i++) { + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i); + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = allocationService.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build(); + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + if (i == retries-1) { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + } else { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + } + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + } + + + // without retry_failed we won't allocate that shard + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + task.onAllNodesAcked(null); + assertSame(responseRef.get().getState(), newState); + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + + req.setRetryFailed(true); // now we manually retry and get the shard back into initializing + newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + } + + private ClusterState createInitialClusterState(AllocationService service) { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = service.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java index 708f8ca5079..f46224570b0 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java @@ -64,7 +64,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase { UnassignedInfo.Reason.NODE_LEFT, UnassignedInfo.Reason.REROUTE_CANCELLED, UnassignedInfo.Reason.REINITIALIZED, - UnassignedInfo.Reason.REALLOCATED_REPLICA}; + UnassignedInfo.Reason.REALLOCATED_REPLICA, + UnassignedInfo.Reason.PRIMARY_FAILED}; for (int i = 0; i < order.length; i++) { assertThat(order[i].ordinal(), equalTo(i)); } @@ -72,7 +73,10 @@ public class UnassignedInfoTests extends ESAllocationTestCase { } public void testSerialization() throws Exception { - UnassignedInfo meta = new UnassignedInfo(RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()), randomBoolean() ? randomAsciiOfLength(4) : null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()); + UnassignedInfo meta = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null, null, randomIntBetween(1, 100), System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null); BytesStreamOutput out = new BytesStreamOutput(); meta.writeTo(out); out.close(); @@ -82,6 +86,7 @@ public class UnassignedInfoTests extends ESAllocationTestCase { assertThat(read.getUnassignedTimeInMillis(), equalTo(meta.getUnassignedTimeInMillis())); assertThat(read.getMessage(), equalTo(meta.getMessage())); assertThat(read.getDetails(), equalTo(meta.getDetails())); + assertThat(read.getNumFailedAllocations(), equalTo(meta.getNumFailedAllocations())); } public void testIndexCreated() { @@ -273,7 +278,10 @@ public class UnassignedInfoTests extends ESAllocationTestCase { public void testUnassignedDelayOnlyNodeLeftNonNodeLeftReason() throws Exception { EnumSet reasons = EnumSet.allOf(UnassignedInfo.Reason.class); reasons.remove(UnassignedInfo.Reason.NODE_LEFT); - UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(random(), reasons), null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), reasons); + UnassignedInfo unassignedInfo = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, null, null, 1, System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, null); unassignedInfo = unassignedInfo.updateDelay(unassignedInfo.getUnassignedTimeInNanos() + 1, // add 1 tick delay Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "10h").build(), Settings.EMPTY); long delay = unassignedInfo.getLastComputedLeftDelayNanos(); @@ -287,7 +295,7 @@ public class UnassignedInfoTests extends ESAllocationTestCase { */ public void testLeftDelayCalculation() throws Exception { final long baseTime = System.nanoTime(); - UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, baseTime, System.currentTimeMillis()); + UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, 0, baseTime, System.currentTimeMillis()); final long totalDelayNanos = TimeValue.timeValueMillis(10).nanos(); final Settings settings = Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueNanos(totalDelayNanos)).build(); unassignedInfo = unassignedInfo.updateDelay(baseTime, settings, Settings.EMPTY); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java index 7aa8576ece3..b63692e0d2a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java @@ -94,7 +94,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), equalTo(ShardRoutingState.RELOCATING)); @@ -148,7 +148,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating to non-existent node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42")), false, false); fail("expected IllegalArgumentException when allocating to non-existing node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("failed to resolve [node42], no matching nodes")); @@ -156,7 +156,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating to non-data node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4")), false, false); fail("expected IllegalArgumentException when allocating to non-data node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocation can only be done on data nodes")); @@ -164,7 +164,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating non-existing shard, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing shard"); } catch (ShardNotFoundException e) { assertThat(e.getMessage(), containsString("no such shard")); @@ -172,7 +172,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating non-existing index, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing index"); } catch (IndexNotFoundException e) { assertThat(e.getMessage(), containsString("no such index")); @@ -180,7 +180,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating empty primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false)), false, false); fail("expected IllegalArgumentException when allocating empty primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); @@ -188,14 +188,14 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating stale primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false)), false, false); fail("expected IllegalArgumentException when allocating stale primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); } logger.info("--> allocating empty primary with acceptDataLoss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -211,13 +211,13 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocate the replica shard on the primary shard node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1"))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1")), false, false); fail("expected IllegalArgumentException when allocating replica shard on the primary shard node"); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -236,7 +236,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> verify that we fail when there are no unassigned shards"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3")), false, false); fail("expected IllegalArgumentException when allocating shard while no unassigned shard available"); } catch (IllegalArgumentException e) { } @@ -268,7 +268,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0)); logger.info("--> allocating empty primary shard with accept_data_loss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -277,7 +277,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -291,13 +291,13 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -306,7 +306,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the relocation allocation"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -315,7 +315,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -325,7 +325,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel the primary being replicated, make sure it fails"); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -339,7 +339,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> cancel allocation of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -348,7 +348,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -364,7 +364,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> move the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -374,7 +374,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the move of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -383,7 +383,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel the primary allocation (with allow_primary set to true)"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); logger.error(clusterState.prettyPrint()); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java index ee993bf3ebd..805ab0321ba 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java @@ -868,7 +868,7 @@ public class AwarenessAllocationTests extends ESAllocationTestCase { } commands.add(new MoveAllocationCommand("test", 0, primaryNode, "A-4")); - routingTable = strategy.reroute(clusterState, commands).routingTable(); + routingTable = strategy.reroute(clusterState, commands, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java index 11b78d2ae6a..b14aeca890e 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java @@ -149,8 +149,8 @@ public class DeadNodesAllocationTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -223,8 +223,8 @@ public class DeadNodesAllocationTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java index bfa27a36d8b..29644f07944 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java @@ -149,7 +149,7 @@ public class ExpectedShardSizeAllocationTests extends ESAllocationTestCase { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertEquals(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), ShardRoutingState.RELOCATING); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 58e2397b043..e859c5811c3 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -109,8 +109,8 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -125,8 +125,8 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java new file mode 100644 index 00000000000..f76851cfef9 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -0,0 +1,210 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocateReplicaAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase { + + private AllocationService strategy; + + @Override + public void setUp() throws Exception { + super.setUp(); + strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + } + + private ClusterState createInitialClusterState() { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState, "reroute", false).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } + + public void testSingleRetryOnIgnore() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + result = strategy.reroute(clusterState, new AllocationCommands(), false, true); // manual reroute should retry once + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we go and check that we are actually stick to unassigned on the next failure ie. no retry + failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + } + + public void testFailedAllocation() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + } + + // change the settings and ensure we can do another round of allocation for that index. + clusterState = ClusterState.builder(clusterState).routingTable(routingTable) + .metaData(MetaData.builder(clusterState.metaData()) + .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", + retries+1).build() + ).build(), true).build()).build(); + RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed", false); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + // good we are initializing and we are maintaining failure information + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we start the shard + routingTable = strategy.applyStartedShards(clusterState, Collections.singletonList(routingTable.index("idx") + .shard(0).shards().get(0))).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + // all counters have been reset to 0 ie. no unassigned info + assertEquals(routingTable.index("idx").shards().size(), 1); + assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo()); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), STARTED); + + // now fail again and see if it has a new counter + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "ZOOOMG"); + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java index 97a3003ab2f..d0fc64b4b6b 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java @@ -337,7 +337,7 @@ public class NodeVersionAllocationDeciderTests extends ESAllocationTestCase { AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // the two indices must stay as is, the replicas cannot move to oldNode2 because versions don't match state = ClusterState.builder(state).routingResult(result).build(); assertThat(result.routingTable().index(shard2.getIndex()).shardsWithState(ShardRoutingState.RELOCATING).size(), equalTo(0)); @@ -369,7 +369,7 @@ public class NodeVersionAllocationDeciderTests extends ESAllocationTestCase { AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // Make sure that primary shards are only allocated on the new node for (int i = 0; i < numberOfShards; i++) { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java index 28d916e20c1..61a72bc352a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java @@ -283,7 +283,7 @@ public class ThrottlingAllocationTests extends ESAllocationTestCase { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), 0); assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); - RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2"))); + RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2")), false, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.YES); routingTable = reroute.routingTable(); @@ -296,7 +296,7 @@ public class ThrottlingAllocationTests extends ESAllocationTestCase { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); // outgoing throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 0); @@ -311,7 +311,7 @@ public class ThrottlingAllocationTests extends ESAllocationTestCase { assertThat(routingTable.shardsWithState(UNASSIGNED).size(), equalTo(0)); // incoming throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index 2c4e86ad4b1..579e87150a7 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -796,7 +796,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { AllocationCommand relocate1 = new MoveAllocationCommand("test", 0, "node2", "node3"); AllocationCommands cmds = new AllocationCommands(relocate1); - routingTable = strategy.reroute(clusterState, cmds).routingTable(); + routingTable = strategy.reroute(clusterState, cmds, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); @@ -808,7 +808,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { // node3, which will put it over the low watermark when it // completes, with shard relocations taken into account this should // throw an exception about not being able to complete - strategy.reroute(clusterState, cmds).routingTable(); + strategy.reroute(clusterState, cmds, false, false).routingTable(); fail("should not have been able to reroute the shard"); } catch (IllegalArgumentException e) { assertThat("can't allocated because there isn't enough room: " + e.getMessage(), @@ -876,7 +876,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); @@ -896,7 +896,8 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ) ); clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); @@ -992,7 +993,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); // Two shards should start happily @@ -1051,7 +1052,8 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ); clusterState = ClusterState.builder(updateClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index d9e9fb95445..008884cbb8d 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -136,7 +136,7 @@ public class DiskThresholdDeciderUnitTests extends ESTestCase { ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); shardSizes.put("[test][0][p]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(mostAvailableUsage.toString(), Decision.YES, decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(mostAvailableUsage.toString(), Decision.NO, decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation)); } @@ -204,7 +204,7 @@ public class DiskThresholdDeciderUnitTests extends ESTestCase { shardSizes.put("[test][2][p]", 10L); final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(Decision.YES, decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(Decision.NO, decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation)); try { diff --git a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java index 01e717e6fa9..ee1cf7280e7 100644 --- a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java @@ -346,7 +346,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -425,7 +425,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -444,7 +444,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -452,7 +452,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -460,7 +460,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -485,7 +485,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -493,7 +493,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -501,7 +501,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 2, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -525,7 +525,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTableBuilder.build()) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } class TestAllocator extends PrimaryShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java index 672c9de3d3e..20eb6286813 100644 --- a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java @@ -302,7 +302,7 @@ public class ReplicaShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders) { @@ -324,7 +324,7 @@ public class ReplicaShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } class TestAllocator extends ReplicaShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java index dc533737886..81c50cc4f9c 100644 --- a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java +++ b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java @@ -103,7 +103,7 @@ public class RareClusterStateIT extends ESIntegTestCase { .nodes(DiscoveryNodes.EMPTY_NODES) .build(), false ); - RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime()); + RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime(), false); allocator.allocateUnassigned(routingAllocation); } diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 5ed63b519a4..7ca30132a4a 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -640,7 +640,7 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", repositoryLocation))); - createIndex("test-idx"); + prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retries", Integer.MAX_VALUE)).get(); ensureGreen(); logger.info("--> indexing some data"); diff --git a/docs/reference/cluster/reroute.asciidoc b/docs/reference/cluster/reroute.asciidoc index 99e754df529..bb48a00fbe5 100644 --- a/docs/reference/cluster/reroute.asciidoc +++ b/docs/reference/cluster/reroute.asciidoc @@ -103,3 +103,16 @@ are available: To ensure that these implications are well-understood, this command requires the special field `accept_data_loss` to be explicitly set to `true` for it to work. + +[float] +=== Retry failed shards + +The cluster will attempt to allocate a shard a maximum of +`index.allocation.max_retries` times in a row (defaults to `5`), before giving +up and leaving the shard unallocated. This scenario can be caused by +structural problems such as having an analyzer which refers to a stopwords +file which doesn't exist on all nodes. + +Once the problem has been corrected, allocation can be manually retried by +calling the <> API with `?retry_failed`, which +will attempt a single retry round for these shards. \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json index 2ae42c089d3..8bb85ca087a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json @@ -16,6 +16,10 @@ "type" : "boolean", "description" : "Return an explanation of why the commands can or cannot be executed" }, + "retry_failed": { + "type" : "boolean", + "description" : "Retries allocation of shards that are blocked due to too many subsequent allocation failures" + }, "metric": { "type": "list", "options": ["_all", "blocks", "metadata", "nodes", "routing_table", "master_node", "version"],