diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java index 28b62083d42..f986a5679a9 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/allocation/TransportClusterAllocationExplainAction.java @@ -250,7 +250,7 @@ public class TransportClusterAllocationExplainAction final ActionListener listener) { final RoutingNodes routingNodes = state.getRoutingNodes(); final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, state, - clusterInfoService.getClusterInfo(), System.nanoTime()); + clusterInfoService.getClusterInfo(), System.nanoTime(), false); ShardRouting foundShard = null; if (request.useAnyUnassignedShard()) { diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java index a241f01ea28..dcc45ab21b9 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteRequest.java @@ -38,9 +38,10 @@ import java.io.IOException; * Request to submit cluster reroute allocation commands */ public class ClusterRerouteRequest extends AcknowledgedRequest { - AllocationCommands commands = new AllocationCommands(); - boolean dryRun; - boolean explain; + private AllocationCommands commands = new AllocationCommands(); + private boolean dryRun; + private boolean explain; + private boolean retryFailed; public ClusterRerouteRequest() { } @@ -81,6 +82,15 @@ public class ClusterRerouteRequest extends AcknowledgedRequestfalse). If true, the + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. + */ + public ClusterRerouteRequest setRetryFailed(boolean retryFailed) { + this.retryFailed = retryFailed; + return this; + } + /** * Returns the current explain flag */ @@ -88,6 +98,14 @@ public class ClusterRerouteRequest extends AcknowledgedRequestfalse). If true, the + * request will retry allocating shards that can't currently be allocated due to too many allocation failures. + */ + public ClusterRerouteRequestBuilder setRetryFailed(boolean retryFailed) { + request.setRetryFailed(retryFailed); + return this; + } + /** * Sets the commands for the request to execute. */ diff --git a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java index e6116dbfbc4..b0b676f6e2e 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/cluster/reroute/TransportClusterRerouteAction.java @@ -33,6 +33,7 @@ import org.elasticsearch.cluster.routing.allocation.RoutingExplanations; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -68,38 +69,55 @@ public class TransportClusterRerouteAction extends TransportMasterNodeAction listener) { - clusterService.submitStateUpdateTask("cluster_reroute (api)", new AckedClusterStateUpdateTask(Priority.IMMEDIATE, request, listener) { - - private volatile ClusterState clusterStateToSend; - private volatile RoutingExplanations explanations; - - @Override - protected ClusterRerouteResponse newResponse(boolean acknowledged) { - return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); - } - - @Override - public void onAckTimeout() { - listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); - } - - @Override - public void onFailure(String source, Throwable t) { - logger.debug("failed to perform [{}]", t, source); - super.onFailure(source, t); - } - - @Override - public ClusterState execute(ClusterState currentState) { - RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.commands, request.explain()); - ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); - clusterStateToSend = newState; - explanations = routingResult.explanations(); - if (request.dryRun) { - return currentState; - } - return newState; - } - }); + clusterService.submitStateUpdateTask("cluster_reroute (api)", new ClusterRerouteResponseAckedClusterStateUpdateTask(logger, + allocationService, request, listener)); } -} \ No newline at end of file + + static class ClusterRerouteResponseAckedClusterStateUpdateTask extends AckedClusterStateUpdateTask { + + private final ClusterRerouteRequest request; + private final ActionListener listener; + private final ESLogger logger; + private final AllocationService allocationService; + private volatile ClusterState clusterStateToSend; + private volatile RoutingExplanations explanations; + + ClusterRerouteResponseAckedClusterStateUpdateTask(ESLogger logger, AllocationService allocationService, ClusterRerouteRequest request, + ActionListener listener) { + super(Priority.IMMEDIATE, request, listener); + this.request = request; + this.listener = listener; + this.logger = logger; + this.allocationService = allocationService; + } + + @Override + protected ClusterRerouteResponse newResponse(boolean acknowledged) { + return new ClusterRerouteResponse(acknowledged, clusterStateToSend, explanations); + } + + @Override + public void onAckTimeout() { + listener.onResponse(new ClusterRerouteResponse(false, clusterStateToSend, new RoutingExplanations())); + } + + @Override + public void onFailure(String source, Throwable t) { + logger.debug("failed to perform [{}]", t, source); + super.onFailure(source, t); + } + + @Override + public ClusterState execute(ClusterState currentState) { + RoutingAllocation.Result routingResult = allocationService.reroute(currentState, request.getCommands(), request.explain(), + request.isRetryFailed()); + ClusterState newState = ClusterState.builder(currentState).routingResult(routingResult).build(); + clusterStateToSend = newState; + explanations = routingResult.explanations(); + if (request.dryRun()) { + return currentState; + } + return newState; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 47dd2ce9ae6..a02e399ac0c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/core/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -49,6 +49,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDeci import org.elasticsearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; @@ -79,6 +80,7 @@ public class ClusterModule extends AbstractModule { new Setting<>("cluster.routing.allocation.type", BALANCED_ALLOCATOR, Function.identity(), Property.NodeScope); public static final List> DEFAULT_ALLOCATION_DECIDERS = Collections.unmodifiableList(Arrays.asList( + MaxRetryAllocationDecider.class, SameShardAllocationDecider.class, FilterAllocationDecider.class, ReplicaAfterPrimaryActiveAllocationDecider.class, diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 2670363364d..bc44cd1701c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -48,7 +48,6 @@ public final class UnassignedInfo implements ToXContent, Writeable { public static final Setting INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING = Setting.timeSetting("index.unassigned.node_left.delayed_timeout", DEFAULT_DELAYED_NODE_LEFT_TIMEOUT, Property.Dynamic, Property.IndexScope); - /** * Reason why the shard is in unassigned state. *

@@ -103,7 +102,11 @@ public final class UnassignedInfo implements ToXContent, Writeable { /** * A better replica location is identified and causes the existing replica allocation to be cancelled. */ - REALLOCATED_REPLICA; + REALLOCATED_REPLICA, + /** + * Unassigned as a result of a failed primary while the replica was initializing. + */ + PRIMARY_FAILED; } private final Reason reason; @@ -112,6 +115,7 @@ public final class UnassignedInfo implements ToXContent, Writeable { private final long lastComputedLeftDelayNanos; // how long to delay shard allocation, not serialized (always positive, 0 means no delay) private final String message; private final Throwable failure; + private final int failedAllocations; /** * creates an UnassingedInfo object based **current** time @@ -120,7 +124,7 @@ public final class UnassignedInfo implements ToXContent, Writeable { * @param message more information about cause. **/ public UnassignedInfo(Reason reason, String message) { - this(reason, message, null, System.nanoTime(), System.currentTimeMillis()); + this(reason, message, null, reason == Reason.ALLOCATION_FAILED ? 1 : 0, System.nanoTime(), System.currentTimeMillis()); } /** @@ -130,13 +134,16 @@ public final class UnassignedInfo implements ToXContent, Writeable { * @param unassignedTimeNanos the time to use as the base for any delayed re-assignment calculation * @param unassignedTimeMillis the time of unassignment used to display to in our reporting. */ - public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, long unassignedTimeNanos, long unassignedTimeMillis) { + public UnassignedInfo(Reason reason, @Nullable String message, @Nullable Throwable failure, int failedAllocations, long unassignedTimeNanos, long unassignedTimeMillis) { this.reason = reason; this.unassignedTimeMillis = unassignedTimeMillis; this.unassignedTimeNanos = unassignedTimeNanos; this.lastComputedLeftDelayNanos = 0L; this.message = message; this.failure = failure; + this.failedAllocations = failedAllocations; + assert (failedAllocations > 0) == (reason == Reason.ALLOCATION_FAILED): + "failedAllocations: " + failedAllocations + " for reason " + reason; assert !(message == null && failure != null) : "provide a message if a failure exception is provided"; } @@ -147,17 +154,19 @@ public final class UnassignedInfo implements ToXContent, Writeable { this.lastComputedLeftDelayNanos = newComputedLeftDelayNanos; this.message = unassignedInfo.message; this.failure = unassignedInfo.failure; + this.failedAllocations = unassignedInfo.failedAllocations; } public UnassignedInfo(StreamInput in) throws IOException { this.reason = Reason.values()[(int) in.readByte()]; this.unassignedTimeMillis = in.readLong(); // As System.nanoTime() cannot be compared across different JVMs, reset it to now. - // This means that in master failover situations, elapsed delay time is forgotten. + // This means that in master fail-over situations, elapsed delay time is forgotten. this.unassignedTimeNanos = System.nanoTime(); this.lastComputedLeftDelayNanos = 0L; this.message = in.readOptionalString(); this.failure = in.readThrowable(); + this.failedAllocations = in.readVInt(); } public void writeTo(StreamOutput out) throws IOException { @@ -166,12 +175,18 @@ public final class UnassignedInfo implements ToXContent, Writeable { // Do not serialize unassignedTimeNanos as System.nanoTime() cannot be compared across different JVMs out.writeOptionalString(message); out.writeThrowable(failure); + out.writeVInt(failedAllocations); } public UnassignedInfo readFrom(StreamInput in) throws IOException { return new UnassignedInfo(in); } + /** + * Returns the number of previously failed allocations of this shard. + */ + public int getNumFailedAllocations() { return failedAllocations; } + /** * The reason why the shard is unassigned. */ @@ -325,7 +340,11 @@ public final class UnassignedInfo implements ToXContent, Writeable { StringBuilder sb = new StringBuilder(); sb.append("[reason=").append(reason).append("]"); sb.append(", at[").append(DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)).append("]"); + if (failedAllocations > 0) { + sb.append(", failed_attempts[").append(failedAllocations).append("]"); + } String details = getDetails(); + if (details != null) { sb.append(", details[").append(details).append("]"); } @@ -342,6 +361,9 @@ public final class UnassignedInfo implements ToXContent, Writeable { builder.startObject("unassigned_info"); builder.field("reason", reason); builder.field("at", DATE_TIME_FORMATTER.printer().print(unassignedTimeMillis)); + if (failedAllocations > 0) { + builder.field("failed_attempts", failedAllocations); + } String details = getDetails(); if (details != null) { builder.field("details", details); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index e1bbbb7f4ab..d59113675d8 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -222,8 +222,10 @@ public class AllocationService extends AbstractComponent { List orderedFailedShards = new ArrayList<>(failedShards); orderedFailedShards.sort(Comparator.comparing(failedShard -> failedShard.shard.primary())); for (FailedRerouteAllocation.FailedShard failedShard : orderedFailedShards) { + UnassignedInfo unassignedInfo = failedShard.shard.unassignedInfo(); + final int failedAllocations = unassignedInfo != null ? unassignedInfo.getNumFailedAllocations() : 0; changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure, - System.nanoTime(), System.currentTimeMillis())); + failedAllocations + 1, System.nanoTime(), System.currentTimeMillis())); } if (!changed) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -257,16 +259,13 @@ public class AllocationService extends AbstractComponent { .collect(Collectors.joining(", ")); } - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands) { - return reroute(clusterState, commands, false); - } - - public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain) { + public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain, boolean retryFailed) { RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // we don't shuffle the unassigned shards here, to try and get as close as possible to // a consistent result of the effect the commands have on the routing // this allows systems to dry run the commands, see the resulting cluster state, and act on it - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), retryFailed); // don't short circuit deciders, we want a full explanation allocation.debugDecision(true); // we ignore disable allocation, because commands are explicit @@ -305,7 +304,8 @@ public class AllocationService extends AbstractComponent { RoutingNodes routingNodes = getMutableRoutingNodes(clusterState); // shuffle the unassigned nodes, just so we won't have things like poison failed shards routingNodes.unassigned().shuffle(); - RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, clusterInfoService.getClusterInfo(), currentNanoTime()); + RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState, + clusterInfoService.getClusterInfo(), currentNanoTime(), false); allocation.debugDecision(debug); if (!reroute(allocation)) { return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData()); @@ -437,7 +437,7 @@ public class AllocationService extends AbstractComponent { // now, go over all the shards routing on the node, and fail them for (ShardRouting shardRouting : node.copyShards()) { UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]", null, - allocation.getCurrentNanoTime(), System.currentTimeMillis()); + 0, allocation.getCurrentNanoTime(), System.currentTimeMillis()); applyFailedShard(allocation, shardRouting, false, unassignedInfo); } // its a dead node, remove it, note, its important to remove it *after* we apply failed shard @@ -457,8 +457,8 @@ public class AllocationService extends AbstractComponent { boolean changed = false; for (ShardRouting routing : replicas) { changed |= applyFailedShard(allocation, routing, false, - new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, "primary failed while replica initializing", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing", + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); } return changed; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java index a13862fed26..ef2e42eed76 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java @@ -58,7 +58,7 @@ public class FailedRerouteAllocation extends RoutingAllocation { private final List failedShards; public FailedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List failedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.failedShards = failedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java index 60ca3a8d5fd..0df8074e14c 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/RoutingAllocation.java @@ -134,6 +134,8 @@ public class RoutingAllocation { private boolean ignoreDisable = false; + private final boolean retryFailed; + private boolean debugDecision = false; private boolean hasPendingAsyncFetch = false; @@ -148,7 +150,7 @@ public class RoutingAllocation { * @param clusterState cluster state before rerouting * @param currentNanoTime the nano time to use for all delay allocation calculation (typically {@link System#nanoTime()}) */ - public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime) { + public RoutingAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, ClusterInfo clusterInfo, long currentNanoTime, boolean retryFailed) { this.deciders = deciders; this.routingNodes = routingNodes; this.metaData = clusterState.metaData(); @@ -156,6 +158,7 @@ public class RoutingAllocation { this.customs = clusterState.customs(); this.clusterInfo = clusterInfo; this.currentNanoTime = currentNanoTime; + this.retryFailed = retryFailed; } /** returns the nano time captured at the beginning of the allocation. used to make sure all time based decisions are aligned */ @@ -297,4 +300,8 @@ public class RoutingAllocation { public void setHasPendingAsyncFetch() { this.hasPendingAsyncFetch = true; } + + public boolean isRetryFailed() { + return retryFailed; + } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java index e9570edd9c3..0f55ab4fda1 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/StartedRerouteAllocation.java @@ -36,7 +36,7 @@ public class StartedRerouteAllocation extends RoutingAllocation { private final List startedShards; public StartedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, ClusterState clusterState, List startedShards, ClusterInfo clusterInfo) { - super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime()); + super(deciders, routingNodes, clusterState, clusterInfo, System.nanoTime(), false); this.startedShards = startedShards; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java index d4191292cfc..c80afde3086 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/AllocateEmptyPrimaryAllocationCommand.java @@ -125,7 +125,7 @@ public class AllocateEmptyPrimaryAllocationCommand extends BasePrimaryAllocation // we need to move the unassigned info back to treat it as if it was index creation unassignedInfoToUpdate = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, "force empty allocation from previous reason " + shardRouting.unassignedInfo().getReason() + ", " + shardRouting.unassignedInfo().getMessage(), - shardRouting.unassignedInfo().getFailure(), System.nanoTime(), System.currentTimeMillis()); + shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis()); } initializeUnassignedShard(allocation, routingNodes, routingNode, shardRouting, unassignedInfoToUpdate); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java new file mode 100644 index 00000000000..6a8a0ccc5fa --- /dev/null +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/MaxRetryAllocationDecider.java @@ -0,0 +1,83 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; + +/** + * An allocation decider that prevents shards from being allocated on any node if the shards allocation has been retried N times without + * success. This means if a shard has been INITIALIZING N times in a row without being moved to STARTED the shard will be ignored until + * the setting for index.allocation.max_retry is raised. The default value is 5. + * Note: This allocation decider also allows allocation of repeatedly failing shards when the /_cluster/reroute?retry_failed=true + * API is manually invoked. This allows single retries without raising the limits. + * + * @see RoutingAllocation#isRetryFailed() + */ +public class MaxRetryAllocationDecider extends AllocationDecider { + + public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting("index.allocation.max_retries", 5, 0, + Setting.Property.Dynamic, Setting.Property.IndexScope); + + public static final String NAME = "max_retry"; + + /** + * Initializes a new {@link MaxRetryAllocationDecider} + * + * @param settings {@link Settings} used by this {@link AllocationDecider} + */ + @Inject + public MaxRetryAllocationDecider(Settings settings) { + super(settings); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { + UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); + if (unassignedInfo != null && unassignedInfo.getNumFailedAllocations() > 0) { + final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); + final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetaData.getSettings()); + if (allocation.isRetryFailed()) { // manual allocation - retry + // if we are called via the _reroute API we ignore the failure counter and try to allocate + // this improves the usability since people don't need to raise the limits to issue retries since a simple _reroute call is + // enough to manually retry. + return allocation.decision(Decision.YES, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - retrying once on manual allocation"); + } else if (unassignedInfo.getNumFailedAllocations() >= maxRetry) { + return allocation.decision(Decision.NO, NAME, "shard has already failed allocating [" + + unassignedInfo.getNumFailedAllocations() + "] times vs. [" + maxRetry + "] retries allowed " + + unassignedInfo.toString() + " - manually call [/_cluster/reroute?retry_failed=true] to retry"); + } + } + return allocation.decision(Decision.YES, NAME, "shard has no previous failures"); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return canAllocate(shardRouting, allocation); + } +} diff --git a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9ebf6853166..203d1db76b3 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -21,6 +21,7 @@ package org.elasticsearch.common.settings; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.gateway.PrimaryShardAllocator; @@ -40,7 +41,6 @@ import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.FsDirectoryService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.index.store.Store; -import org.elasticsearch.index.IndexWarmer; import org.elasticsearch.indices.IndicesRequestCache; import java.util.Arrays; @@ -59,6 +59,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { public static final Predicate INDEX_SETTINGS_KEY_PREDICATE = (s) -> s.startsWith(IndexMetaData.INDEX_SETTING_PREFIX); public static final Set> BUILT_IN_INDEX_SETTINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY, IndexSettings.INDEX_TTL_DISABLE_PURGE_SETTING, IndexStore.INDEX_STORE_THROTTLE_TYPE_SETTING, IndexStore.INDEX_STORE_THROTTLE_MAX_BYTES_PER_SEC_SETTING, diff --git a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java index d2e3d7f42cf..8b6e425c26a 100644 --- a/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java +++ b/core/src/main/java/org/elasticsearch/gateway/ReplicaShardAllocator.java @@ -108,7 +108,7 @@ public abstract class ReplicaShardAllocator extends AbstractComponent { currentNode, nodeWithHighestMatch); it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", - null, allocation.getCurrentNanoTime(), System.currentTimeMillis())); + null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis())); changed = true; } } diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java index 8a4afd89ac4..b34c6726c09 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/cluster/reroute/RestClusterRerouteAction.java @@ -64,6 +64,7 @@ public class RestClusterRerouteAction extends BaseRestHandler { public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) throws Exception { final ClusterRerouteRequest clusterRerouteRequest = Requests.clusterRerouteRequest(); clusterRerouteRequest.dryRun(request.paramAsBoolean("dry_run", clusterRerouteRequest.dryRun())); + clusterRerouteRequest.setRetryFailed(request.paramAsBoolean("retry_failed", clusterRerouteRequest.isRetryFailed())); clusterRerouteRequest.explain(request.paramAsBoolean("explain", clusterRerouteRequest.explain())); clusterRerouteRequest.timeout(request.paramAsTime("timeout", clusterRerouteRequest.timeout())); clusterRerouteRequest.masterNodeTimeout(request.paramAsTime("master_timeout", clusterRerouteRequest.masterNodeTimeout())); diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java new file mode 100644 index 00000000000..927f572487c --- /dev/null +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/reroute/ClusterRerouteTests.java @@ -0,0 +1,181 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.action.admin.cluster.reroute; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.network.NetworkModule; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class ClusterRerouteTests extends ESAllocationTestCase { + + public void testSerializeRequest() throws IOException { + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.setRetryFailed(randomBoolean()); + req.dryRun(randomBoolean()); + req.explain(randomBoolean()); + req.commands(new AllocateEmptyPrimaryAllocationCommand("foo", 1, "bar", randomBoolean())); + req.timeout(TimeValue.timeValueMillis(randomIntBetween(0, 100))); + BytesStreamOutput out = new BytesStreamOutput(); + req.writeTo(out); + BytesReference bytes = out.bytes(); + NamedWriteableRegistry namedWriteableRegistry = new NamedWriteableRegistry(); + new NetworkModule(null, Settings.EMPTY, true, namedWriteableRegistry); + StreamInput wrap = new NamedWriteableAwareStreamInput(StreamInput.wrap(bytes.toBytes()), + namedWriteableRegistry); + ClusterRerouteRequest deserializedReq = new ClusterRerouteRequest(); + deserializedReq.readFrom(wrap); + + assertEquals(req.isRetryFailed(), deserializedReq.isRetryFailed()); + assertEquals(req.dryRun(), deserializedReq.dryRun()); + assertEquals(req.explain(), deserializedReq.explain()); + assertEquals(req.timeout(), deserializedReq.timeout()); + assertEquals(1, deserializedReq.getCommands().commands().size()); // allocation commands have their own tests + assertEquals(req.getCommands().commands().size(), deserializedReq.getCommands().commands().size()); + } + + public void testClusterStateUpdateTask() { + AllocationService allocationService = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + ClusterState clusterState = createInitialClusterState(allocationService); + ClusterRerouteRequest req = new ClusterRerouteRequest(); + req.dryRun(true); + AtomicReference responseRef = new AtomicReference<>(); + ActionListener responseActionListener = new ActionListener() { + @Override + public void onResponse(ClusterRerouteResponse clusterRerouteResponse) { + responseRef.set(clusterRerouteResponse); + } + + @Override + public void onFailure(Throwable e) { + + } + }; + TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task = + new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(logger, allocationService, req, + responseActionListener ); + ClusterState execute = task.execute(clusterState); + assertSame(execute, clusterState); // dry-run + task.onAllNodesAcked(null); + assertNotSame(responseRef.get().getState(), execute); + + req.dryRun(false);// now we allocate + + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries; i++) { + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i); + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = allocationService.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + clusterState = ClusterState.builder(clusterState).routingTable(result.routingTable()).build(); + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + if (i == retries-1) { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + } else { + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + } + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + } + + + // without retry_failed we won't allocate that shard + ClusterState newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + task.onAllNodesAcked(null); + assertSame(responseRef.get().getState(), newState); + RoutingTable routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + + req.setRetryFailed(true); // now we manually retry and get the shard back into initializing + newState = task.execute(clusterState); + assertNotSame(newState, clusterState); // dry-run=false + clusterState = newState; + routingTable = clusterState.routingTable(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + } + + private ClusterState createInitialClusterState(AllocationService service) { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = service.reroute(clusterState, "reroute").routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java index 708f8ca5079..f46224570b0 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java @@ -64,7 +64,8 @@ public class UnassignedInfoTests extends ESAllocationTestCase { UnassignedInfo.Reason.NODE_LEFT, UnassignedInfo.Reason.REROUTE_CANCELLED, UnassignedInfo.Reason.REINITIALIZED, - UnassignedInfo.Reason.REALLOCATED_REPLICA}; + UnassignedInfo.Reason.REALLOCATED_REPLICA, + UnassignedInfo.Reason.PRIMARY_FAILED}; for (int i = 0; i < order.length; i++) { assertThat(order[i].ordinal(), equalTo(i)); } @@ -72,7 +73,10 @@ public class UnassignedInfoTests extends ESAllocationTestCase { } public void testSerialization() throws Exception { - UnassignedInfo meta = new UnassignedInfo(RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()), randomBoolean() ? randomAsciiOfLength(4) : null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), UnassignedInfo.Reason.values()); + UnassignedInfo meta = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null, null, randomIntBetween(1, 100), System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, randomBoolean() ? randomAsciiOfLength(4) : null); BytesStreamOutput out = new BytesStreamOutput(); meta.writeTo(out); out.close(); @@ -82,6 +86,7 @@ public class UnassignedInfoTests extends ESAllocationTestCase { assertThat(read.getUnassignedTimeInMillis(), equalTo(meta.getUnassignedTimeInMillis())); assertThat(read.getMessage(), equalTo(meta.getMessage())); assertThat(read.getDetails(), equalTo(meta.getDetails())); + assertThat(read.getNumFailedAllocations(), equalTo(meta.getNumFailedAllocations())); } public void testIndexCreated() { @@ -273,7 +278,10 @@ public class UnassignedInfoTests extends ESAllocationTestCase { public void testUnassignedDelayOnlyNodeLeftNonNodeLeftReason() throws Exception { EnumSet reasons = EnumSet.allOf(UnassignedInfo.Reason.class); reasons.remove(UnassignedInfo.Reason.NODE_LEFT); - UnassignedInfo unassignedInfo = new UnassignedInfo(RandomPicks.randomFrom(random(), reasons), null); + UnassignedInfo.Reason reason = RandomPicks.randomFrom(random(), reasons); + UnassignedInfo unassignedInfo = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? + new UnassignedInfo(reason, null, null, 1, System.nanoTime(), System.currentTimeMillis()): + new UnassignedInfo(reason, null); unassignedInfo = unassignedInfo.updateDelay(unassignedInfo.getUnassignedTimeInNanos() + 1, // add 1 tick delay Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "10h").build(), Settings.EMPTY); long delay = unassignedInfo.getLastComputedLeftDelayNanos(); @@ -287,7 +295,7 @@ public class UnassignedInfoTests extends ESAllocationTestCase { */ public void testLeftDelayCalculation() throws Exception { final long baseTime = System.nanoTime(); - UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, baseTime, System.currentTimeMillis()); + UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "test", null, 0, baseTime, System.currentTimeMillis()); final long totalDelayNanos = TimeValue.timeValueMillis(10).nanos(); final Settings settings = Settings.builder().put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueNanos(totalDelayNanos)).build(); unassignedInfo = unassignedInfo.updateDelay(baseTime, settings, Settings.EMPTY); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java index 7aa8576ece3..b63692e0d2a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationCommandsTests.java @@ -94,7 +94,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), equalTo(ShardRoutingState.RELOCATING)); @@ -148,7 +148,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating to non-existent node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node42")), false, false); fail("expected IllegalArgumentException when allocating to non-existing node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("failed to resolve [node42], no matching nodes")); @@ -156,7 +156,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating to non-data node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand(index, shardId.id(), "node4")), false, false); fail("expected IllegalArgumentException when allocating to non-data node"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocation can only be done on data nodes")); @@ -164,7 +164,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating non-existing shard, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 1, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing shard"); } catch (ShardNotFoundException e) { assertThat(e.getMessage(), containsString("no such shard")); @@ -172,7 +172,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating non-existing index, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test2", 0, "node2")), false, false); fail("expected ShardNotFoundException when allocating non-existing index"); } catch (IndexNotFoundException e) { assertThat(e.getMessage(), containsString("no such index")); @@ -180,7 +180,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating empty primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", false)), false, false); fail("expected IllegalArgumentException when allocating empty primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); @@ -188,14 +188,14 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocating stale primary with acceptDataLoss flag set to false"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateStalePrimaryAllocationCommand(index, shardId.id(), "node1", false)), false, false); fail("expected IllegalArgumentException when allocating stale primary with acceptDataLoss flag set to false"); } catch (IllegalArgumentException e) { assertThat(e.getMessage(), containsString("allocating an empty primary for " + shardId + " can result in data loss. Please confirm by setting the accept_data_loss parameter to true")); } logger.info("--> allocating empty primary with acceptDataLoss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -211,13 +211,13 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> allocate the replica shard on the primary shard node, should fail"); try { - allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1"))); + allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node1")), false, false); fail("expected IllegalArgumentException when allocating replica shard on the primary shard node"); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -236,7 +236,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> verify that we fail when there are no unassigned shards"); try { - allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3"))); + allocation.reroute(clusterState, new AllocationCommands(randomAllocateCommand("test", 0, "node3")), false, false); fail("expected IllegalArgumentException when allocating shard while no unassigned shard available"); } catch (IllegalArgumentException e) { } @@ -268,7 +268,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0)); logger.info("--> allocating empty primary shard with accept_data_loss flag set to true"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateEmptyPrimaryAllocationCommand("test", 0, "node1", true)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -277,7 +277,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -291,13 +291,13 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel primary allocation, make sure it fails..."); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -306,7 +306,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the relocation allocation"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -315,7 +315,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -325,7 +325,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel the primary being replicated, make sure it fails"); try { - allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false))); + allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", false)), false, false); fail(); } catch (IllegalArgumentException e) { } @@ -339,7 +339,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> cancel allocation of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node2", false)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -348,7 +348,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); logger.info("--> allocate the replica shard on on the second node"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new AllocateReplicaAllocationCommand("test", 0, "node2")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); @@ -364,7 +364,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node2").shardsWithState(STARTED).size(), equalTo(1)); logger.info("--> move the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3"))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, "node2", "node3")), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -374,7 +374,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().node("node3").shardsWithState(INITIALIZING).size(), equalTo(1)); logger.info("--> cancel the move of the replica shard"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node3", false)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); assertThat(clusterState.getRoutingNodes().node("node1").shardsWithState(STARTED).size(), equalTo(1)); @@ -383,7 +383,7 @@ public class AllocationCommandsTests extends ESAllocationTestCase { logger.info("--> cancel the primary allocation (with allow_primary set to true)"); - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new CancelAllocationCommand("test", 0, "node1", true)), false, false); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(rerouteResult.changed(), equalTo(true)); logger.error(clusterState.prettyPrint()); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java index ee993bf3ebd..805ab0321ba 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java @@ -868,7 +868,7 @@ public class AwarenessAllocationTests extends ESAllocationTestCase { } commands.add(new MoveAllocationCommand("test", 0, primaryNode, "A-4")); - routingTable = strategy.reroute(clusterState, commands).routingTable(); + routingTable = strategy.reroute(clusterState, commands, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(0)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java index 11b78d2ae6a..b14aeca890e 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/DeadNodesAllocationTests.java @@ -149,8 +149,8 @@ public class DeadNodesAllocationTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -223,8 +223,8 @@ public class DeadNodesAllocationTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test",0 , clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java index bfa27a36d8b..29644f07944 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java @@ -149,7 +149,7 @@ public class ExpectedShardSizeAllocationTests extends ESAllocationTestCase { } else { toNodeId = "node1"; } - rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId))); + rerouteResult = allocation.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)), false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertEquals(clusterState.getRoutingNodes().node(existingNodeId).iterator().next().state(), ShardRoutingState.RELOCATING); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 58e2397b043..e859c5811c3 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -109,8 +109,8 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); @@ -125,8 +125,8 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase { logger.info("--> moving primary shard to node3"); rerouteResult = allocation.reroute(clusterState, new AllocationCommands( - new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")) - ); + new MoveAllocationCommand("test", 0, clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId(), "node3")), + false, false); assertThat(rerouteResult.changed(), equalTo(true)); clusterState = ClusterState.builder(clusterState).routingTable(rerouteResult.routingTable()).build(); assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(RELOCATING)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java new file mode 100644 index 00000000000..f76851cfef9 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/MaxRetryAllocationDeciderTests.java @@ -0,0 +1,210 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing.allocation; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.EmptyClusterInfoService; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.elasticsearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocateReplicaAllocationCommand; +import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.test.ESAllocationTestCase; +import org.elasticsearch.test.gateway.NoopGatewayAllocator; + +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; + +public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase { + + private AllocationService strategy; + + @Override + public void setUp() throws Exception { + super.setUp(); + strategy = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, + Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), + NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); + } + + private ClusterState createInitialClusterState() { + MetaData.Builder metaBuilder = MetaData.builder(); + metaBuilder.put(IndexMetaData.builder("idx").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)); + MetaData metaData = metaBuilder.build(); + RoutingTable.Builder routingTableBuilder = RoutingTable.builder(); + routingTableBuilder.addAsNew(metaData.index("idx")); + + RoutingTable routingTable = routingTableBuilder.build(); + ClusterState clusterState = ClusterState.builder(org.elasticsearch.cluster.ClusterName.DEFAULT) + .metaData(metaData).routingTable(routingTable).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))) + .build(); + RoutingTable prevRoutingTable = routingTable; + routingTable = strategy.reroute(clusterState, "reroute", false).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + assertEquals(prevRoutingTable.index("idx").shards().size(), 1); + assertEquals(prevRoutingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + return clusterState; + } + + public void testSingleRetryOnIgnore() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + result = strategy.reroute(clusterState, new AllocationCommands(), false, true); // manual reroute should retry once + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we go and check that we are actually stick to unassigned on the next failure ie. no retry + failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + } + + public void testFailedAllocation() { + ClusterState clusterState = createInitialClusterState(); + RoutingTable routingTable = clusterState.routingTable(); + final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY); + // now fail it N-1 times + for (int i = 0; i < retries-1; i++) { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i+1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom" + i); + } + // now we go and check that we are actually stick to unassigned on the next failure + { + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom", + new UnsupportedOperationException())); + RoutingAllocation.Result result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + } + + // change the settings and ensure we can do another round of allocation for that index. + clusterState = ClusterState.builder(clusterState).routingTable(routingTable) + .metaData(MetaData.builder(clusterState.metaData()) + .put(IndexMetaData.builder(clusterState.metaData().index("idx")).settings( + Settings.builder().put(clusterState.metaData().index("idx").getSettings()).put("index.allocation.max_retries", + retries+1).build() + ).build(), true).build()).build(); + RoutingAllocation.Result result = strategy.reroute(clusterState, "settings changed", false); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + // good we are initializing and we are maintaining failure information + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "boom"); + + // now we start the shard + routingTable = strategy.applyStartedShards(clusterState, Collections.singletonList(routingTable.index("idx") + .shard(0).shards().get(0))).routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + + // all counters have been reset to 0 ie. no unassigned info + assertEquals(routingTable.index("idx").shards().size(), 1); + assertNull(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo()); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), STARTED); + + // now fail again and see if it has a new counter + List failedShards = Collections.singletonList( + new FailedRerouteAllocation.FailedShard(routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG", + new UnsupportedOperationException())); + result = strategy.applyFailedShards(clusterState, failedShards); + assertTrue(result.changed()); + routingTable = result.routingTable(); + clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); + assertEquals(routingTable.index("idx").shards().size(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), 1); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING); + assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getMessage(), "ZOOOMG"); + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java index 97a3003ab2f..d0fc64b4b6b 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/NodeVersionAllocationDeciderTests.java @@ -337,7 +337,7 @@ public class NodeVersionAllocationDeciderTests extends ESAllocationTestCase { AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // the two indices must stay as is, the replicas cannot move to oldNode2 because versions don't match state = ClusterState.builder(state).routingResult(result).build(); assertThat(result.routingTable().index(shard2.getIndex()).shardsWithState(ShardRoutingState.RELOCATING).size(), equalTo(0)); @@ -369,7 +369,7 @@ public class NodeVersionAllocationDeciderTests extends ESAllocationTestCase { AllocationService strategy = new MockAllocationService(Settings.EMPTY, allocationDeciders, NoopGatewayAllocator.INSTANCE, new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE); - RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true); + RoutingAllocation.Result result = strategy.reroute(state, new AllocationCommands(), true, false); // Make sure that primary shards are only allocated on the new node for (int i = 0; i < numberOfShards; i++) { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java index 28d916e20c1..61a72bc352a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/ThrottlingAllocationTests.java @@ -283,7 +283,7 @@ public class ThrottlingAllocationTests extends ESAllocationTestCase { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node2"), 0); assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); - RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2"))); + RoutingAllocation.Result reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node1").iterator().next().shardId().id(), "node1", "node2")), false, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.YES); routingTable = reroute.routingTable(); @@ -296,7 +296,7 @@ public class ThrottlingAllocationTests extends ESAllocationTestCase { assertEquals(clusterState.getRoutingNodes().getOutgoingRecoveries("node3"), 0); // outgoing throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node1")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); assertEquals(clusterState.getRoutingNodes().getIncomingRecoveries("node1"), 0); @@ -311,7 +311,7 @@ public class ThrottlingAllocationTests extends ESAllocationTestCase { assertThat(routingTable.shardsWithState(UNASSIGNED).size(), equalTo(0)); // incoming throttles - reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true); + reroute = strategy.reroute(clusterState, new AllocationCommands(new MoveAllocationCommand("test", clusterState.getRoutingNodes().node("node3").iterator().next().shardId().id(), "node3", "node2")), true, false); assertEquals(reroute.explanations().explanations().size(), 1); assertEquals(reroute.explanations().explanations().get(0).decisions().type(), Decision.Type.THROTTLE); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index 2c4e86ad4b1..579e87150a7 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -796,7 +796,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { AllocationCommand relocate1 = new MoveAllocationCommand("test", 0, "node2", "node3"); AllocationCommands cmds = new AllocationCommands(relocate1); - routingTable = strategy.reroute(clusterState, cmds).routingTable(); + routingTable = strategy.reroute(clusterState, cmds, false, false).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); logShardStates(clusterState); @@ -808,7 +808,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { // node3, which will put it over the low watermark when it // completes, with shard relocations taken into account this should // throw an exception about not being able to complete - strategy.reroute(clusterState, cmds).routingTable(); + strategy.reroute(clusterState, cmds, false, false).routingTable(); fail("should not have been able to reroute the shard"); } catch (IllegalArgumentException e) { assertThat("can't allocated because there isn't enough room: " + e.getMessage(), @@ -876,7 +876,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); @@ -896,7 +896,8 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ) ); clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); @@ -992,7 +993,7 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ); ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, - System.nanoTime()); + System.nanoTime(), false); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); // Two shards should start happily @@ -1051,7 +1052,8 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ); clusterState = ClusterState.builder(updateClusterState).routingTable(builder.build()).build(); - routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime()); + routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), + false); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index d9e9fb95445..008884cbb8d 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -136,7 +136,7 @@ public class DiskThresholdDeciderUnitTests extends ESTestCase { ImmutableOpenMap.Builder shardSizes = ImmutableOpenMap.builder(); shardSizes.put("[test][0][p]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(mostAvailableUsage.toString(), Decision.YES, decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(mostAvailableUsage.toString(), Decision.NO, decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation)); } @@ -204,7 +204,7 @@ public class DiskThresholdDeciderUnitTests extends ESTestCase { shardSizes.put("[test][2][p]", 10L); final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build()); - RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, new AllocationDecider[]{decider}), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); assertEquals(Decision.YES, decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation)); assertEquals(Decision.NO, decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation)); try { diff --git a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java index 01e717e6fa9..ee1cf7280e7 100644 --- a/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/PrimaryShardAllocatorTests.java @@ -346,7 +346,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -425,7 +425,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(allocationDeciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } /** @@ -444,7 +444,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -452,7 +452,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -460,7 +460,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -485,7 +485,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + RoutingAllocation allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); boolean changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -493,7 +493,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node1, 1, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(false)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1)); @@ -501,7 +501,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(2)); // replicas testAllocator.addData(node2, 2, null, randomBoolean()); - allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime()); + allocation = new RoutingAllocation(yesAllocationDeciders(), new RoutingNodes(state, false), state, null, System.nanoTime(), false); changed = testAllocator.allocateUnassigned(allocation); assertThat(changed, equalTo(true)); assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(0)); @@ -525,7 +525,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTableBuilder.build()) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, null, System.nanoTime(), false); } class TestAllocator extends PrimaryShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java index 672c9de3d3e..20eb6286813 100644 --- a/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java +++ b/core/src/test/java/org/elasticsearch/gateway/ReplicaShardAllocatorTests.java @@ -302,7 +302,7 @@ public class ReplicaShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders) { @@ -324,7 +324,7 @@ public class ReplicaShardAllocatorTests extends ESAllocationTestCase { .metaData(metaData) .routingTable(routingTable) .nodes(DiscoveryNodes.builder().put(node1).put(node2).put(node3)).build(); - return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime()); + return new RoutingAllocation(deciders, new RoutingNodes(state, false), state, ClusterInfo.EMPTY, System.nanoTime(), false); } class TestAllocator extends ReplicaShardAllocator { diff --git a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java index dc533737886..81c50cc4f9c 100644 --- a/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java +++ b/core/src/test/java/org/elasticsearch/indices/state/RareClusterStateIT.java @@ -103,7 +103,7 @@ public class RareClusterStateIT extends ESIntegTestCase { .nodes(DiscoveryNodes.EMPTY_NODES) .build(), false ); - RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime()); + RoutingAllocation routingAllocation = new RoutingAllocation(allocationDeciders, routingNodes, current, ClusterInfo.EMPTY, System.nanoTime(), false); allocator.allocateUnassigned(routingAllocation); } diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 5ed63b519a4..7ca30132a4a 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -640,7 +640,7 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas assertAcked(client.admin().cluster().preparePutRepository("test-repo") .setType("fs").setSettings(Settings.builder().put("location", repositoryLocation))); - createIndex("test-idx"); + prepareCreate("test-idx").setSettings(Settings.builder().put("index.allocation.max_retries", Integer.MAX_VALUE)).get(); ensureGreen(); logger.info("--> indexing some data"); diff --git a/docs/reference/cluster/reroute.asciidoc b/docs/reference/cluster/reroute.asciidoc index 99e754df529..bb48a00fbe5 100644 --- a/docs/reference/cluster/reroute.asciidoc +++ b/docs/reference/cluster/reroute.asciidoc @@ -103,3 +103,16 @@ are available: To ensure that these implications are well-understood, this command requires the special field `accept_data_loss` to be explicitly set to `true` for it to work. + +[float] +=== Retry failed shards + +The cluster will attempt to allocate a shard a maximum of +`index.allocation.max_retries` times in a row (defaults to `5`), before giving +up and leaving the shard unallocated. This scenario can be caused by +structural problems such as having an analyzer which refers to a stopwords +file which doesn't exist on all nodes. + +Once the problem has been corrected, allocation can be manually retried by +calling the <> API with `?retry_failed`, which +will attempt a single retry round for these shards. \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json index 2ae42c089d3..8bb85ca087a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.reroute.json @@ -16,6 +16,10 @@ "type" : "boolean", "description" : "Return an explanation of why the commands can or cannot be executed" }, + "retry_failed": { + "type" : "boolean", + "description" : "Retries allocation of shards that are blocked due to too many subsequent allocation failures" + }, "metric": { "type": "list", "options": ["_all", "blocks", "metadata", "nodes", "routing_table", "master_node", "version"],