From 4e0d65453fefd584379f119b0b460bed6991fab6 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 15 Jun 2015 04:12:05 +0200 Subject: [PATCH] Add Unassigned meta data Unassigned meta includes additional information as to why a shard is unassigned, this is especially handy when a shard moves to unassigned due to node leaving or shard failure. The additional data is provided as part of the cluster state, and as part of `_cat/shards` API. The additional meta includes the timestamp that the shard has moved to unassigned, allowing us in the future to build functionality such as delay allocation due to node leaving until a copy of the shard is found. closes #11653 --- .../action/shard/ShardStateAction.java | 5 +- .../metadata/MetaDataIndexStateService.java | 2 +- .../routing/ImmutableShardRouting.java | 39 ++- .../cluster/routing/IndexRoutingTable.java | 35 ++- .../cluster/routing/MutableShardRouting.java | 4 +- .../cluster/routing/RoutingNodes.java | 4 +- .../cluster/routing/RoutingTable.java | 18 ++ .../cluster/routing/ShardRouting.java | 6 + .../cluster/routing/UnassignedInfo.java | 167 ++++++++++++ .../routing/allocation/AllocationService.java | 19 +- .../allocation/FailedRerouteAllocation.java | 20 +- .../command/CancelAllocationCommand.java | 10 +- .../gateway/GatewayAllocator.java | 6 +- .../gateway/LocalAllocateDangledIndices.java | 2 +- .../rest/action/cat/RestShardsAction.java | 19 ++ .../replication/ShardReplicationTests.java | 10 +- .../cluster/routing/UnassignedInfoTests.java | 256 ++++++++++++++++++ .../allocation/FailedShardsRoutingTests.java | 8 +- .../indices/store/IndicesStoreTests.java | 7 +- rest-api-spec/test/cat.shards/10_basic.yaml | 4 + 20 files changed, 593 insertions(+), 48 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java create mode 100644 core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java diff --git a/core/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java b/core/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java index bdea92d1f6f..caad29932bb 100644 --- a/core/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java +++ b/core/src/main/java/org/elasticsearch/cluster/action/shard/ShardStateAction.java @@ -28,6 +28,7 @@ import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.*; import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.Priority; import org.elasticsearch.common.component.AbstractComponent; @@ -146,7 +147,7 @@ public class ShardStateAction extends AbstractComponent { MetaData metaData = currentState.getMetaData(); - List shardRoutingsToBeApplied = new ArrayList<>(shardRoutingEntries.size()); + List shardRoutingsToBeApplied = new ArrayList<>(shardRoutingEntries.size()); for (int i = 0; i < shardRoutingEntries.size(); i++) { ShardRoutingEntry shardRoutingEntry = shardRoutingEntries.get(i); shardRoutingEntry.processed = true; @@ -163,7 +164,7 @@ public class ShardStateAction extends AbstractComponent { } logger.debug("{} will apply shard failed {}", shardRouting.shardId(), shardRoutingEntry); - shardRoutingsToBeApplied.add(shardRouting); + shardRoutingsToBeApplied.add(new FailedRerouteAllocation.FailedShard(shardRouting, shardRoutingEntry.reason)); } RoutingAllocation.Result routingResult = allocationService.applyFailedShards(currentState, shardRoutingsToBeApplied); diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexStateService.java b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexStateService.java index 0486f62b52b..c1113f8e688 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexStateService.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexStateService.java @@ -180,7 +180,7 @@ public class MetaDataIndexStateService extends AbstractComponent { RoutingTable.Builder rtBuilder = RoutingTable.builder(updatedState.routingTable()); for (String index : indicesToOpen) { - rtBuilder.addAsRecovery(updatedState.metaData().index(index)); + rtBuilder.addAsFromCloseToOpen(updatedState.metaData().index(index)); } RoutingAllocation.Result routingResult = allocationService.reroute(ClusterState.builder(updatedState).routingTable(rtBuilder).build()); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/ImmutableShardRouting.java b/core/src/main/java/org/elasticsearch/cluster/routing/ImmutableShardRouting.java index e177ccd0d73..3695902d6b6 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/ImmutableShardRouting.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/ImmutableShardRouting.java @@ -20,6 +20,8 @@ package org.elasticsearch.cluster.routing; import com.google.common.collect.ImmutableList; +import org.elasticsearch.Version; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Streamable; @@ -53,6 +55,8 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou protected RestoreSource restoreSource; + protected UnassignedInfo unassignedInfo; + private final transient ImmutableList asList; ImmutableShardRouting() { @@ -64,7 +68,7 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou } public ImmutableShardRouting(ShardRouting copy, long version) { - this(copy.index(), copy.id(), copy.currentNodeId(), copy.relocatingNodeId(), copy.restoreSource(), copy.primary(), copy.state(), version); + this(copy.index(), copy.id(), copy.currentNodeId(), copy.relocatingNodeId(), copy.restoreSource(), copy.primary(), copy.state(), version, copy.unassignedInfo()); } public ImmutableShardRouting(String index, int shardId, String currentNodeId, boolean primary, ShardRoutingState state, long version) { @@ -78,6 +82,12 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou public ImmutableShardRouting(String index, int shardId, String currentNodeId, String relocatingNodeId, RestoreSource restoreSource, boolean primary, ShardRoutingState state, long version) { + this(index, shardId, currentNodeId, relocatingNodeId, restoreSource, primary, state, version, null); + } + + public ImmutableShardRouting(String index, int shardId, String currentNodeId, + String relocatingNodeId, RestoreSource restoreSource, boolean primary, ShardRoutingState state, long version, + UnassignedInfo unassignedInfo) { this.index = index; this.shardId = shardId; this.currentNodeId = currentNodeId; @@ -87,6 +97,8 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou this.asList = ImmutableList.of((ShardRouting) this); this.version = version; this.restoreSource = restoreSource; + this.unassignedInfo = unassignedInfo; + assert !(state == ShardRoutingState.UNASSIGNED && unassignedInfo == null) : "unassigned shard must be created with meta"; } @Override @@ -167,6 +179,12 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou return restoreSource; } + @Override + @Nullable + public UnassignedInfo unassignedInfo() { + return unassignedInfo; + } + @Override public boolean primary() { return this.primary; @@ -224,6 +242,11 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou state = ShardRoutingState.fromValue(in.readByte()); restoreSource = RestoreSource.readOptionalRestoreSource(in); + if (in.getVersion().onOrAfter(Version.V_1_7_0)) { + if (in.readBoolean()) { + unassignedInfo = new UnassignedInfo(in); + } + } } @Override @@ -263,6 +286,14 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou } else { out.writeBoolean(false); } + if (out.getVersion().onOrAfter(Version.V_1_7_0)) { + if (unassignedInfo != null) { + out.writeBoolean(true); + unassignedInfo.writeTo(out); + } else { + out.writeBoolean(false); + } + } } @Override @@ -342,6 +373,9 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou sb.append(", restoring[" + restoreSource + "]"); } sb.append(", s[").append(state).append("]"); + if (this.unassignedInfo != null) { + sb.append(", ").append(unassignedInfo.toString()); + } return sb.toString(); } @@ -358,6 +392,9 @@ public class ImmutableShardRouting implements Streamable, Serializable, ShardRou builder.field("restore_source"); restoreSource().toXContent(builder, params); } + if (unassignedInfo != null) { + unassignedInfo.toXContent(builder, params); + } return builder.endObject(); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java b/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java index 7531e8052a9..3e10808d845 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java @@ -37,7 +37,6 @@ import org.elasticsearch.index.shard.ShardId; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Set; @@ -394,34 +393,48 @@ public class IndexRoutingTable extends AbstractDiffable imple * Initializes a new empty index, as if it was created from an API. */ public Builder initializeAsNew(IndexMetaData indexMetaData) { - return initializeEmpty(indexMetaData, true); + return initializeEmpty(indexMetaData, true, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); } /** * Initializes a new empty index, as if it was created from an API. */ public Builder initializeAsRecovery(IndexMetaData indexMetaData) { - return initializeEmpty(indexMetaData, false); + return initializeEmpty(indexMetaData, false, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null)); + } + + /** + * Initializes a new index caused by dangling index imported. + */ + public Builder initializeAsFromDangling(IndexMetaData indexMetaData) { + return initializeEmpty(indexMetaData, false, new UnassignedInfo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, null)); + } + + /** + * Initializes a new empty index, as as a result of opening a closed index. + */ + public Builder initializeAsFromCloseToOpen(IndexMetaData indexMetaData) { + return initializeEmpty(indexMetaData, false, new UnassignedInfo(UnassignedInfo.Reason.INDEX_REOPENED, null)); } /** * Initializes a new empty index, to be restored from a snapshot */ public Builder initializeAsNewRestore(IndexMetaData indexMetaData, RestoreSource restoreSource, IntSet ignoreShards) { - return initializeAsRestore(indexMetaData, restoreSource, ignoreShards, true); + return initializeAsRestore(indexMetaData, restoreSource, ignoreShards, true, new UnassignedInfo(UnassignedInfo.Reason.NEW_INDEX_RESTORED, "restore_source[" + restoreSource.snapshotId().getRepository() + "/" + restoreSource.snapshotId().getSnapshot() + "]")); } /** * Initializes an existing index, to be restored from a snapshot */ public Builder initializeAsRestore(IndexMetaData indexMetaData, RestoreSource restoreSource) { - return initializeAsRestore(indexMetaData, restoreSource, null, false); + return initializeAsRestore(indexMetaData, restoreSource, null, false, new UnassignedInfo(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED, "restore_source[" + restoreSource.snapshotId().getRepository() + "/" + restoreSource.snapshotId().getSnapshot() + "]")); } /** * Initializes an index, to be restored from snapshot */ - private Builder initializeAsRestore(IndexMetaData indexMetaData, RestoreSource restoreSource, IntSet ignoreShards, boolean asNew) { + private Builder initializeAsRestore(IndexMetaData indexMetaData, RestoreSource restoreSource, IntSet ignoreShards, boolean asNew, UnassignedInfo unassignedInfo) { if (!shards.isEmpty()) { throw new IllegalStateException("trying to initialize an index with fresh shards, but already has shards created"); } @@ -430,9 +443,9 @@ public class IndexRoutingTable extends AbstractDiffable imple for (int i = 0; i <= indexMetaData.numberOfReplicas(); i++) { if (asNew && ignoreShards.contains(shardId)) { // This shards wasn't completely snapshotted - restore it as new shard - indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, shardId, null, i == 0, ShardRoutingState.UNASSIGNED, 0)); + indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, shardId, null, null, null, i == 0, ShardRoutingState.UNASSIGNED, 0, unassignedInfo)); } else { - indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, shardId, null, null, i == 0 ? restoreSource : null, i == 0, ShardRoutingState.UNASSIGNED, 0)); + indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, shardId, null, null, i == 0 ? restoreSource : null, i == 0, ShardRoutingState.UNASSIGNED, 0, unassignedInfo)); } } shards.put(shardId, indexShardRoutingBuilder.build()); @@ -443,14 +456,14 @@ public class IndexRoutingTable extends AbstractDiffable imple /** * Initializes a new empty index, with an option to control if its from an API or not. */ - private Builder initializeEmpty(IndexMetaData indexMetaData, boolean asNew) { + private Builder initializeEmpty(IndexMetaData indexMetaData, boolean asNew, UnassignedInfo unassignedInfo) { if (!shards.isEmpty()) { throw new IllegalStateException("trying to initialize an index with fresh shards, but already has shards created"); } for (int shardId = 0; shardId < indexMetaData.numberOfShards(); shardId++) { IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(new ShardId(indexMetaData.index(), shardId), asNew ? false : true); for (int i = 0; i <= indexMetaData.numberOfReplicas(); i++) { - indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, shardId, null, i == 0, ShardRoutingState.UNASSIGNED, 0)); + indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, shardId, null, null, null, i == 0, ShardRoutingState.UNASSIGNED, 0, unassignedInfo)); } shards.put(shardId, indexShardRoutingBuilder.build()); } @@ -461,7 +474,7 @@ public class IndexRoutingTable extends AbstractDiffable imple for (IntCursor cursor : shards.keys()) { int shardId = cursor.value; // version 0, will get updated when reroute will happen - ImmutableShardRouting shard = new ImmutableShardRouting(index, shardId, null, false, ShardRoutingState.UNASSIGNED, 0); + ImmutableShardRouting shard = new ImmutableShardRouting(index, shardId, null, null, null, false, ShardRoutingState.UNASSIGNED, 0, new UnassignedInfo(UnassignedInfo.Reason.REPLICA_ADDED, null)); shards.put(shardId, new IndexShardRoutingTable.Builder(shards.get(shard.id())).addShard(shard).build() ); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/MutableShardRouting.java b/core/src/main/java/org/elasticsearch/cluster/routing/MutableShardRouting.java index 18d7620697e..e9b3ac0a831 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/MutableShardRouting.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/MutableShardRouting.java @@ -44,12 +44,13 @@ public class MutableShardRouting extends ImmutableShardRouting { /** * Moves the shard to unassigned state. */ - void moveToUnassigned() { + void moveToUnassigned(UnassignedInfo unassignedInfo) { version++; assert state != ShardRoutingState.UNASSIGNED; state = ShardRoutingState.UNASSIGNED; currentNodeId = null; relocatingNodeId = null; + this.unassignedInfo = unassignedInfo; } /** @@ -120,6 +121,7 @@ public class MutableShardRouting extends ImmutableShardRouting { relocatingNodeId = null; restoreSource = null; state = ShardRoutingState.STARTED; + unassignedInfo = null; // we keep the unassigned data until the shard is started } /** diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java index 6052e7fa4ee..14c46b2f1d3 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingNodes.java @@ -795,10 +795,10 @@ public class RoutingNodes implements Iterable { return iterable.iterator(); } - public void moveToUnassigned() { + public void moveToUnassigned(UnassignedInfo unassignedInfo) { remove(); MutableShardRouting unassigned = new MutableShardRouting(shard); // protective copy of the mutable shard - unassigned.moveToUnassigned(); + unassigned.moveToUnassigned(unassignedInfo); unassigned().add(unassigned); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java index a42a33ee0b7..51c5402228d 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/RoutingTable.java @@ -440,6 +440,24 @@ public class RoutingTable implements Iterable, Diffabletrue iff this shard is a primary. */ diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java new file mode 100644 index 00000000000..b20f1363e30 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -0,0 +1,167 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.joda.FormatDateTimeFormatter; +import org.elasticsearch.common.joda.Joda; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Holds additional information as to why the shard is in unassigned state. + */ +public class UnassignedInfo implements ToXContent, Writeable { + + public static final FormatDateTimeFormatter DATE_TIME_FORMATTER = Joda.forPattern("dateOptionalTime"); + + /** + * Reason why the shard is in unassigned state. + *

+ * Note, ordering of the enum is important, make sure to add new values + * at the end and handle version serialization properly. + */ + public enum Reason { + /** + * Unassigned as a result of an API creation of an index. + */ + INDEX_CREATED, + /** + * Unassigned as a result of a full cluster recovery. + */ + CLUSTER_RECOVERED, + /** + * Unassigned as a result of opening a closed index. + */ + INDEX_REOPENED, + /** + * Unassigned as a result of importing a dangling index. + */ + DANGLING_INDEX_IMPORTED, + /** + * Unassigned as a result of restoring into a new index. + */ + NEW_INDEX_RESTORED, + /** + * Unassigned as a result of restoring into a closed index. + */ + EXISTING_INDEX_RESTORED, + /** + * Unassigned as a result of explicit addition of a replica. + */ + REPLICA_ADDED, + /** + * Unassigned as a result of a failed allocation of the shard. + */ + ALLOCATION_FAILED, + /** + * Unassigned as a result of the node hosting it leaving the cluster. + */ + NODE_LEFT, + /** + * Unassigned as a result of explicit cancel reroute command. + */ + REROUTE_CANCELLED; + } + + private final Reason reason; + private final long timestamp; + private final String details; + + public UnassignedInfo(Reason reason, String details) { + this(reason, System.currentTimeMillis(), details); + } + + private UnassignedInfo(Reason reason, long timestamp, String details) { + this.reason = reason; + this.timestamp = timestamp; + this.details = details; + } + + UnassignedInfo(StreamInput in) throws IOException { + this.reason = Reason.values()[(int) in.readByte()]; + this.timestamp = in.readLong(); + this.details = in.readOptionalString(); + } + + public void writeTo(StreamOutput out) throws IOException { + out.writeByte((byte) reason.ordinal()); + out.writeLong(timestamp); + out.writeOptionalString(details); + } + + public UnassignedInfo readFrom(StreamInput in) throws IOException { + return new UnassignedInfo(in); + } + + /** + * The reason why the shard is unassigned. + */ + public Reason getReason() { + return this.reason; + } + + /** + * The timestamp in milliseconds since epoch. Note, we use timestamp here since + * we want to make sure its preserved across node serializations. Extra care need + * to be made if its used to calculate diff (handle negative values) in case of + * time drift. + */ + public long getTimestampInMillis() { + return this.timestamp; + } + + /** + * Returns optional details explaining the reasons. + */ + @Nullable + public String getDetails() { + return this.details; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("unassigned_info[[reason=").append(reason).append("]"); + sb.append(", at[").append(DATE_TIME_FORMATTER.printer().print(timestamp)).append("]"); + if (details != null) { + sb.append(", details[").append(details).append("]"); + } + sb.append("]"); + return sb.toString(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("unassigned_info"); + builder.field("reason", reason); + builder.field("at", DATE_TIME_FORMATTER.printer().print(timestamp)); + if (details != null) { + builder.field("details", details); + } + builder.endObject(); + return builder; + } +} diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java index 30698502de9..6bfb1f52ca0 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationService.java @@ -88,7 +88,7 @@ public class AllocationService extends AbstractComponent { } public RoutingAllocation.Result applyFailedShard(ClusterState clusterState, ShardRouting failedShard) { - return applyFailedShards(clusterState, ImmutableList.of(failedShard)); + return applyFailedShards(clusterState, ImmutableList.of(new FailedRerouteAllocation.FailedShard(failedShard, null))); } /** @@ -96,14 +96,14 @@ public class AllocationService extends AbstractComponent { *

*

If the same instance of the routing table is returned, then no change has been made.

*/ - public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, List failedShards) { + public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, List failedShards) { RoutingNodes routingNodes = clusterState.routingNodes(); // shuffle the unassigned nodes, just so we won't have things like poison failed shards routingNodes.unassigned().shuffle(); FailedRerouteAllocation allocation = new FailedRerouteAllocation(allocationDeciders, routingNodes, clusterState.nodes(), failedShards, clusterInfoService.getClusterInfo()); boolean changed = false; - for (ShardRouting failedShard : failedShards) { - changed |= applyFailedShard(allocation, failedShard, true); + for (FailedRerouteAllocation.FailedShard failedShard : failedShards) { + changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.details)); } if (!changed) { return new RoutingAllocation.Result(false, clusterState.routingTable()); @@ -288,7 +288,7 @@ public class AllocationService extends AbstractComponent { } } for (ShardRouting shardToFail : shardsToFail) { - changed |= applyFailedShard(allocation, shardToFail, false); + changed |= applyFailedShard(allocation, shardToFail, false, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, "primary failed while replica initializing")); } // now, go over and elect a new primary if possible, not, from this code block on, if one is elected, @@ -348,8 +348,9 @@ public class AllocationService extends AbstractComponent { } changed = true; // now, go over all the shards routing on the node, and fail them + UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]"); for (MutableShardRouting shardRouting : node.copyShards()) { - applyFailedShard(allocation, shardRouting, false); + applyFailedShard(allocation, shardRouting, false, unassignedInfo); } // its a dead node, remove it, note, its important to remove it *after* we apply failed shard // since it relies on the fact that the RoutingNode exists in the list of nodes @@ -410,7 +411,7 @@ public class AllocationService extends AbstractComponent { * Applies the relevant logic to handle a failed shard. Returns true if changes happened that * require relocation. */ - private boolean applyFailedShard(RoutingAllocation allocation, ShardRouting failedShard, boolean addToIgnoreList) { + private boolean applyFailedShard(RoutingAllocation allocation, ShardRouting failedShard, boolean addToIgnoreList, UnassignedInfo unassignedInfo) { // create a copy of the failed shard, since we assume we can change possible references to it without // changing the state of failed shard failedShard = new ImmutableShardRouting(failedShard); @@ -474,7 +475,7 @@ public class AllocationService extends AbstractComponent { // make sure we ignore this shard on the relevant node allocation.addIgnoreShardForNode(failedShard.shardId(), failedShard.currentNodeId()); } - relocatingFromNode.moveToUnassigned(); + relocatingFromNode.moveToUnassigned(unassignedInfo); break; } } @@ -525,7 +526,7 @@ public class AllocationService extends AbstractComponent { routingNodes.unassigned().addAll(shardsToMove); } - node.moveToUnassigned(); + node.moveToUnassigned(unassignedInfo); break; } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java index 721514fe976..305768c8d28 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/FailedRerouteAllocation.java @@ -33,14 +33,28 @@ import java.util.List; */ public class FailedRerouteAllocation extends RoutingAllocation { - private final List failedShards; + /** + * A failed shard with the shard routing itself and an optional + * details on why it failed. + */ + public static class FailedShard { + public final ShardRouting shard; + public final String details; - public FailedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, DiscoveryNodes nodes, List failedShards, ClusterInfo clusterInfo) { + public FailedShard(ShardRouting shard, String details) { + this.shard = shard; + this.details = details; + } + } + + private final List failedShards; + + public FailedRerouteAllocation(AllocationDeciders deciders, RoutingNodes routingNodes, DiscoveryNodes nodes, List failedShards, ClusterInfo clusterInfo) { super(deciders, routingNodes, nodes, clusterInfo); this.failedShards = failedShards; } - public List failedShards() { + public List failedShards() { return failedShards; } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java index ecfad2c7923..a87ffea5643 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java @@ -19,13 +19,9 @@ package org.elasticsearch.cluster.routing.allocation.command; -import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.MutableShardRouting; -import org.elasticsearch.cluster.routing.RoutingNode; -import org.elasticsearch.cluster.routing.RoutingNodes; -import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.*; import org.elasticsearch.cluster.routing.allocation.RerouteExplanation; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.decider.Decision; @@ -199,7 +195,7 @@ public class CancelAllocationCommand implements AllocationCommand { throw new IllegalArgumentException("[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and initializing its state"); } - it.moveToUnassigned(); + it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null)); // now, go and find the shard that is initializing on the target node, and cancel it as well... RoutingNodes.RoutingNodeIterator initializingNode = allocation.routingNodes().routingNodeIter(shardRouting.relocatingNodeId()); if (initializingNode != null) { @@ -222,7 +218,7 @@ public class CancelAllocationCommand implements AllocationCommand { throw new IllegalArgumentException("[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and started"); } - it.moveToUnassigned(); + it.moveToUnassigned(new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null)); } } if (!found) { diff --git a/core/src/main/java/org/elasticsearch/gateway/GatewayAllocator.java b/core/src/main/java/org/elasticsearch/gateway/GatewayAllocator.java index f59e271c04f..6e5f6ab23f0 100644 --- a/core/src/main/java/org/elasticsearch/gateway/GatewayAllocator.java +++ b/core/src/main/java/org/elasticsearch/gateway/GatewayAllocator.java @@ -130,9 +130,9 @@ public class GatewayAllocator extends AbstractComponent { } public void applyFailedShards(FailedRerouteAllocation allocation) { - for (ShardRouting shard : allocation.failedShards()) { - Releasables.close(asyncFetchStarted.remove(shard.shardId())); - Releasables.close(asyncFetchStore.remove(shard.shardId())); + for (FailedRerouteAllocation.FailedShard shard : allocation.failedShards()) { + Releasables.close(asyncFetchStarted.remove(shard.shard.shardId())); + Releasables.close(asyncFetchStore.remove(shard.shard.shardId())); } } diff --git a/core/src/main/java/org/elasticsearch/gateway/LocalAllocateDangledIndices.java b/core/src/main/java/org/elasticsearch/gateway/LocalAllocateDangledIndices.java index fb328823a83..80ecee29eb7 100644 --- a/core/src/main/java/org/elasticsearch/gateway/LocalAllocateDangledIndices.java +++ b/core/src/main/java/org/elasticsearch/gateway/LocalAllocateDangledIndices.java @@ -149,7 +149,7 @@ public class LocalAllocateDangledIndices extends AbstractComponent { metaData.put(upgradedIndexMetaData, false); blocks.addBlocks(upgradedIndexMetaData); if (upgradedIndexMetaData.getState() == IndexMetaData.State.OPEN) { - routingTableBuilder.addAsRecovery(upgradedIndexMetaData); + routingTableBuilder.addAsFromDangling(upgradedIndexMetaData); } sb.append("[").append(upgradedIndexMetaData.index()).append("/").append(upgradedIndexMetaData.state()).append("]"); } diff --git a/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java b/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java index 14703b1694d..8b37fd2c620 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/cat/RestShardsAction.java @@ -27,10 +27,12 @@ import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.common.Strings; import org.elasticsearch.common.Table; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.rest.*; import org.elasticsearch.rest.action.support.RestActionListener; import org.elasticsearch.rest.action.support.RestResponseListener; @@ -89,6 +91,11 @@ public class RestShardsAction extends AbstractCatAction { .addCell("id", "default:false;desc:unique id of node where it lives") .addCell("node", "default:true;alias:n;desc:name of node where it lives"); + table.addCell("unassigned.reason", "alias:ur;default:false;desc:reason shard is unassigned"); + table.addCell("unassigned.at", "alias:ua;default:false;desc:time shard became unassigned (UTC)"); + table.addCell("unassigned.for", "alias:uf;default:false;text-align:right;desc:time has been unassigned"); + table.addCell("unassigned.details", "alias:ud;default:false;desc:additional details as to why the shard became unassigned"); + table.addCell("completion.size", "alias:cs,completionSize;default:false;text-align:right;desc:size of completion"); table.addCell("fielddata.memory_size", "alias:fm,fielddataMemory;default:false;text-align:right;desc:used fielddata cache"); @@ -208,6 +215,18 @@ public class RestShardsAction extends AbstractCatAction { table.addCell(null); } + if (shard.unassignedInfo() != null) { + table.addCell(shard.unassignedInfo().getReason()); + table.addCell(UnassignedInfo.DATE_TIME_FORMATTER.printer().print(shard.unassignedInfo().getTimestampInMillis())); + table.addCell(TimeValue.timeValueMillis(System.currentTimeMillis() - shard.unassignedInfo().getTimestampInMillis())); + table.addCell(shard.unassignedInfo().getDetails()); + } else { + table.addCell(null); + table.addCell(null); + table.addCell(null); + table.addCell(null); + } + table.addCell(shardStats == null ? null : shardStats.getCompletion().getSize()); table.addCell(shardStats == null ? null : shardStats.getFieldData().getMemorySize()); diff --git a/core/src/test/java/org/elasticsearch/action/support/replication/ShardReplicationTests.java b/core/src/test/java/org/elasticsearch/action/support/replication/ShardReplicationTests.java index 432e21248fa..0032742b8f3 100644 --- a/core/src/test/java/org/elasticsearch/action/support/replication/ShardReplicationTests.java +++ b/core/src/test/java/org/elasticsearch/action/support/replication/ShardReplicationTests.java @@ -211,6 +211,7 @@ public class ShardReplicationTests extends ElasticsearchTestCase { String primaryNode = null; String relocatingNode = null; + UnassignedInfo unassignedInfo = null; if (primaryState != ShardRoutingState.UNASSIGNED) { if (primaryLocal) { primaryNode = newNode(0).id(); @@ -221,21 +222,26 @@ public class ShardReplicationTests extends ElasticsearchTestCase { if (primaryState == ShardRoutingState.RELOCATING) { relocatingNode = selectAndRemove(unassignedNodes); } + } else { + unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); } - indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, 0, primaryNode, relocatingNode, true, primaryState, 0)); + indexShardRoutingBuilder.addShard(new ImmutableShardRouting(index, 0, primaryNode, relocatingNode, null, true, primaryState, 0, unassignedInfo)); for (ShardRoutingState replicaState : replicaStates) { String replicaNode = null; relocatingNode = null; + unassignedInfo = null; if (replicaState != ShardRoutingState.UNASSIGNED) { assert primaryNode != null : "a replica is assigned but the primary isn't"; replicaNode = selectAndRemove(unassignedNodes); if (replicaState == ShardRoutingState.RELOCATING) { relocatingNode = selectAndRemove(unassignedNodes); } + } else { + unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); } indexShardRoutingBuilder.addShard( - new ImmutableShardRouting(index, shardId.id(), replicaNode, relocatingNode, false, replicaState, 0)); + new ImmutableShardRouting(index, shardId.id(), replicaNode, relocatingNode, null, false, replicaState, 0, unassignedInfo)); } ClusterState.Builder state = ClusterState.builder(new ClusterName("test")); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java new file mode 100644 index 00000000000..ede68df61ed --- /dev/null +++ b/core/src/test/java/org/elasticsearch/cluster/routing/UnassignedInfoTests.java @@ -0,0 +1,256 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.routing; + +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import com.google.common.collect.ImmutableList; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.metadata.SnapshotId; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.allocation.AllocationService; +import org.elasticsearch.cluster.routing.allocation.FailedRerouteAllocation; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.test.ElasticsearchAllocationTestCase; +import org.junit.Test; + +import static org.elasticsearch.cluster.routing.ShardRoutingState.*; +import static org.hamcrest.Matchers.*; + +/** + */ +public class UnassignedInfoTests extends ElasticsearchAllocationTestCase { + + @Test + public void testReasonOrdinalOrder() { + UnassignedInfo.Reason[] order = new UnassignedInfo.Reason[]{ + UnassignedInfo.Reason.INDEX_CREATED, + UnassignedInfo.Reason.CLUSTER_RECOVERED, + UnassignedInfo.Reason.INDEX_REOPENED, + UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED, + UnassignedInfo.Reason.NEW_INDEX_RESTORED, + UnassignedInfo.Reason.EXISTING_INDEX_RESTORED, + UnassignedInfo.Reason.REPLICA_ADDED, + UnassignedInfo.Reason.ALLOCATION_FAILED, + UnassignedInfo.Reason.NODE_LEFT, + UnassignedInfo.Reason.REROUTE_CANCELLED}; + for (int i = 0; i < order.length; i++) { + assertThat(order[i].ordinal(), equalTo(i)); + } + assertThat(UnassignedInfo.Reason.values().length, equalTo(order.length)); + } + + @Test + public void testSerialization() throws Exception { + UnassignedInfo meta = new UnassignedInfo(RandomPicks.randomFrom(getRandom(), UnassignedInfo.Reason.values()), randomBoolean() ? randomAsciiOfLength(4) : null); + BytesStreamOutput out = new BytesStreamOutput(); + meta.writeTo(out); + out.close(); + + UnassignedInfo read = new UnassignedInfo(StreamInput.wrap(out.bytes())); + assertThat(read.getReason(), equalTo(meta.getReason())); + assertThat(read.getTimestampInMillis(), equalTo(meta.getTimestampInMillis())); + assertThat(read.getDetails(), equalTo(meta.getDetails())); + } + + @Test + public void testIndexCreated() { + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build(); + for (MutableShardRouting shard : clusterState.routingNodes().shardsWithState(UNASSIGNED)) { + assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.INDEX_CREATED)); + } + } + + @Test + public void testClusterRecovered() { + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsRecovery(metaData.index("test"))).build(); + for (MutableShardRouting shard : clusterState.routingNodes().shardsWithState(UNASSIGNED)) { + assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.CLUSTER_RECOVERED)); + } + } + + @Test + public void testIndexReopened() { + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsFromCloseToOpen(metaData.index("test"))).build(); + for (MutableShardRouting shard : clusterState.routingNodes().shardsWithState(UNASSIGNED)) { + assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.INDEX_REOPENED)); + } + } + + @Test + public void testNewIndexRestored() { + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsNewRestore(metaData.index("test"), new RestoreSource(new SnapshotId("rep1", "snp1"), "test"), new IntHashSet())).build(); + for (MutableShardRouting shard : clusterState.routingNodes().shardsWithState(UNASSIGNED)) { + assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.NEW_INDEX_RESTORED)); + } + } + + @Test + public void testExistingIndexRestored() { + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsRestore(metaData.index("test"), new RestoreSource(new SnapshotId("rep1", "snp1"), "test"))).build(); + for (MutableShardRouting shard : clusterState.routingNodes().shardsWithState(UNASSIGNED)) { + assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED)); + } + } + + @Test + public void testDanglingIndexImported() { + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(randomIntBetween(1, 3)).numberOfReplicas(randomIntBetween(0, 3))) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsFromDangling(metaData.index("test"))).build(); + for (MutableShardRouting shard : clusterState.routingNodes().shardsWithState(UNASSIGNED)) { + assertThat(shard.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.DANGLING_INDEX_IMPORTED)); + } + } + + @Test + public void testReplicaAdded() { + AllocationService allocation = createAllocationService(); + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0)) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1"))).build(); + clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build(); + // starting primaries + clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build(); + IndexRoutingTable.Builder builder = IndexRoutingTable.builder("test"); + for (IndexShardRoutingTable indexShardRoutingTable : clusterState.routingTable().index("test")) { + builder.addIndexShard(indexShardRoutingTable); + } + builder.addReplica(); + clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.builder(clusterState.routingTable()).add(builder)).build(); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo(), notNullValue()); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.REPLICA_ADDED)); + } + + /** + * The unassigned meta is kept when a shard goes to INITIALIZING, but cleared when it moves to STARTED. + */ + @Test + public void testStateTransitionMetaHandling() { + ImmutableShardRouting shard = new ImmutableShardRouting("test", 1, null, null, null, true, ShardRoutingState.UNASSIGNED, 1, new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)); + MutableShardRouting mutable = new MutableShardRouting(shard); + assertThat(mutable.unassignedInfo(), notNullValue()); + mutable.assignToNode("test_node"); + assertThat(mutable.state(), equalTo(ShardRoutingState.INITIALIZING)); + assertThat(mutable.unassignedInfo(), notNullValue()); + mutable.moveToStarted(); + assertThat(mutable.state(), equalTo(ShardRoutingState.STARTED)); + assertThat(mutable.unassignedInfo(), nullValue()); + } + + /** + * Tests that during reroute when a node is detected as leaving the cluster, the right unassigned meta is set + */ + @Test + public void testNodeLeave() { + AllocationService allocation = createAllocationService(); + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build(); + clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build(); + // starting primaries + clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build(); + // starting replicas + clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build(); + assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(false)); + // remove node2 and reroute + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).remove("node2")).build(); + clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build(); + // verify that NODE_LEAVE is the reason for meta + assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(true)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo(), notNullValue()); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.NODE_LEFT)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getTimestampInMillis(), greaterThan(0l)); + } + + /** + * Verifies that when a shard fails, reason is properly set and details are preserved. + */ + @Test + public void testFailedShard() { + AllocationService allocation = createAllocationService(); + MetaData metaData = MetaData.builder() + .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metaData(metaData) + .routingTable(RoutingTable.builder().addAsNew(metaData.index("test"))).build(); + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().put(newNode("node1")).put(newNode("node2"))).build(); + clusterState = ClusterState.builder(clusterState).routingResult(allocation.reroute(clusterState)).build(); + // starting primaries + clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build(); + // starting replicas + clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyStartedShards(clusterState, clusterState.routingNodes().shardsWithState(INITIALIZING))).build(); + assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(false)); + // fail shard + ShardRouting shardToFail = clusterState.routingNodes().shardsWithState(STARTED).get(0); + clusterState = ClusterState.builder(clusterState).routingResult(allocation.applyFailedShards(clusterState, ImmutableList.of(new FailedRerouteAllocation.FailedShard(shardToFail, "test fail")))).build(); + // verify the reason and details + assertThat(clusterState.routingNodes().hasUnassigned(), equalTo(true)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo(), notNullValue()); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED)); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getDetails(), equalTo("test fail")); + assertThat(clusterState.routingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getTimestampInMillis(), greaterThan(0l)); + } +} diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 505de07e16d..ce10fdfa055 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -312,21 +312,21 @@ public class FailedShardsRoutingTests extends ElasticsearchAllocationTestCase { } int shardsToFail = randomIntBetween(1, numberOfReplicas); - ArrayList failedShards = new ArrayList<>(); + ArrayList failedShards = new ArrayList<>(); RoutingNodes routingNodes = clusterState.routingNodes(); for (int i = 0; i < shardsToFail; i++) { String n = "node" + Integer.toString(randomInt(numberOfReplicas)); logger.info("failing shard on node [{}]", n); ShardRouting shardToFail = routingNodes.node(n).get(0); - failedShards.add(new MutableShardRouting(shardToFail)); + failedShards.add(new FailedRerouteAllocation.FailedShard(new MutableShardRouting(shardToFail), null)); } routingTable = strategy.applyFailedShards(clusterState, failedShards).routingTable(); clusterState = ClusterState.builder(clusterState).routingTable(routingTable).build(); routingNodes = clusterState.routingNodes(); - for (ShardRouting failedShard : failedShards) { - if (!routingNodes.node(failedShard.currentNodeId()).isEmpty()) { + for (FailedRerouteAllocation.FailedShard failedShard : failedShards) { + if (!routingNodes.node(failedShard.shard.currentNodeId()).isEmpty()) { fail("shard " + failedShard + " was re-assigned to it's node"); } } diff --git a/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java b/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java index eadee06e2e2..bac1467fc07 100644 --- a/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java +++ b/core/src/test/java/org/elasticsearch/indices/store/IndicesStoreTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.ImmutableShardRouting; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.common.transport.LocalTransportAddress; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ElasticsearchTestCase; @@ -95,7 +96,11 @@ public class IndicesStoreTests extends ElasticsearchTestCase { } else { state = randomFrom(ShardRoutingState.values()); } - routingTable.addShard(new ImmutableShardRouting("test", i, "xyz", null, j == 0, state, 0)); + UnassignedInfo unassignedInfo = null; + if (state == ShardRoutingState.UNASSIGNED) { + unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null); + } + routingTable.addShard(new ImmutableShardRouting("test", i, "xyz", null, null, j == 0, state, 0, unassignedInfo)); } } assertFalse(indicesStore.shardCanBeDeleted(clusterState.build(), routingTable.build())); diff --git a/rest-api-spec/test/cat.shards/10_basic.yaml b/rest-api-spec/test/cat.shards/10_basic.yaml index 5a0d8688449..a8f0b9bb6ac 100755 --- a/rest-api-spec/test/cat.shards/10_basic.yaml +++ b/rest-api-spec/test/cat.shards/10_basic.yaml @@ -15,6 +15,10 @@ ip .+ \n id .+ \n node .+ \n + unassigned.reason .+ \n + unassigned.at .+ \n + unassigned.for .+ \n + unassigned.details .+ \n completion.size .+ \n fielddata.memory_size .+ \n fielddata.evictions .+ \n