Allows failing shards without marking as stale (#28054)

Currently, when failing a shard, we also mark it as stale (i.e. remove
its allocation id from the in-sync set). However, in some cases we need
to be able to fail a shard without removing it from the in-sync set.
This commit adds that capability. It is a preparatory change for making
the primary-replica resync less lenient.

Relates #24841
Nhat Nguyen 2018-02-03 09:41:53 -05:00 committed by GitHub
parent 13083e27da
commit 965efa51cc
21 changed files with 331 additions and 185 deletions
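
Before the diff, a minimal caller's-eye sketch of the change. The helper class and method names below are hypothetical, invented purely for illustration; only the `remoteShardFailed` signature and the meaning of its new `markAsStale` flag are taken from this commit.

import org.elasticsearch.cluster.action.shard.ShardStateAction;
import org.elasticsearch.index.shard.ShardId;

// Hypothetical helper, for illustration only.
class ShardFailureExamples {
    void failAndMarkStale(ShardStateAction shardStateAction, ShardId shardId, String allocationId,
                          long primaryTerm, Exception failure, ShardStateAction.Listener listener) {
        // Previous behavior, now explicit: fail the shard AND remove its
        // allocation id from the in-sync set (markAsStale = true).
        shardStateAction.remoteShardFailed(shardId, allocationId, primaryTerm, true,
            "failed to replicate write", failure, listener);
    }

    void failButKeepInSync(ShardStateAction shardStateAction, ShardId shardId, String allocationId,
                           long primaryTerm, Exception failure, ShardStateAction.Listener listener) {
        // New capability: fail the shard but keep its allocation id in the
        // in-sync set (markAsStale = false), e.g. for a stricter resync.
        shardStateAction.remoteShardFailed(shardId, allocationId, primaryTerm, false,
            "resync failed", failure, listener);
    }
}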

TransportWriteAction.java

@@ -387,14 +387,14 @@ public abstract class TransportWriteAction<
             logger.warn((org.apache.logging.log4j.util.Supplier<?>)
                 () -> new ParameterizedMessage("[{}] {}", replica.shardId(), message), exception);
-            shardStateAction.remoteShardFailed(replica.shardId(), replica.allocationId().getId(), primaryTerm, message, exception,
+            shardStateAction.remoteShardFailed(replica.shardId(), replica.allocationId().getId(), primaryTerm, true, message, exception,
                 createListener(onSuccess, onPrimaryDemoted, onIgnoredFailure));
         }

         @Override
         public void markShardCopyAsStaleIfNeeded(ShardId shardId, String allocationId, Runnable onSuccess,
                                                  Consumer<Exception> onPrimaryDemoted, Consumer<Exception> onIgnoredFailure) {
-            shardStateAction.remoteShardFailed(shardId, allocationId, primaryTerm, "mark copy as stale", null,
+            shardStateAction.remoteShardFailed(shardId, allocationId, primaryTerm, true, "mark copy as stale", null,
                 createListener(onSuccess, onPrimaryDemoted, onIgnoredFailure));
         }

ShardStateAction.java

@@ -24,6 +24,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.logging.log4j.util.Supplier;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.Version;
 import org.elasticsearch.cluster.ClusterChangedEvent;
 import org.elasticsearch.cluster.ClusterStateTaskExecutor;
 import org.elasticsearch.cluster.ClusterState;
@@ -88,21 +89,21 @@ public class ShardStateAction extends AbstractComponent {
         this.clusterService = clusterService;
         this.threadPool = threadPool;
-        transportService.registerRequestHandler(SHARD_STARTED_ACTION_NAME, ShardEntry::new, ThreadPool.Names.SAME, new ShardStartedTransportHandler(clusterService, new ShardStartedClusterStateTaskExecutor(allocationService, logger), logger));
-        transportService.registerRequestHandler(SHARD_FAILED_ACTION_NAME, ShardEntry::new, ThreadPool.Names.SAME, new ShardFailedTransportHandler(clusterService, new ShardFailedClusterStateTaskExecutor(allocationService, routingService, logger), logger));
+        transportService.registerRequestHandler(SHARD_STARTED_ACTION_NAME, ThreadPool.Names.SAME, StartedShardEntry::new, new ShardStartedTransportHandler(clusterService, new ShardStartedClusterStateTaskExecutor(allocationService, logger), logger));
+        transportService.registerRequestHandler(SHARD_FAILED_ACTION_NAME, ThreadPool.Names.SAME, FailedShardEntry::new, new ShardFailedTransportHandler(clusterService, new ShardFailedClusterStateTaskExecutor(allocationService, routingService, logger), logger));
     }

-    private void sendShardAction(final String actionName, final ClusterState currentState, final ShardEntry shardEntry, final Listener listener) {
+    private void sendShardAction(final String actionName, final ClusterState currentState, final TransportRequest request, final Listener listener) {
         ClusterStateObserver observer = new ClusterStateObserver(currentState, clusterService, null, logger, threadPool.getThreadContext());
         DiscoveryNode masterNode = currentState.nodes().getMasterNode();
         Predicate<ClusterState> changePredicate = MasterNodeChangePredicate.build(currentState);
         if (masterNode == null) {
-            logger.warn("{} no master known for action [{}] for shard entry [{}]", shardEntry.shardId, actionName, shardEntry);
-            waitForNewMasterAndRetry(actionName, observer, shardEntry, listener, changePredicate);
+            logger.warn("no master known for action [{}] for shard entry [{}]", actionName, request);
+            waitForNewMasterAndRetry(actionName, observer, request, listener, changePredicate);
         } else {
-            logger.debug("{} sending [{}] to [{}] for shard entry [{}]", shardEntry.shardId, actionName, masterNode.getId(), shardEntry);
+            logger.debug("sending [{}] to [{}] for shard entry [{}]", actionName, masterNode.getId(), request);
             transportService.sendRequest(masterNode,
-                actionName, shardEntry, new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
+                actionName, request, new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
                     @Override
                     public void handleResponse(TransportResponse.Empty response) {
                         listener.onSuccess();
@@ -111,9 +112,9 @@ public class ShardStateAction extends AbstractComponent {
                     @Override
                     public void handleException(TransportException exp) {
                         if (isMasterChannelException(exp)) {
-                            waitForNewMasterAndRetry(actionName, observer, shardEntry, listener, changePredicate);
+                            waitForNewMasterAndRetry(actionName, observer, request, listener, changePredicate);
                         } else {
-                            logger.warn((Supplier<?>) () -> new ParameterizedMessage("{} unexpected failure while sending request [{}] to [{}] for shard entry [{}]", shardEntry.shardId, actionName, masterNode, shardEntry), exp);
+                            logger.warn(new ParameterizedMessage("unexpected failure while sending request [{}] to [{}] for shard entry [{}]", actionName, masterNode, request), exp);
                             listener.onFailure(exp instanceof RemoteTransportException ? (Exception) (exp.getCause() instanceof Exception ? exp.getCause() : new ElasticsearchException(exp.getCause())) : exp);
                         }
                     }
@@ -139,13 +140,15 @@ public class ShardStateAction extends AbstractComponent {
      * @param shardId shard id of the shard to fail
      * @param allocationId allocation id of the shard to fail
      * @param primaryTerm the primary term associated with the primary shard that is failing the shard. Must be strictly positive.
+     * @param markAsStale whether or not to mark a failing shard as stale (eg. removing from in-sync set) when failing the shard.
      * @param message the reason for the failure
      * @param failure the underlying cause of the failure
      * @param listener callback upon completion of the request
      */
-    public void remoteShardFailed(final ShardId shardId, String allocationId, long primaryTerm, final String message, @Nullable final Exception failure, Listener listener) {
+    public void remoteShardFailed(final ShardId shardId, String allocationId, long primaryTerm, boolean markAsStale, final String message, @Nullable final Exception failure, Listener listener) {
         assert primaryTerm > 0L : "primary term should be strictly positive";
-        shardFailed(shardId, allocationId, primaryTerm, message, failure, listener, clusterService.state());
+        FailedShardEntry shardEntry = new FailedShardEntry(shardId, allocationId, primaryTerm, message, failure, markAsStale);
+        sendShardAction(SHARD_FAILED_ACTION_NAME, clusterService.state(), shardEntry, listener);
     }

     /**
@@ -160,29 +163,24 @@
      */
     public void localShardFailed(final ShardRouting shardRouting, final String message, @Nullable final Exception failure, Listener listener,
                                  final ClusterState currentState) {
-        shardFailed(shardRouting.shardId(), shardRouting.allocationId().getId(), 0L, message, failure, listener, currentState);
-    }
-
-    private void shardFailed(final ShardId shardId, String allocationId, long primaryTerm, final String message,
-                             @Nullable final Exception failure, Listener listener, ClusterState currentState) {
-        ShardEntry shardEntry = new ShardEntry(shardId, allocationId, primaryTerm, message, failure);
+        FailedShardEntry shardEntry = new FailedShardEntry(shardRouting.shardId(), shardRouting.allocationId().getId(), 0L, message, failure, true);
         sendShardAction(SHARD_FAILED_ACTION_NAME, currentState, shardEntry, listener);
     }

     // visible for testing
-    protected void waitForNewMasterAndRetry(String actionName, ClusterStateObserver observer, ShardEntry shardEntry, Listener listener, Predicate<ClusterState> changePredicate) {
+    protected void waitForNewMasterAndRetry(String actionName, ClusterStateObserver observer, TransportRequest request, Listener listener, Predicate<ClusterState> changePredicate) {
         observer.waitForNextChange(new ClusterStateObserver.Listener() {
             @Override
             public void onNewClusterState(ClusterState state) {
                 if (logger.isTraceEnabled()) {
-                    logger.trace("new cluster state [{}] after waiting for master election to fail shard entry [{}]", state, shardEntry);
+                    logger.trace("new cluster state [{}] after waiting for master election for shard entry [{}]", state, request);
                 }
-                sendShardAction(actionName, state, shardEntry, listener);
+                sendShardAction(actionName, state, request, listener);
             }

             @Override
             public void onClusterServiceClose() {
-                logger.warn((Supplier<?>) () -> new ParameterizedMessage("{} node closed while execution action [{}] for shard entry [{}]", shardEntry.shardId, actionName, shardEntry), shardEntry.failure);
+                logger.warn("node closed while execution action [{}] for shard entry [{}]", actionName, request);
                 listener.onFailure(new NodeClosedException(clusterService.localNode()));
             }
@@ -194,7 +192,7 @@ public class ShardStateAction extends AbstractComponent {
         }, changePredicate);
     }

-    private static class ShardFailedTransportHandler implements TransportRequestHandler<ShardEntry> {
+    private static class ShardFailedTransportHandler implements TransportRequestHandler<FailedShardEntry> {
         private final ClusterService clusterService;
         private final ShardFailedClusterStateTaskExecutor shardFailedClusterStateTaskExecutor;
         private final Logger logger;
@@ -206,7 +204,7 @@ public class ShardStateAction extends AbstractComponent {
         }

         @Override
-        public void messageReceived(ShardEntry request, TransportChannel channel) throws Exception {
+        public void messageReceived(FailedShardEntry request, TransportChannel channel) throws Exception {
             logger.warn((Supplier<?>) () -> new ParameterizedMessage("{} received shard failed for {}", request.shardId, request), request.failure);
             clusterService.submitStateUpdateTask(
                 "shard-failed",
@@ -248,7 +246,7 @@ public class ShardStateAction extends AbstractComponent {
         }
     }

-    public static class ShardFailedClusterStateTaskExecutor implements ClusterStateTaskExecutor<ShardEntry> {
+    public static class ShardFailedClusterStateTaskExecutor implements ClusterStateTaskExecutor<FailedShardEntry> {
         private final AllocationService allocationService;
         private final RoutingService routingService;
         private final Logger logger;
@@ -260,13 +258,13 @@ public class ShardStateAction extends AbstractComponent {
         }

         @Override
-        public ClusterTasksResult<ShardEntry> execute(ClusterState currentState, List<ShardEntry> tasks) throws Exception {
-            ClusterTasksResult.Builder<ShardEntry> batchResultBuilder = ClusterTasksResult.builder();
-            List<ShardEntry> tasksToBeApplied = new ArrayList<>();
+        public ClusterTasksResult<FailedShardEntry> execute(ClusterState currentState, List<FailedShardEntry> tasks) throws Exception {
+            ClusterTasksResult.Builder<FailedShardEntry> batchResultBuilder = ClusterTasksResult.builder();
+            List<FailedShardEntry> tasksToBeApplied = new ArrayList<>();
             List<FailedShard> failedShardsToBeApplied = new ArrayList<>();
             List<StaleShard> staleShardsToBeApplied = new ArrayList<>();
-            for (ShardEntry task : tasks) {
+            for (FailedShardEntry task : tasks) {
                 IndexMetaData indexMetaData = currentState.metaData().index(task.shardId.getIndex());
                 if (indexMetaData == null) {
                     // tasks that correspond to non-existent indices are marked as successful
@@ -314,7 +312,7 @@ public class ShardStateAction extends AbstractComponent {
                         // failing a shard also possibly marks it as stale (see IndexMetaDataUpdater)
                         logger.debug("{} failing shard {} (shard failed task: [{}])", task.shardId, matched, task);
                         tasksToBeApplied.add(task);
-                        failedShardsToBeApplied.add(new FailedShard(matched, task.message, task.failure));
+                        failedShardsToBeApplied.add(new FailedShard(matched, task.message, task.failure, task.markAsStale));
                     }
                 }
             }
@@ -352,15 +350,82 @@ public class ShardStateAction extends AbstractComponent {
         }
     }

+    public static class FailedShardEntry extends TransportRequest {
+        final ShardId shardId;
+        final String allocationId;
+        final long primaryTerm;
+        final String message;
+        final Exception failure;
+        final boolean markAsStale;
+
+        FailedShardEntry(StreamInput in) throws IOException {
+            super(in);
+            shardId = ShardId.readShardId(in);
+            allocationId = in.readString();
+            primaryTerm = in.readVLong();
+            message = in.readString();
+            failure = in.readException();
+            if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+                markAsStale = in.readBoolean();
+            } else {
+                markAsStale = true;
+            }
+        }
+
+        public FailedShardEntry(ShardId shardId, String allocationId, long primaryTerm, String message, Exception failure, boolean markAsStale) {
+            this.shardId = shardId;
+            this.allocationId = allocationId;
+            this.primaryTerm = primaryTerm;
+            this.message = message;
+            this.failure = failure;
+            this.markAsStale = markAsStale;
+        }
+
+        public ShardId getShardId() {
+            return shardId;
+        }
+
+        public String getAllocationId() {
+            return allocationId;
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            super.writeTo(out);
+            shardId.writeTo(out);
+            out.writeString(allocationId);
+            out.writeVLong(primaryTerm);
+            out.writeString(message);
+            out.writeException(failure);
+            if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+                out.writeBoolean(markAsStale);
+            }
+        }
+
+        @Override
+        public String toString() {
+            List<String> components = new ArrayList<>(6);
+            components.add("shard id [" + shardId + "]");
+            components.add("allocation id [" + allocationId + "]");
+            components.add("primary term [" + primaryTerm + "]");
+            components.add("message [" + message + "]");
+            if (failure != null) {
+                components.add("failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
+            }
+            components.add("markAsStale [" + markAsStale + "]");
+            return String.join(", ", components);
+        }
+    }
+
     public void shardStarted(final ShardRouting shardRouting, final String message, Listener listener) {
         shardStarted(shardRouting, message, listener, clusterService.state());
     }

     public void shardStarted(final ShardRouting shardRouting, final String message, Listener listener, ClusterState currentState) {
-        ShardEntry shardEntry = new ShardEntry(shardRouting.shardId(), shardRouting.allocationId().getId(), 0L, message, null);
+        StartedShardEntry shardEntry = new StartedShardEntry(shardRouting.shardId(), shardRouting.allocationId().getId(), message);
         sendShardAction(SHARD_STARTED_ACTION_NAME, currentState, shardEntry, listener);
     }

-    private static class ShardStartedTransportHandler implements TransportRequestHandler<ShardEntry> {
+    private static class ShardStartedTransportHandler implements TransportRequestHandler<StartedShardEntry> {
         private final ClusterService clusterService;
         private final ShardStartedClusterStateTaskExecutor shardStartedClusterStateTaskExecutor;
         private final Logger logger;
@@ -372,7 +437,7 @@ public class ShardStateAction extends AbstractComponent {
         }

         @Override
-        public void messageReceived(ShardEntry request, TransportChannel channel) throws Exception {
+        public void messageReceived(StartedShardEntry request, TransportChannel channel) throws Exception {
             logger.debug("{} received shard started for [{}]", request.shardId, request);
             clusterService.submitStateUpdateTask(
                 "shard-started " + request,
@@ -384,7 +449,7 @@ public class ShardStateAction extends AbstractComponent {
         }
     }

-    public static class ShardStartedClusterStateTaskExecutor implements ClusterStateTaskExecutor<ShardEntry>, ClusterStateTaskListener {
+    public static class ShardStartedClusterStateTaskExecutor implements ClusterStateTaskExecutor<StartedShardEntry>, ClusterStateTaskListener {
         private final AllocationService allocationService;
         private final Logger logger;
@@ -394,14 +459,12 @@ public class ShardStateAction extends AbstractComponent {
         }

         @Override
-        public ClusterTasksResult<ShardEntry> execute(ClusterState currentState, List<ShardEntry> tasks) throws Exception {
-            ClusterTasksResult.Builder<ShardEntry> builder = ClusterTasksResult.builder();
-            List<ShardEntry> tasksToBeApplied = new ArrayList<>();
+        public ClusterTasksResult<StartedShardEntry> execute(ClusterState currentState, List<StartedShardEntry> tasks) throws Exception {
+            ClusterTasksResult.Builder<StartedShardEntry> builder = ClusterTasksResult.builder();
+            List<StartedShardEntry> tasksToBeApplied = new ArrayList<>();
             List<ShardRouting> shardRoutingsToBeApplied = new ArrayList<>(tasks.size());
             Set<ShardRouting> seenShardRoutings = new HashSet<>(); // to prevent duplicates
-            for (ShardEntry task : tasks) {
-                assert task.primaryTerm == 0L : "shard is only started by itself: " + task;
-
+            for (StartedShardEntry task : tasks) {
                 ShardRouting matched = currentState.getRoutingTable().getByAllocationId(task.shardId, task.allocationId);
                 if (matched == null) {
                     // tasks that correspond to non-existent shards are marked as successful. The reason is that we resend shard started
@@ -451,40 +514,30 @@ public class ShardStateAction extends AbstractComponent {
         }
     }

-    public static class ShardEntry extends TransportRequest {
-        ShardId shardId;
-        String allocationId;
-        long primaryTerm;
-        String message;
-        Exception failure;
-
-        public ShardEntry() {
-        }
-
-        public ShardEntry(ShardId shardId, String allocationId, long primaryTerm, String message, @Nullable Exception failure) {
-            this.shardId = shardId;
-            this.allocationId = allocationId;
-            this.primaryTerm = primaryTerm;
-            this.message = message;
-            this.failure = failure;
-        }
-
-        public ShardId getShardId() {
-            return shardId;
-        }
-
-        public String getAllocationId() {
-            return allocationId;
-        }
-
-        @Override
-        public void readFrom(StreamInput in) throws IOException {
-            super.readFrom(in);
+    public static class StartedShardEntry extends TransportRequest {
+        final ShardId shardId;
+        final String allocationId;
+        final String message;
+
+        StartedShardEntry(StreamInput in) throws IOException {
+            super(in);
             shardId = ShardId.readShardId(in);
             allocationId = in.readString();
-            primaryTerm = in.readVLong();
-            message = in.readString();
-            failure = in.readException();
+            if (in.getVersion().before(Version.V_7_0_0_alpha1)) {
+                final long primaryTerm = in.readVLong();
+                assert primaryTerm == 0L : "shard is only started by itself: primary term [" + primaryTerm + "]";
+            }
+            this.message = in.readString();
+            if (in.getVersion().before(Version.V_7_0_0_alpha1)) {
+                final Exception ex = in.readException();
+                assert ex == null : "started shard must not have failure [" + ex + "]";
+            }
+        }
+
+        public StartedShardEntry(ShardId shardId, String allocationId, String message) {
+            this.shardId = shardId;
+            this.allocationId = allocationId;
+            this.message = message;
         }

         @Override
@@ -492,22 +545,19 @@ public class ShardStateAction extends AbstractComponent {
             super.writeTo(out);
             shardId.writeTo(out);
             out.writeString(allocationId);
-            out.writeVLong(primaryTerm);
+            if (out.getVersion().before(Version.V_7_0_0_alpha1)) {
+                out.writeVLong(0L);
+            }
             out.writeString(message);
-            out.writeException(failure);
+            if (out.getVersion().before(Version.V_7_0_0_alpha1)) {
+                out.writeException(null);
+            }
         }

         @Override
         public String toString() {
-            List<String> components = new ArrayList<>(4);
-            components.add("shard id [" + shardId + "]");
-            components.add("allocation id [" + allocationId + "]");
-            components.add("primary term [" + primaryTerm + "]");
-            components.add("message [" + message + "]");
-            if (failure != null) {
-                components.add("failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
-            }
-            return String.join(", ", components);
+            return String.format(Locale.ROOT, "StartedShardEntry{shardId [%s], allocationId [%s], message [%s]}",
+                shardId, allocationId, message);
         }
     }
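
A note on the serialization above: both entry classes use version-gated reads and writes so that mixed-version clusters keep working. The sketch below restates that pattern in isolation; the standalone method is hypothetical, but the version constant, stream calls, and the `true` default for old peers are taken directly from the `FailedShardEntry` code in this commit.

import java.io.IOException;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;

class WireBwcSketch {
    // Read a field that only exists on the wire from V_7_0_0_alpha1 onwards,
    // falling back to the historical implied value for older peers.
    static boolean readMarkAsStale(StreamInput in) throws IOException {
        if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
            return in.readBoolean(); // new peers send the flag explicitly
        } else {
            return true;             // old peers always implied mark-as-stale
        }
    }
}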

AllocationService.java

@@ -138,8 +138,8 @@ public class AllocationService extends AbstractComponent {
     }

     // Used for testing
-    public ClusterState applyFailedShard(ClusterState clusterState, ShardRouting failedShard) {
-        return applyFailedShards(clusterState, singletonList(new FailedShard(failedShard, null, null)), emptyList());
+    public ClusterState applyFailedShard(ClusterState clusterState, ShardRouting failedShard, boolean markAsStale) {
+        return applyFailedShards(clusterState, singletonList(new FailedShard(failedShard, null, null, markAsStale)), emptyList());
     }

     // Used for testing
@@ -185,6 +185,9 @@
                 UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, message,
                     failedShardEntry.getFailure(), failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false,
                     AllocationStatus.NO_ATTEMPT);
+                if (failedShardEntry.markAsStale()) {
+                    allocation.removeAllocationId(failedShard);
+                }
                 routingNodes.failShard(logger, failedShard, unassignedInfo, indexMetaData, allocation.changes());
             } else {
                 logger.trace("{} shard routing failed in an earlier iteration (routing: {})", shardToFail.shardId(), shardToFail);

FailedShard.java

@@ -30,18 +30,20 @@ public class FailedShard {
     private final ShardRouting routingEntry;
     private final String message;
     private final Exception failure;
+    private final boolean markAsStale;

-    public FailedShard(ShardRouting routingEntry, String message, Exception failure) {
+    public FailedShard(ShardRouting routingEntry, String message, Exception failure, boolean markAsStale) {
         assert routingEntry.assignedToNode() : "only assigned shards can be failed " + routingEntry;
         this.routingEntry = routingEntry;
         this.message = message;
         this.failure = failure;
+        this.markAsStale = markAsStale;
     }

     @Override
     public String toString() {
         return "failed shard, shard " + routingEntry + ", message [" + message + "], failure [" +
-            ExceptionsHelper.detailedMessage(failure) + "]";
+            ExceptionsHelper.detailedMessage(failure) + "], markAsStale [" + markAsStale + "]";
     }

     /**
@@ -66,4 +68,11 @@
     public Exception getFailure() {
         return failure;
     }
+
+    /**
+     * Whether or not to mark the shard as stale (eg. removing from in-sync set) when failing the shard.
+     */
+    public boolean markAsStale() {
+        return markAsStale;
+    }
 }

IndexMetaDataUpdater.java

@@ -72,19 +72,12 @@ public class IndexMetaDataUpdater extends RoutingChangesObserver.AbstractRouting
     @Override
     public void shardFailed(ShardRouting failedShard, UnassignedInfo unassignedInfo) {
-        if (failedShard.active() && unassignedInfo.getReason() != UnassignedInfo.Reason.NODE_LEFT) {
-            removeAllocationId(failedShard);
-
-            if (failedShard.primary()) {
-                Updates updates = changes(failedShard.shardId());
-                if (updates.firstFailedPrimary == null) {
-                    // more than one primary can be failed (because of batching, primary can be failed, replica promoted and then failed...)
-                    updates.firstFailedPrimary = failedShard;
-                }
-            }
-        }
-
         if (failedShard.active() && failedShard.primary()) {
+            Updates updates = changes(failedShard.shardId());
+            if (updates.firstFailedPrimary == null) {
+                // more than one primary can be failed (because of batching, primary can be failed, replica promoted and then failed...)
+                updates.firstFailedPrimary = failedShard;
+            }
             increasePrimaryTerm(failedShard.shardId());
         }
     }
@@ -286,8 +279,10 @@ public class IndexMetaDataUpdater extends RoutingChangesObserver.AbstractRouting
     /**
      * Remove allocation id of this shard from the set of in-sync shard copies
      */
-    private void removeAllocationId(ShardRouting shardRouting) {
-        changes(shardRouting.shardId()).removedAllocationIds.add(shardRouting.allocationId().getId());
+    void removeAllocationId(ShardRouting shardRouting) {
+        if (shardRouting.active()) {
+            changes(shardRouting.shardId()).removedAllocationIds.add(shardRouting.allocationId().getId());
+        }
     }

     /**

RoutingAllocation.java

@@ -27,6 +27,7 @@ import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.RoutingChangesObserver;
 import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.RoutingTable;
+import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
 import org.elasticsearch.cluster.routing.allocation.decider.Decision;
 import org.elasticsearch.common.collect.ImmutableOpenMap;
@@ -222,6 +223,13 @@ public class RoutingAllocation {
         return unmodifiableSet(new HashSet<>(ignore));
     }

+    /**
+     * Remove the allocation id of the provided shard from the set of in-sync shard copies
+     */
+    public void removeAllocationId(ShardRouting shardRouting) {
+        indexMetaDataUpdater.removeAllocationId(shardRouting);
+    }
+
     /**
      * Returns observer to use for changes made to the routing nodes
      */

CancelAllocationCommand.java

@@ -156,6 +156,8 @@ public class CancelAllocationCommand implements AllocationCommand {
         }
         routingNodes.failShard(Loggers.getLogger(CancelAllocationCommand.class), shardRouting,
             new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null), indexMetaData, allocation.changes());
+        // TODO: We don't have to remove a cancelled shard from in-sync set once we have a strict resync implementation.
+        allocation.removeAllocationId(shardRouting);
         return new RerouteExplanation(this, allocation.decision(Decision.YES, "cancel_allocation_command",
             "shard " + shardId + " on node " + discoNode + " can be cancelled"));
     }

ClusterRerouteTests.java

@@ -120,7 +120,7 @@ public class ClusterRerouteTests extends ESAllocationTestCase {
             assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i);
             List<FailedShard> failedShards = Collections.singletonList(
                 new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i,
-                    new UnsupportedOperationException()));
+                    new UnsupportedOperationException(), randomBoolean()));
             newState = allocationService.applyFailedShards(clusterState, failedShards);
             assertThat(newState, not(equalTo(clusterState)));
             clusterState = newState;

TransportWriteActionTests.java

@@ -315,7 +315,7 @@ public class TransportWriteActionTests extends ESTestCase {
             // A write replication action proxy should fail the shard
             assertEquals(1, shardFailedRequests.length);
             CapturingTransport.CapturedRequest shardFailedRequest = shardFailedRequests[0];
-            ShardStateAction.ShardEntry shardEntry = (ShardStateAction.ShardEntry) shardFailedRequest.request;
+            ShardStateAction.FailedShardEntry shardEntry = (ShardStateAction.FailedShardEntry) shardFailedRequest.request;
             // the shard the request was sent to and the shard to be failed should be the same
             assertEquals(shardEntry.getShardId(), replica.shardId());
             assertEquals(shardEntry.getAllocationId(), replica.allocationId().getId());

ShardFailedClusterStateTaskExecutorTests.java

@@ -22,6 +22,7 @@ package org.elasticsearch.cluster.action.shard;
 import com.carrotsearch.hppc.cursors.ObjectCursor;
 import org.apache.lucene.index.CorruptIndexException;
 import org.elasticsearch.Version;
+import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.ClusterStateTaskExecutor;
@@ -30,6 +31,7 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.GroupShardsIterator;
+import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
 import org.elasticsearch.cluster.routing.RoutingNodes;
 import org.elasticsearch.cluster.routing.RoutingTable;
 import org.elasticsearch.cluster.routing.ShardIterator;
@@ -42,20 +44,26 @@ import org.elasticsearch.cluster.routing.allocation.StaleShard;
 import org.elasticsearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.shard.ShardId;
 import org.junit.Before;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;

+import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING;
+import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.hamcrest.Matchers.contains;

 public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCase {
@@ -87,8 +95,8 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
     }

     public void testEmptyTaskListProducesSameClusterState() throws Exception {
-        List<ShardStateAction.ShardEntry> tasks = Collections.emptyList();
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result =
+        List<ShardStateAction.FailedShardEntry> tasks = Collections.emptyList();
+        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.FailedShardEntry> result =
             executor.execute(clusterState, tasks);
         assertTasksSuccessful(tasks, result, clusterState, false);
     }
@@ -96,35 +104,35 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
     public void testDuplicateFailuresAreOkay() throws Exception {
         String reason = "test duplicate failures are okay";
         ClusterState currentState = createClusterStateWithStartedShards(reason);
-        List<ShardStateAction.ShardEntry> tasks = createExistingShards(currentState, reason);
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result = executor.execute(currentState, tasks);
+        List<FailedShardEntry> tasks = createExistingShards(currentState, reason);
+        ClusterStateTaskExecutor.ClusterTasksResult<FailedShardEntry> result = executor.execute(currentState, tasks);
         assertTasksSuccessful(tasks, result, clusterState, true);
     }

     public void testNonExistentShardsAreMarkedAsSuccessful() throws Exception {
         String reason = "test non existent shards are marked as successful";
         ClusterState currentState = createClusterStateWithStartedShards(reason);
-        List<ShardStateAction.ShardEntry> tasks = createNonExistentShards(currentState, reason);
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result = executor.execute(clusterState, tasks);
+        List<FailedShardEntry> tasks = createNonExistentShards(currentState, reason);
+        ClusterStateTaskExecutor.ClusterTasksResult<FailedShardEntry> result = executor.execute(clusterState, tasks);
         assertTasksSuccessful(tasks, result, clusterState, false);
     }

     public void testTriviallySuccessfulTasksBatchedWithFailingTasks() throws Exception {
         String reason = "test trivially successful tasks batched with failing tasks";
         ClusterState currentState = createClusterStateWithStartedShards(reason);
-        List<ShardStateAction.ShardEntry> failingTasks = createExistingShards(currentState, reason);
-        List<ShardStateAction.ShardEntry> nonExistentTasks = createNonExistentShards(currentState, reason);
+        List<FailedShardEntry> failingTasks = createExistingShards(currentState, reason);
+        List<FailedShardEntry> nonExistentTasks = createNonExistentShards(currentState, reason);
         ShardStateAction.ShardFailedClusterStateTaskExecutor failingExecutor = new ShardStateAction.ShardFailedClusterStateTaskExecutor(allocationService, null, logger) {
             @Override
             ClusterState applyFailedShards(ClusterState currentState, List<FailedShard> failedShards, List<StaleShard> staleShards) {
                 throw new RuntimeException("simulated applyFailedShards failure");
             }
         };
-        List<ShardStateAction.ShardEntry> tasks = new ArrayList<>();
+        List<FailedShardEntry> tasks = new ArrayList<>();
         tasks.addAll(failingTasks);
         tasks.addAll(nonExistentTasks);
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result = failingExecutor.execute(currentState, tasks);
-        Map<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap =
+        ClusterStateTaskExecutor.ClusterTasksResult<FailedShardEntry> result = failingExecutor.execute(currentState, tasks);
+        Map<FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap =
             failingTasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.failure(new RuntimeException("simulated applyFailedShards failure"))));
         taskResultMap.putAll(nonExistentTasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.success())));
         assertTaskResults(taskResultMap, result, currentState, false);
@@ -133,23 +141,47 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
     public void testIllegalShardFailureRequests() throws Exception {
         String reason = "test illegal shard failure requests";
         ClusterState currentState = createClusterStateWithStartedShards(reason);
-        List<ShardStateAction.ShardEntry> failingTasks = createExistingShards(currentState, reason);
-        List<ShardStateAction.ShardEntry> tasks = new ArrayList<>();
-        for (ShardStateAction.ShardEntry failingTask : failingTasks) {
+        List<ShardStateAction.FailedShardEntry> failingTasks = createExistingShards(currentState, reason);
+        List<ShardStateAction.FailedShardEntry> tasks = new ArrayList<>();
+        for (ShardStateAction.FailedShardEntry failingTask : failingTasks) {
             long primaryTerm = currentState.metaData().index(failingTask.shardId.getIndex()).primaryTerm(failingTask.shardId.id());
-            tasks.add(new ShardStateAction.ShardEntry(failingTask.shardId, failingTask.allocationId,
-                randomIntBetween(1, (int) primaryTerm - 1), failingTask.message, failingTask.failure));
+            tasks.add(new FailedShardEntry(failingTask.shardId, failingTask.allocationId,
                randomIntBetween(1, (int) primaryTerm - 1), failingTask.message, failingTask.failure, randomBoolean()));
         }
-        Map<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap =
+        Map<FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap =
             tasks.stream().collect(Collectors.toMap(
                 Function.identity(),
                 task -> ClusterStateTaskExecutor.TaskResult.failure(new ShardStateAction.NoLongerPrimaryShardException(task.shardId,
                     "primary term [" + task.primaryTerm + "] did not match current primary term [" +
                         currentState.metaData().index(task.shardId.getIndex()).primaryTerm(task.shardId.id()) + "]"))));
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result = executor.execute(currentState, tasks);
+        ClusterStateTaskExecutor.ClusterTasksResult<FailedShardEntry> result = executor.execute(currentState, tasks);
         assertTaskResults(taskResultMap, result, currentState, false);
     }

+    public void testMarkAsStaleWhenFailingShard() throws Exception {
+        final MockAllocationService allocation = createAllocationService();
+        ClusterState clusterState = createClusterStateWithStartedShards("test markAsStale");
+        clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
+        IndexShardRoutingTable shardRoutingTable = clusterState.routingTable().index(INDEX).shard(0);
+        long primaryTerm = clusterState.metaData().index(INDEX).primaryTerm(0);
+        final Set<String> oldInSync = clusterState.metaData().index(INDEX).inSyncAllocationIds(0);
+        {
+            ShardStateAction.FailedShardEntry failShardOnly = new ShardStateAction.FailedShardEntry(shardRoutingTable.shardId(),
+                randomFrom(oldInSync), primaryTerm, "dummy", null, false);
+            ClusterState appliedState = executor.execute(clusterState, Arrays.asList(failShardOnly)).resultingState;
+            Set<String> newInSync = appliedState.metaData().index(INDEX).inSyncAllocationIds(0);
+            assertThat(newInSync, equalTo(oldInSync));
+        }
+        {
+            final String failedAllocationId = randomFrom(oldInSync);
+            ShardStateAction.FailedShardEntry failAndMarkAsStale = new ShardStateAction.FailedShardEntry(shardRoutingTable.shardId(),
+                failedAllocationId, primaryTerm, "dummy", null, true);
+            ClusterState appliedState = executor.execute(clusterState, Arrays.asList(failAndMarkAsStale)).resultingState;
+            Set<String> newInSync = appliedState.metaData().index(INDEX).inSyncAllocationIds(0);
+            assertThat(Sets.difference(oldInSync, newInSync), contains(failedAllocationId));
+        }
+    }
+
     private ClusterState createClusterStateWithStartedShards(String reason) {
         int numberOfNodes = 1 + numberOfReplicas;
         DiscoveryNodes.Builder nodes = DiscoveryNodes.builder();
@@ -163,7 +195,7 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
         return allocationService.applyStartedShards(stateAfterReroute, routingNodes.shardsWithState(ShardRoutingState.INITIALIZING));
     }

-    private List<ShardStateAction.ShardEntry> createExistingShards(ClusterState currentState, String reason) {
+    private List<ShardStateAction.FailedShardEntry> createExistingShards(ClusterState currentState, String reason) {
         List<ShardRouting> shards = new ArrayList<>();
         GroupShardsIterator<ShardIterator> shardGroups = currentState.routingTable().allAssignedShardsGrouped(new String[] { INDEX }, true);
         for (ShardIterator shardIt : shardGroups) {
@@ -181,7 +213,7 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
         return toTasks(currentState, shardsToFail, indexUUID, reason);
     }

-    private List<ShardStateAction.ShardEntry> createNonExistentShards(ClusterState currentState, String reason) {
+    private List<ShardStateAction.FailedShardEntry> createNonExistentShards(ClusterState currentState, String reason) {
         // add shards from a non-existent index
         String nonExistentIndexUUID = "non-existent";
         Index index = new Index("non-existent", nonExistentIndexUUID);
@@ -195,14 +227,15 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
             nonExistentShards.add(nonExistentShardRouting(index, nodeIds, false));
         }

-        List<ShardStateAction.ShardEntry> existingShards = createExistingShards(currentState, reason);
-        List<ShardStateAction.ShardEntry> shardsWithMismatchedAllocationIds = new ArrayList<>();
-        for (ShardStateAction.ShardEntry existingShard : existingShards) {
-            shardsWithMismatchedAllocationIds.add(new ShardStateAction.ShardEntry(existingShard.shardId, UUIDs.randomBase64UUID(), 0L, existingShard.message, existingShard.failure));
+        List<ShardStateAction.FailedShardEntry> existingShards = createExistingShards(currentState, reason);
+        List<ShardStateAction.FailedShardEntry> shardsWithMismatchedAllocationIds = new ArrayList<>();
+        for (ShardStateAction.FailedShardEntry existingShard : existingShards) {
+            shardsWithMismatchedAllocationIds.add(new ShardStateAction.FailedShardEntry(existingShard.shardId, UUIDs.randomBase64UUID(), 0L, existingShard.message, existingShard.failure, randomBoolean()));
         }

-        List<ShardStateAction.ShardEntry> tasks = new ArrayList<>();
-        nonExistentShards.forEach(shard -> tasks.add(new ShardStateAction.ShardEntry(shard.shardId(), shard.allocationId().getId(), 0L, reason, new CorruptIndexException("simulated", nonExistentIndexUUID))));
+        List<ShardStateAction.FailedShardEntry> tasks = new ArrayList<>();
+        nonExistentShards.forEach(shard -> tasks.add(new ShardStateAction.FailedShardEntry(shard.shardId(), shard.allocationId().getId(), 0L,
+            reason, new CorruptIndexException("simulated", nonExistentIndexUUID), randomBoolean())));
         tasks.addAll(shardsWithMismatchedAllocationIds);
         return tasks;
     }
@@ -214,26 +247,26 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
     }

     private static void assertTasksSuccessful(
-        List<ShardStateAction.ShardEntry> tasks,
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result,
+        List<ShardStateAction.FailedShardEntry> tasks,
+        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.FailedShardEntry> result,
         ClusterState clusterState,
         boolean clusterStateChanged
     ) {
-        Map<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap =
+        Map<ShardStateAction.FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap =
             tasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.success()));
         assertTaskResults(taskResultMap, result, clusterState, clusterStateChanged);
     }

     private static void assertTaskResults(
-        Map<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap,
-        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result,
+        Map<ShardStateAction.FailedShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap,
+        ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.FailedShardEntry> result,
         ClusterState clusterState,
         boolean clusterStateChanged
     ) {
         // there should be as many task results as tasks
         assertEquals(taskResultMap.size(), result.executionResults.size());

-        for (Map.Entry<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> entry : taskResultMap.entrySet()) {
+        for (Map.Entry<ShardStateAction.FailedShardEntry, ClusterStateTaskExecutor.TaskResult> entry : taskResultMap.entrySet()) {
             // every task should have a corresponding task result
             assertTrue(result.executionResults.containsKey(entry.getKey()));
@@ -242,7 +275,7 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
         }

         List<ShardRouting> shards = clusterState.getRoutingTable().allShards();
-        for (Map.Entry<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> entry : taskResultMap.entrySet()) {
+        for (Map.Entry<ShardStateAction.FailedShardEntry, ClusterStateTaskExecutor.TaskResult> entry : taskResultMap.entrySet()) {
             if (entry.getValue().isSuccess()) {
                 // the shard was successfully failed and so should not be in the routing table
                 for (ShardRouting shard : shards) {
@@ -267,15 +300,15 @@ public class ShardFailedClusterStateTaskExecutorTests extends ESAllocationTestCa
         }
     }

-    private static List<ShardStateAction.ShardEntry> toTasks(ClusterState currentState, List<ShardRouting> shards, String indexUUID, String message) {
+    private static List<ShardStateAction.FailedShardEntry> toTasks(ClusterState currentState, List<ShardRouting> shards, String indexUUID, String message) {
         return shards
             .stream()
-            .map(shard -> new ShardStateAction.ShardEntry(
+            .map(shard -> new ShardStateAction.FailedShardEntry(
                 shard.shardId(),
                 shard.allocationId().getId(),
                 randomBoolean() ? 0L : currentState.metaData().getIndexSafe(shard.index()).primaryTerm(shard.id()),
                 message,
-                new CorruptIndexException("simulated", indexUUID)))
+                new CorruptIndexException("simulated", indexUUID), randomBoolean()))
             .collect(Collectors.toList());
     }
 }

ShardStateActionTests.java

@@ -20,10 +20,13 @@
 package org.elasticsearch.cluster.action.shard;
 import org.apache.lucene.index.CorruptIndexException;
+import org.elasticsearch.Version;
 import org.elasticsearch.action.support.replication.ClusterStateCreationUtils;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.ClusterStateObserver;
 import org.elasticsearch.cluster.NotMasterException;
+import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry;
+import org.elasticsearch.cluster.action.shard.ShardStateAction.StartedShardEntry;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.cluster.routing.IndexRoutingTable;
 import org.elasticsearch.cluster.routing.RoutingService;
@@ -32,15 +35,22 @@ import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.ShardsIterator;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.VersionUtils;
 import org.elasticsearch.test.transport.CapturingTransport;
 import org.elasticsearch.threadpool.TestThreadPool;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.NodeDisconnectedException;
 import org.elasticsearch.transport.NodeNotConnectedException;
 import org.elasticsearch.transport.TransportException;
+import org.elasticsearch.transport.TransportRequest;
 import org.elasticsearch.transport.TransportResponse;
 import org.elasticsearch.transport.TransportService;
 import org.junit.After;
@@ -48,6 +58,8 @@ import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
+import java.io.IOException;
+import java.util.UUID;
 import java.util.Collections;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
@@ -63,6 +75,7 @@ import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.instanceOf;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
 public class ShardStateActionTests extends ESTestCase {
     private static ThreadPool THREAD_POOL;
@@ -90,9 +103,9 @@ public class ShardStateActionTests extends ESTestCase {
         }
         @Override
-        protected void waitForNewMasterAndRetry(String actionName, ClusterStateObserver observer, ShardEntry shardEntry, Listener listener, Predicate<ClusterState> changePredicate) {
+        protected void waitForNewMasterAndRetry(String actionName, ClusterStateObserver observer, TransportRequest request, Listener listener, Predicate<ClusterState> changePredicate) {
             onBeforeWaitForNewMasterAndRetry.run();
-            super.waitForNewMasterAndRetry(actionName, observer, shardEntry, listener, changePredicate);
+            super.waitForNewMasterAndRetry(actionName, observer, request, listener, changePredicate);
             onAfterWaitForNewMasterAndRetry.run();
         }
     }
@@ -160,8 +173,8 @@ public class ShardStateActionTests extends ESTestCase {
         CapturingTransport.CapturedRequest[] capturedRequests = transport.getCapturedRequestsAndClear();
         assertEquals(1, capturedRequests.length);
         // the request is a shard failed request
-        assertThat(capturedRequests[0].request, is(instanceOf(ShardStateAction.ShardEntry.class)));
-        ShardStateAction.ShardEntry shardEntry = (ShardStateAction.ShardEntry) capturedRequests[0].request;
+        assertThat(capturedRequests[0].request, is(instanceOf(ShardStateAction.FailedShardEntry.class)));
+        ShardStateAction.FailedShardEntry shardEntry = (ShardStateAction.FailedShardEntry) capturedRequests[0].request;
         // for the right shard
         assertEquals(shardEntry.shardId, shardRouting.shardId());
         assertEquals(shardEntry.allocationId, shardRouting.allocationId().getId());
@@ -342,7 +355,7 @@ public class ShardStateActionTests extends ESTestCase {
         long primaryTerm = clusterService.state().metaData().index(index).primaryTerm(failedShard.id());
         assertThat(primaryTerm, greaterThanOrEqualTo(1L));
-        shardStateAction.remoteShardFailed(failedShard.shardId(), failedShard.allocationId().getId(), primaryTerm + 1, "test",
+        shardStateAction.remoteShardFailed(failedShard.shardId(), failedShard.allocationId().getId(), primaryTerm + 1, randomBoolean(), "test",
             getSimulatedFailure(), new ShardStateAction.Listener() {
                 @Override
                 public void onSuccess() {
@@ -407,4 +420,36 @@ public class ShardStateActionTests extends ESTestCase {
     private Exception getSimulatedFailure() {
         return new CorruptIndexException("simulated", (String) null);
     }
+    public void testShardEntryBWCSerialize() throws Exception {
+        final Version bwcVersion = randomValueOtherThanMany(
+            version -> version.onOrAfter(Version.V_7_0_0_alpha1), () -> VersionUtils.randomVersion(random()));
+        final ShardId shardId = new ShardId(randomRealisticUnicodeOfLengthBetween(10, 100), UUID.randomUUID().toString(), between(0, 1000));
+        final String allocationId = randomRealisticUnicodeOfCodepointLengthBetween(10, 100);
+        final String reason = randomRealisticUnicodeOfCodepointLengthBetween(10, 100);
+        try (StreamInput in = serialize(new StartedShardEntry(shardId, allocationId, reason), bwcVersion).streamInput()) {
+            in.setVersion(bwcVersion);
+            final FailedShardEntry failedShardEntry = new FailedShardEntry(in);
+            assertThat(failedShardEntry.shardId, equalTo(shardId));
+            assertThat(failedShardEntry.allocationId, equalTo(allocationId));
+            assertThat(failedShardEntry.message, equalTo(reason));
+            assertThat(failedShardEntry.failure, nullValue());
+            assertThat(failedShardEntry.markAsStale, equalTo(true));
+        }
+        try (StreamInput in = serialize(new FailedShardEntry(shardId, allocationId, 0L, reason, null, false), bwcVersion).streamInput()) {
+            in.setVersion(bwcVersion);
+            final StartedShardEntry startedShardEntry = new StartedShardEntry(in);
+            assertThat(startedShardEntry.shardId, equalTo(shardId));
+            assertThat(startedShardEntry.allocationId, equalTo(allocationId));
+            assertThat(startedShardEntry.message, equalTo(reason));
+        }
+    }
+    BytesReference serialize(Writeable writeable, Version version) throws IOException {
+        try (BytesStreamOutput out = new BytesStreamOutput()) {
+            out.setVersion(version);
+            writeable.writeTo(out);
+            return out.bytes();
+        }
+    }
 }
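This BWC test pins the pre-7.0 wire contract: on streams before V_7_0_0_alpha1 the two entry types still share the old ShardEntry layout, so one can be read back as the other, and a FailedShardEntry deserialized from such a stream gets markAsStale == true, the only behaviour old nodes knew. A minimal sketch of the read-side defaulting this implies (an assumption, since the production FailedShardEntry code is not part of this hunk):

    // Assumption: pre-V_7_0_0_alpha1 streams carry no markAsStale byte,
    // so the reader falls back to the legacy always-mark-stale behaviour.
    final boolean markAsStale = in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)
        ? in.readBoolean()
        : true;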

View File

@@ -144,7 +144,7 @@ public class PrimaryTermsTests extends ESAllocationTestCase {
         logger.info("failing primary shards {} for index [{}]", shardIdsToFail, index);
         List<FailedShard> failedShards = new ArrayList<>();
         for (int shard : shardIdsToFail) {
-            failedShards.add(new FailedShard(indexShardRoutingTable.shard(shard).primaryShard(), "test", null));
+            failedShards.add(new FailedShard(indexShardRoutingTable.shard(shard).primaryShard(), "test", null, randomBoolean()));
             incrementPrimaryTerm(index, shard); // the primary failure should increment the primary term;
         }
         applyRerouteResult(allocationService.applyFailedShards(this.clusterState, failedShards,Collections.emptyList()));

View File

@@ -254,7 +254,7 @@ public class UnassignedInfoTests extends ESAllocationTestCase {
         assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(false));
         // fail shard
         ShardRouting shardToFail = clusterState.getRoutingNodes().shardsWithState(STARTED).get(0);
-        clusterState = allocation.applyFailedShards(clusterState, Collections.singletonList(new FailedShard(shardToFail, "test fail", null)));
+        clusterState = allocation.applyFailedShards(clusterState, Collections.singletonList(new FailedShard(shardToFail, "test fail", null, randomBoolean())));
         // verify the reason and details
         assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(true));
         assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));

View File

@@ -167,7 +167,7 @@ public class FailedNodeRoutingTests extends ESAllocationTestCase {
         List<FailedShard> shardsToFail = new ArrayList<>();
         List<ShardRouting> failedPrimaries = randomSubsetOf(primaries);
         failedPrimaries.stream().forEach(sr -> {
-            shardsToFail.add(new FailedShard(randomFrom(sr), "failed primary", new Exception()));
+            shardsToFail.add(new FailedShard(randomFrom(sr), "failed primary", new Exception(), randomBoolean()));
         });
         logger.info("--> state before failing shards: {}", state);

View File

@@ -117,7 +117,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         assertThat(clusterState.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));
         logger.info("--> fail primary shard recovering instance on node3 being initialized");
-        clusterState = allocation.applyFailedShard(clusterState, clusterState.getRoutingNodes().node("node3").iterator().next());
+        clusterState = allocation.applyFailedShard(clusterState, clusterState.getRoutingNodes().node("node3").iterator().next(), randomBoolean());
         assertThat(clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next().state(), equalTo(STARTED));
         assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0));
@@ -132,7 +132,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         assertThat(clusterState.getRoutingNodes().node("node3").iterator().next().state(), equalTo(INITIALIZING));
         logger.info("--> fail primary shard recovering instance on node1 being relocated");
-        clusterState = allocation.applyFailedShard(clusterState, clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next());
+        clusterState = allocation.applyFailedShard(clusterState, clusterState.getRoutingNodes().node(origPrimaryNodeId).iterator().next(), randomBoolean());
         // check promotion of replica to primary
         assertThat(clusterState.getRoutingNodes().node(origReplicaNodeId).iterator().next().state(), equalTo(STARTED));
@@ -200,7 +200,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         logger.info("fail the primary shard, will have no place to be rerouted to (single node), so stays unassigned");
         ShardRouting shardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
-        newState = strategy.applyFailedShard(clusterState, shardToFail);
+        newState = strategy.applyFailedShard(clusterState, shardToFail, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
@@ -249,7 +249,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         logger.info("fail the first shard, will have no place to be rerouted to (single node), so stays unassigned");
         ShardRouting firstShard = clusterState.getRoutingNodes().node("node1").iterator().next();
-        newState = strategy.applyFailedShard(clusterState, firstShard);
+        newState = strategy.applyFailedShard(clusterState, firstShard, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
@@ -305,7 +305,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
             logger.info("failing shard on node [{}]", failedNode);
             ShardRouting shardToFail = routingNodes.node(failedNode).iterator().next();
             if (shardRoutingsToFail.contains(shardToFail) == false) {
-                failedShards.add(new FailedShard(shardToFail, null, null));
+                failedShards.add(new FailedShard(shardToFail, null, null, randomBoolean()));
                 failedNodes.add(failedNode);
                 shardRoutingsToFail.add(shardToFail);
             }
@@ -364,7 +364,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         logger.info("fail the first shard, will start INITIALIZING on the second node");
         final ShardRouting firstShard = clusterState.getRoutingNodes().node(nodeHoldingPrimary).iterator().next();
-        newState = strategy.applyFailedShard(clusterState, firstShard);
+        newState = strategy.applyFailedShard(clusterState, firstShard, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
@@ -455,7 +455,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         logger.info("Fail the shards on node 3");
         ShardRouting shardToFail = routingNodes.node("node3").iterator().next();
-        newState = strategy.applyFailedShard(clusterState, shardToFail);
+        newState = strategy.applyFailedShard(clusterState, shardToFail, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
         routingNodes = clusterState.getRoutingNodes();
@@ -507,7 +507,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         // fail the primary shard, check replicas get removed as well...
         ShardRouting primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
-        ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail);
+        ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
         // the primary gets allocated on another node, replicas are initializing
@@ -550,7 +550,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         // fail the primary shard, check one replica gets elected to primary, others become INITIALIZING (from it)
         ShardRouting primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
-        ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail);
+        ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
         assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(1));
@@ -620,7 +620,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         // fail the primary shard again and make sure the correct replica is promoted
         ShardRouting primaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
-        ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail);
+        ClusterState newState = allocation.applyFailedShard(clusterState, primaryShardToFail, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
         // the primary gets allocated on another node
@@ -649,7 +649,7 @@ public class FailedShardsRoutingTests extends ESAllocationTestCase {
         // fail the primary shard again, and ensure the same thing happens
         ShardRouting secondPrimaryShardToFail = clusterState.routingTable().index("test").shard(0).primaryShard();
-        newState = allocation.applyFailedShard(clusterState, secondPrimaryShardToFail);
+        newState = allocation.applyFailedShard(clusterState, secondPrimaryShardToFail, randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
         // the primary gets allocated on another node
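Each of these call sites now threads a markAsStale flag into applyFailedShard; the tests pass randomBoolean() because their routing assertions should hold for either value. A hedged sketch of how the single-shard overload plausibly relates to the list-based applyFailedShards used elsewhere in this diff (an assumption, since the production method body is not among these hunks; the null message and failure mirror the FailedShard(shardToFail, null, null, ...) call above):

    // Assumed relationship, not shown in this diff: the single-shard overload
    // wraps the routing entry in a FailedShard that carries the same flag.
    public ClusterState applyFailedShard(ClusterState clusterState, ShardRouting failedShard, boolean markAsStale) {
        return applyFailedShards(clusterState,
            Collections.singletonList(new FailedShard(failedShard, null, null, markAsStale)));
    }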

View File

@@ -23,7 +23,7 @@ import org.elasticsearch.Version;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.ESAllocationTestCase;
 import org.elasticsearch.cluster.action.shard.ShardStateAction;
-import org.elasticsearch.cluster.action.shard.ShardStateAction.ShardEntry;
+import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
@@ -135,7 +135,7 @@ public class InSyncAllocationIdTests extends ESAllocationTestCase {
         logger.info("fail primary shard");
         ShardRouting startedPrimary = clusterState.getRoutingNodes().shardsWithState(STARTED).get(0);
-        clusterState = allocation.applyFailedShard(clusterState, startedPrimary);
+        clusterState = allocation.applyFailedShard(clusterState, startedPrimary, true);
         assertThat(clusterState.getRoutingTable().shardsWithState(STARTED).size(), equalTo(0));
         assertEquals(Collections.singleton(startedPrimary.allocationId().getId()),
@@ -167,7 +167,7 @@ public class InSyncAllocationIdTests extends ESAllocationTestCase {
             new ShardStateAction.ShardFailedClusterStateTaskExecutor(allocation, null, logger);
         long primaryTerm = clusterState.metaData().index("test").primaryTerm(0);
         clusterState = failedClusterStateTaskExecutor.execute(clusterState, Arrays.asList(
-            new ShardEntry(shardRoutingTable.shardId(), replicaShard.allocationId().getId(), primaryTerm, "dummy", null))
+            new FailedShardEntry(shardRoutingTable.shardId(), replicaShard.allocationId().getId(), primaryTerm, "dummy", null, true))
             ).resultingState;
         assertThat(clusterState.metaData().index("test").inSyncAllocationIds(0).size(), equalTo(1));
@@ -189,11 +189,11 @@ public class InSyncAllocationIdTests extends ESAllocationTestCase {
         long primaryTerm = clusterState.metaData().index("test").primaryTerm(0);
-        List<ShardEntry> failureEntries = new ArrayList<>();
-        failureEntries.add(new ShardEntry(
-            shardRoutingTable.shardId(), primaryShard.allocationId().getId(), 0L, "dummy", null));
-        failureEntries.add(new ShardEntry(
-            shardRoutingTable.shardId(), replicaShard.allocationId().getId(), primaryTerm, "dummy", null));
+        List<FailedShardEntry> failureEntries = new ArrayList<>();
+        failureEntries.add(new FailedShardEntry(
+            shardRoutingTable.shardId(), primaryShard.allocationId().getId(), 0L, "dummy", null, true));
+        failureEntries.add(new FailedShardEntry(
+            shardRoutingTable.shardId(), replicaShard.allocationId().getId(), primaryTerm, "dummy", null, true));
         Collections.shuffle(failureEntries, random());
         logger.info("Failing {}", failureEntries);
@@ -333,8 +333,8 @@ public class InSyncAllocationIdTests extends ESAllocationTestCase {
         assertEquals(inSyncSet, clusterState.metaData().index("test").inSyncAllocationIds(0));
         logger.info("fail primary shard");
-        clusterState = failedClusterStateTaskExecutor.execute(clusterState, Collections.singletonList(new ShardEntry(
-            shardRoutingTable.shardId(), primaryShard.allocationId().getId(), 0L, "dummy", null))).resultingState;
+        clusterState = failedClusterStateTaskExecutor.execute(clusterState, Collections.singletonList(new FailedShardEntry(
+            shardRoutingTable.shardId(), primaryShard.allocationId().getId(), 0L, "dummy", null, true))).resultingState;
         assertThat(clusterState.routingTable().index("test").shard(0).assignedShards().size(), equalTo(0));
         // in-sync allocation ids should not be updated
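These assertions pin down what the flag does to the in-sync set: a FailedShardEntry with markAsStale == true drops the allocation id from inSyncAllocationIds, while failing the primary with the stale 0L term above still unassigns it yet leaves the set untouched, per the trailing comment. A sketch of the two intents, with placeholder values and the false case inferred from the flag name rather than shown in these hunks:

    // Illustrative only; values are placeholders.
    new FailedShardEntry(shardId, allocationId, primaryTerm, "dummy", null, true);  // fail the copy and mark it stale
    new FailedShardEntry(shardId, allocationId, primaryTerm, "dummy", null, false); // fail the copy but keep it in the in-sync set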

View File

@@ -91,7 +91,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         for (int i = 0; i < retries-1; i++) {
             List<FailedShard> failedShards = Collections.singletonList(
                 new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i,
-                    new UnsupportedOperationException()));
+                    new UnsupportedOperationException(), randomBoolean()));
             ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
             assertThat(newState, not(equalTo(clusterState)));
             clusterState = newState;
@@ -104,7 +104,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         // now we go and check that we are actually stick to unassigned on the next failure
         List<FailedShard> failedShards = Collections.singletonList(
             new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom",
-                new UnsupportedOperationException()));
+                new UnsupportedOperationException(), randomBoolean()));
         ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
@@ -130,7 +130,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         for (int i = 0; i < retries-1; i++) {
             failedShards = Collections.singletonList(
                 new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom",
-                    new UnsupportedOperationException()));
+                    new UnsupportedOperationException(), randomBoolean()));
             newState = strategy.applyFailedShards(clusterState, failedShards);
             assertThat(newState, not(equalTo(clusterState)));
@@ -145,7 +145,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         // now we go and check that we are actually stick to unassigned on the next failure
         failedShards = Collections.singletonList(
             new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom",
-                new UnsupportedOperationException()));
+                new UnsupportedOperationException(), randomBoolean()));
         newState = strategy.applyFailedShards(clusterState, failedShards);
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;
@@ -164,7 +164,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         for (int i = 0; i < retries-1; i++) {
             List<FailedShard> failedShards = Collections.singletonList(
                 new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i,
-                    new UnsupportedOperationException()));
+                    new UnsupportedOperationException(), randomBoolean()));
             ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
             assertThat(newState, not(equalTo(clusterState)));
             clusterState = newState;
@@ -182,7 +182,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         {
             List<FailedShard> failedShards = Collections.singletonList(
                 new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom",
-                    new UnsupportedOperationException()));
+                    new UnsupportedOperationException(), randomBoolean()));
             ClusterState newState = strategy.applyFailedShards(clusterState, failedShards);
             assertThat(newState, not(equalTo(clusterState)));
             clusterState = newState;
@@ -231,7 +231,7 @@ public class MaxRetryAllocationDeciderTests extends ESAllocationTestCase {
         // now fail again and see if it has a new counter
         List<FailedShard> failedShards = Collections.singletonList(
             new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "ZOOOMG",
-                new UnsupportedOperationException()));
+                new UnsupportedOperationException(), randomBoolean()));
         newState = strategy.applyFailedShards(clusterState, failedShards);
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;

View File

@@ -194,7 +194,7 @@ public class SingleShardNoReplicasRoutingTests extends ESAllocationTestCase {
         logger.info("Marking the shard as failed");
         RoutingNodes routingNodes = clusterState.getRoutingNodes();
-        newState = strategy.applyFailedShard(clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING).get(0));
+        newState = strategy.applyFailedShard(clusterState, routingNodes.node("node1").shardsWithState(INITIALIZING).get(0), randomBoolean());
         assertThat(newState, not(equalTo(clusterState)));
         clusterState = newState;

View File

@@ -66,7 +66,7 @@ public class FilterAllocationDeciderTests extends ESAllocationTestCase {
         // we can initally only allocate on node2
         assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
         assertEquals(routingTable.index("idx").shard(0).shards().get(0).currentNodeId(), "node2");
-        routingTable = service.applyFailedShard(state, routingTable.index("idx").shard(0).shards().get(0)).routingTable();
+        routingTable = service.applyFailedShard(state, routingTable.index("idx").shard(0).shards().get(0), randomBoolean()).routingTable();
         state = ClusterState.builder(state).routingTable(routingTable).build();
         assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED);
         assertNull(routingTable.index("idx").shard(0).shards().get(0).currentNodeId());
@@ -114,7 +114,7 @@ public class FilterAllocationDeciderTests extends ESAllocationTestCase {
         state = service.deassociateDeadNodes(
             ClusterState.builder(state).nodes(DiscoveryNodes.builder(state.nodes()).remove("node1")).build(),
             true, "test");
-        state = service.applyFailedShard(state, routingTable.index("idx").shard(0).primaryShard());
+        state = service.applyFailedShard(state, routingTable.index("idx").shard(0).primaryShard(), randomBoolean());
         // now bring back node1 and see it's assigned
         state = service.reroute(

View File

@@ -46,7 +46,8 @@ import org.elasticsearch.cluster.ClusterStateTaskExecutor.ClusterTasksResult;
 import org.elasticsearch.cluster.ClusterStateUpdateTask;
 import org.elasticsearch.cluster.EmptyClusterInfoService;
 import org.elasticsearch.cluster.action.shard.ShardStateAction;
-import org.elasticsearch.cluster.action.shard.ShardStateAction.ShardEntry;
+import org.elasticsearch.cluster.action.shard.ShardStateAction.StartedShardEntry;
+import org.elasticsearch.cluster.action.shard.ShardStateAction.FailedShardEntry;
 import org.elasticsearch.cluster.metadata.AliasValidator;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
@@ -221,16 +222,16 @@ public class ClusterStateChanges extends AbstractComponent {
     }
     public ClusterState applyFailedShards(ClusterState clusterState, List<FailedShard> failedShards) {
-        List<ShardEntry> entries = failedShards.stream().map(failedShard ->
-            new ShardEntry(failedShard.getRoutingEntry().shardId(), failedShard.getRoutingEntry().allocationId().getId(),
-                0L, failedShard.getMessage(), failedShard.getFailure()))
+        List<FailedShardEntry> entries = failedShards.stream().map(failedShard ->
+            new FailedShardEntry(failedShard.getRoutingEntry().shardId(), failedShard.getRoutingEntry().allocationId().getId(),
+                0L, failedShard.getMessage(), failedShard.getFailure(), failedShard.markAsStale()))
             .collect(Collectors.toList());
         return runTasks(shardFailedClusterStateTaskExecutor, clusterState, entries);
     }
     public ClusterState applyStartedShards(ClusterState clusterState, List<ShardRouting> startedShards) {
-        List<ShardEntry> entries = startedShards.stream().map(startedShard ->
-            new ShardEntry(startedShard.shardId(), startedShard.allocationId().getId(), 0L, "shard started", null))
+        List<StartedShardEntry> entries = startedShards.stream().map(startedShard ->
+            new StartedShardEntry(startedShard.shardId(), startedShard.allocationId().getId(), "shard started"))
            .collect(Collectors.toList());
         return runTasks(shardStartedClusterStateTaskExecutor, clusterState, entries);
     }
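The single ShardEntry request class is thus split by direction: FailedShardEntry carries (shardId, allocationId, primaryTerm, message, failure, markAsStale), while StartedShardEntry slims down to (shardId, allocationId, message), dropping the placeholder 0L primary term the old shared entry required. Both shapes as constructed in the hunk above, with illustrative values:

    // Shapes taken from the ClusterStateChanges hunk above; values are placeholders.
    FailedShardEntry failed = new FailedShardEntry(shardId, allocationId, 0L, "shard failure", null, true);
    StartedShardEntry started = new StartedShardEntry(shardId, allocationId, "shard started");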

View File

@@ -333,7 +333,7 @@ public class IndicesClusterStateServiceRandomUpdatesTests extends AbstractIndice
                 if (persistedShardRouting.initializing() && randomBoolean()) {
                     startedShards.add(persistedShardRouting);
                 } else if (rarely()) {
-                    failedShards.add(new FailedShard(persistedShardRouting, "fake shard failure", new Exception()));
+                    failedShards.add(new FailedShard(persistedShardRouting, "fake shard failure", new Exception(), randomBoolean()));
                 }
             }
         }
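FailedShard, the allocation-layer counterpart of FailedShardEntry, gains the flag as a fourth constructor argument and exposes it through the markAsStale() accessor used in the ClusterStateChanges hunk above. A construction sketch with placeholder values:

    FailedShard failed = new FailedShard(shardRouting, "fake shard failure", new Exception(), true);
    boolean stale = failed.markAsStale(); // forwarded into FailedShardEntry when the master-level task is built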