Merge pull request #15016 from jasontedor/shard-failure-batch

Use general cluster state batching mechanism for shard failures
This commit is contained in:
Jason Tedor 2015-12-03 14:02:48 -05:00
commit 182c22f23f
1 changed files with 37 additions and 43 deletions

View File

@ -20,9 +20,7 @@
package org.elasticsearch.cluster.action.shard;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.*;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.RoutingService;
@ -46,7 +44,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import static org.elasticsearch.cluster.routing.ShardRouting.readShardRoutingEntry;
@ -64,7 +61,6 @@ public class ShardStateAction extends AbstractComponent {
private final RoutingService routingService;
private final BlockingQueue<ShardRoutingEntry> startedShardsQueue = ConcurrentCollections.newBlockingQueue();
private final BlockingQueue<ShardRoutingEntry> failedShardQueue = ConcurrentCollections.newBlockingQueue();
@Inject
public ShardStateAction(Settings settings, ClusterService clusterService, TransportService transportService,
@ -141,54 +137,52 @@ public class ShardStateAction extends AbstractComponent {
});
}
private final ShardFailedClusterStateHandler shardFailedClusterStateHandler = new ShardFailedClusterStateHandler();
private void handleShardFailureOnMaster(final ShardRoutingEntry shardRoutingEntry) {
logger.warn("{} received shard failed for {}", shardRoutingEntry.failure, shardRoutingEntry.shardRouting.shardId(), shardRoutingEntry);
failedShardQueue.add(shardRoutingEntry);
clusterService.submitStateUpdateTask("shard-failed (" + shardRoutingEntry.shardRouting + "), message [" + shardRoutingEntry.message + "]",
new ClusterStateUpdateTask(Priority.HIGH) {
clusterService.submitStateUpdateTask(
"shard-failed (" + shardRoutingEntry.shardRouting + "), message [" + shardRoutingEntry.message + "]",
shardRoutingEntry,
ClusterStateTaskConfig.build(Priority.HIGH),
shardFailedClusterStateHandler,
shardFailedClusterStateHandler);
}
@Override
public ClusterState execute(ClusterState currentState) {
if (shardRoutingEntry.processed) {
return currentState;
}
List<ShardRoutingEntry> shardRoutingEntries = new ArrayList<>();
failedShardQueue.drainTo(shardRoutingEntries);
// nothing to process (a previous event has processed it already)
if (shardRoutingEntries.isEmpty()) {
return currentState;
}
List<FailedRerouteAllocation.FailedShard> shardRoutingsToBeApplied = new ArrayList<>(shardRoutingEntries.size());
// mark all entries as processed
for (ShardRoutingEntry entry : shardRoutingEntries) {
entry.processed = true;
shardRoutingsToBeApplied.add(new FailedRerouteAllocation.FailedShard(entry.shardRouting, entry.message, entry.failure));
}
RoutingAllocation.Result routingResult = allocationService.applyFailedShards(currentState, shardRoutingsToBeApplied);
if (!routingResult.changed()) {
return currentState;
}
return ClusterState.builder(currentState).routingResult(routingResult).build();
class ShardFailedClusterStateHandler implements ClusterStateTaskExecutor<ShardRoutingEntry>, ClusterStateTaskListener {
@Override
public BatchResult<ShardRoutingEntry> execute(ClusterState currentState, List<ShardRoutingEntry> tasks) throws Exception {
BatchResult.Builder<ShardRoutingEntry> batchResultBuilder = BatchResult.builder();
List<FailedRerouteAllocation.FailedShard> shardRoutingsToBeApplied = new ArrayList<>(tasks.size());
for (ShardRoutingEntry task : tasks) {
task.processed = true;
shardRoutingsToBeApplied.add(new FailedRerouteAllocation.FailedShard(task.shardRouting, task.message, task.failure));
}
@Override
public void onFailure(String source, Throwable t) {
logger.error("unexpected failure during [{}]", t, source);
ClusterState maybeUpdatedState = currentState;
try {
RoutingAllocation.Result result = allocationService.applyFailedShards(currentState, shardRoutingsToBeApplied);
if (result.changed()) {
maybeUpdatedState = ClusterState.builder(currentState).routingResult(result).build();
}
batchResultBuilder.successes(tasks);
} catch (Throwable t) {
batchResultBuilder.failures(tasks, t);
}
return batchResultBuilder.build(maybeUpdatedState);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (oldState != newState && newState.getRoutingNodes().unassigned().size() > 0) {
logger.trace("unassigned shards after shard failures. scheduling a reroute.");
routingService.reroute("unassigned shards after shard failures, scheduling a reroute");
}
}
});
}
@Override
public void onFailure(String source, Throwable t) {
logger.error("unexpected failure during [{}]", t, source);
}
}
private void shardStartedOnMaster(final ShardRoutingEntry shardRoutingEntry) {