delay reroute until after we publish that a shard has started

This commit is contained in:
Shay Banon 2013-02-13 23:56:15 +01:00
parent 681239b413
commit 883c593d7e
2 changed files with 32 additions and 3 deletions

View File

@ -23,6 +23,7 @@ import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.ProcessedClusterStateUpdateTask;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
@ -44,6 +45,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.elasticsearch.cluster.ClusterState.newClusterStateBuilder;
import static org.elasticsearch.cluster.routing.ImmutableShardRouting.readShardRoutingEntry;
@ -59,6 +61,7 @@ public class ShardStateAction extends AbstractComponent {
private final ThreadPool threadPool;
private final BlockingQueue<ShardRouting> startedShardsQueue = ConcurrentCollections.newBlockingQueue();
private final AtomicBoolean rerouteRequired = new AtomicBoolean();
@Inject
public ShardStateAction(Settings settings, ClusterService clusterService, TransportService transportService,
@ -137,7 +140,7 @@ public class ShardStateAction extends AbstractComponent {
// process started events as fast as possible, to make shards available
startedShardsQueue.add(shardRouting);
clusterService.submitStateUpdateTask("shard-started (" + shardRouting + "), reason [" + reason + "]", Priority.HIGH, new ClusterStateUpdateTask() {
clusterService.submitStateUpdateTask("shard-started (" + shardRouting + "), reason [" + reason + "]", Priority.HIGH, new ProcessedClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
@ -182,12 +185,32 @@ public class ShardStateAction extends AbstractComponent {
if (logger.isDebugEnabled()) {
logger.debug("applying started shards {}, reason [{}]", shards, reason);
}
RoutingAllocation.Result routingResult = allocationService.applyStartedShards(currentState, shards);
// we don't do reroute right away, we do it after publishing the fact that it was started
RoutingAllocation.Result routingResult = allocationService.applyStartedShards(currentState, shards, false);
if (!routingResult.changed()) {
return currentState;
}
return newClusterStateBuilder().state(currentState).routingResult(routingResult).build();
}
@Override
public void clusterStateProcessed(ClusterState clusterState) {
    // Called once the shard-started state update has been processed: request a follow-up reroute.
    // The flag lets queued reroute tasks coalesce; the first one to run clears it and does the
    // work, any later task finds it already cleared and returns the state untouched.
    rerouteRequired.set(true);
    final String source = "reroute post shard-started (" + shardRouting + "), reason [" + reason + "]";
    clusterService.submitStateUpdateTask(source, new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            if (!rerouteRequired.compareAndSet(true, false)) {
                // a previously executed reroute task already consumed the request
                return currentState;
            }
            RoutingAllocation.Result rerouted = allocationService.reroute(currentState);
            // only build a new cluster state when the reroute actually changed the routing
            return rerouted.changed()
                    ? newClusterStateBuilder().state(currentState).routingResult(rerouted).build()
                    : currentState;
        }
    });
}
});
}

View File

@ -79,6 +79,10 @@ public class AllocationService extends AbstractComponent {
* <p>If the same instance of the routing table is returned, then no change has been made.</p>
*/
public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List<? extends ShardRouting> startedShards) {
    // The two-argument form always asks for a reroute; use the three-argument
    // overload with false to apply the started shards without rerouting.
    final boolean withReroute = true;
    return applyStartedShards(clusterState, startedShards, withReroute);
}
public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List<? extends ShardRouting> startedShards, boolean withReroute) {
RoutingNodes routingNodes = clusterState.routingNodes();
// shuffle the unassigned nodes, just so we won't have things like poison failed shards
Collections.shuffle(routingNodes.unassigned());
@ -88,7 +92,9 @@ public class AllocationService extends AbstractComponent {
return new RoutingAllocation.Result(false, clusterState.routingTable(), allocation.explanation());
}
shardsAllocators.applyStartedShards(allocation);
if (withReroute) {
reroute(allocation);
}
return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), allocation.explanation());
}