Delay reroute until after we publish that a shard has started
This commit is contained in:
parent
681239b413
commit
883c593d7e
|
@ -23,6 +23,7 @@ import org.elasticsearch.ElasticSearchException;
|
|||
import org.elasticsearch.cluster.ClusterService;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.ClusterStateUpdateTask;
|
||||
import org.elasticsearch.cluster.ProcessedClusterStateUpdateTask;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
||||
import org.elasticsearch.cluster.routing.IndexRoutingTable;
|
||||
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
|
||||
|
@ -44,6 +45,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import static org.elasticsearch.cluster.ClusterState.newClusterStateBuilder;
|
||||
import static org.elasticsearch.cluster.routing.ImmutableShardRouting.readShardRoutingEntry;
|
||||
|
@ -59,6 +61,7 @@ public class ShardStateAction extends AbstractComponent {
|
|||
private final ThreadPool threadPool;
|
||||
|
||||
private final BlockingQueue<ShardRouting> startedShardsQueue = ConcurrentCollections.newBlockingQueue();
|
||||
private final AtomicBoolean rerouteRequired = new AtomicBoolean();
|
||||
|
||||
@Inject
|
||||
public ShardStateAction(Settings settings, ClusterService clusterService, TransportService transportService,
|
||||
|
@ -137,7 +140,7 @@ public class ShardStateAction extends AbstractComponent {
|
|||
// process started events as fast as possible, to make shards available
|
||||
startedShardsQueue.add(shardRouting);
|
||||
|
||||
clusterService.submitStateUpdateTask("shard-started (" + shardRouting + "), reason [" + reason + "]", Priority.HIGH, new ClusterStateUpdateTask() {
|
||||
clusterService.submitStateUpdateTask("shard-started (" + shardRouting + "), reason [" + reason + "]", Priority.HIGH, new ProcessedClusterStateUpdateTask() {
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) {
|
||||
|
||||
|
@ -182,12 +185,32 @@ public class ShardStateAction extends AbstractComponent {
|
|||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("applying started shards {}, reason [{}]", shards, reason);
|
||||
}
|
||||
RoutingAllocation.Result routingResult = allocationService.applyStartedShards(currentState, shards);
|
||||
// we don't do reroute right away, we do it after publishing the fact that it was started
|
||||
RoutingAllocation.Result routingResult = allocationService.applyStartedShards(currentState, shards, false);
|
||||
if (!routingResult.changed()) {
|
||||
return currentState;
|
||||
}
|
||||
return newClusterStateBuilder().state(currentState).routingResult(routingResult).build();
|
||||
}
|
||||
|
||||
// Callback of the shard-started update task. The started shards are applied with
// withReroute = false in execute(), so the reroute is scheduled here as a separate,
// follow-up cluster state update task instead of being done in the same update.
@Override
|
||||
public void clusterStateProcessed(ClusterState clusterState) {
|
||||
// Flag that a reroute is pending; the flag is shared by all reroute tasks submitted below.
rerouteRequired.set(true);
|
||||
// Submitted without an explicit priority (the shard-started task itself uses Priority.HIGH).
clusterService.submitStateUpdateTask("reroute post shard-started (" + shardRouting + "), reason [" + reason + "]", new ClusterStateUpdateTask() {
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) {
|
||||
// Only the task that flips the flag from true to false performs the reroute; tasks that
// find the flag already cleared return the state untouched, which coalesces queued reroutes.
if (rerouteRequired.compareAndSet(true, false)) {
|
||||
RoutingAllocation.Result routingResult = allocationService.reroute(currentState);
|
||||
// Nothing moved: hand back the same state instance rather than building a new one.
if (!routingResult.changed()) {
|
||||
return currentState;
|
||||
}
|
||||
return newClusterStateBuilder().state(currentState).routingResult(routingResult).build();
|
||||
} else {
|
||||
return currentState;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -79,6 +79,10 @@ public class AllocationService extends AbstractComponent {
|
|||
* <p>If the same instance of the routing table is returned, then no change has been made.</p>
|
||||
*/
|
||||
public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List<? extends ShardRouting> startedShards) {
|
||||
// Convenience overload that keeps the pre-existing two-argument signature:
// delegates to the three-argument variant with withReroute = true.
return applyStartedShards(clusterState, startedShards, true);
|
||||
}
|
||||
|
||||
public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List<? extends ShardRouting> startedShards, boolean withReroute) {
|
||||
RoutingNodes routingNodes = clusterState.routingNodes();
|
||||
// shuffle the unassigned nodes, just so we won't have things like poison failed shards
|
||||
Collections.shuffle(routingNodes.unassigned());
|
||||
|
@ -88,7 +92,9 @@ public class AllocationService extends AbstractComponent {
|
|||
return new RoutingAllocation.Result(false, clusterState.routingTable(), allocation.explanation());
|
||||
}
|
||||
shardsAllocators.applyStartedShards(allocation);
|
||||
if (withReroute) {
|
||||
reroute(allocation);
|
||||
}
|
||||
return new RoutingAllocation.Result(true, new RoutingTable.Builder().updateNodes(routingNodes).build().validateRaiseException(clusterState.metaData()), allocation.explanation());
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue