Prevent ShardAllocator to modify the unassigned while running allocations
The unassinged list is used to make allocation decisions but is currently modified during allocation runs which causes primaries to be throttled during allocation. If this happens newly allocated indices can be stalled for a long time turning a cluster into a RED state if concurrent relocations and / or recoveries are happening. Closes #3610
This commit is contained in:
parent
ca6c26c62d
commit
b39961b2a6
|
@ -334,14 +334,13 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
return new NodeSorter(nodesArray(), weight, this);
|
||||
}
|
||||
|
||||
private boolean initialize(RoutingNodes routing) {
|
||||
private boolean initialize(RoutingNodes routing, List<MutableShardRouting> unassigned) {
|
||||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Start distributing Shards");
|
||||
}
|
||||
|
||||
indices.addAll(allocation.routingTable().indicesRouting().keySet());
|
||||
buildModelFromAssigned(routing.shards(assignedFilter));
|
||||
return allocateUnassigned(allocation.routingNodes().unassigned(), allocation.routingNodes().ignoredUnassigned());
|
||||
return allocateUnassigned(unassigned, routing.ignoredUnassigned());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -366,8 +365,8 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Start balancing cluster");
|
||||
}
|
||||
|
||||
boolean changed = initialize(allocation.routingNodes());
|
||||
final TransactionalList<MutableShardRouting> unassigned = new TransactionalList<MutableShardRouting>(allocation.routingNodes().unassigned());
|
||||
boolean changed = initialize(allocation.routingNodes(), unassigned);
|
||||
NodeSorter sorter = newNodeSorter();
|
||||
if (nodes.size() > 1) { /* skip if we only have one node */
|
||||
for (String index : buildWeightOrderedIndidces(Operation.BALANCE, sorter)) {
|
||||
|
@ -445,6 +444,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
}
|
||||
}
|
||||
}
|
||||
unassigned.commit();
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
@ -519,7 +519,8 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Try moving shard [{}] from [{}]", shard, node);
|
||||
}
|
||||
boolean changed = initialize(allocation.routingNodes());
|
||||
final TransactionalList<MutableShardRouting> unassigned = new TransactionalList<MutableShardRouting>(allocation.routingNodes().unassigned());
|
||||
boolean changed = initialize(allocation.routingNodes(), unassigned);
|
||||
|
||||
final ModelNode sourceNode = nodes.get(node.nodeId());
|
||||
assert sourceNode != null;
|
||||
|
@ -533,6 +534,7 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
* This is not guaranteed to be balanced after this operation we still try best effort to
|
||||
* allocate on the minimal eligible node.
|
||||
*/
|
||||
|
||||
for (ModelNode currentNode : nodes) {
|
||||
if (currentNode.getNodeId().equals(node.nodeId())) {
|
||||
continue;
|
||||
|
@ -549,10 +551,11 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
if (logger.isTraceEnabled()) {
|
||||
logger.trace("Moved shard [{}] to node [{}]", shard, currentNode.getNodeId());
|
||||
}
|
||||
return true;
|
||||
changed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unassigned.commit();
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
@ -1039,4 +1042,37 @@ public class BalancedShardsAllocator extends AbstractComponent implements Shards
|
|||
return weights[weights.length - 1] - weights[0];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A list that makes a full copy of the original list and applies all
|
||||
* modification to the copied list once {@link TransactionalList#commit()}
|
||||
* is called.
|
||||
*
|
||||
*/
|
||||
@SuppressWarnings("serial")
|
||||
private static final class TransactionalList<T> extends ArrayList<T> {
|
||||
|
||||
private final List<T> originalList;
|
||||
private List<T> assertingList; // only with assert
|
||||
|
||||
TransactionalList(List<T> originalList) {
|
||||
super(originalList);
|
||||
assert copyAsseringList(originalList);
|
||||
this.originalList = originalList;
|
||||
}
|
||||
|
||||
private boolean copyAsseringList(List<T> orig) {
|
||||
this.assertingList = new ArrayList<T>(orig);
|
||||
return true;
|
||||
}
|
||||
|
||||
public void commit() {
|
||||
/* Ensure that the actual source list is not modified while
|
||||
* the transaction is running */
|
||||
assert assertingList.equals(originalList) : "The list was modified outside of the scope";
|
||||
originalList.clear();
|
||||
originalList.addAll(this);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -74,16 +74,8 @@ public class ThrottlingAllocationDecider extends AllocationDecider {
|
|||
@Override
|
||||
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
|
||||
if (shardRouting.primary()) {
|
||||
boolean primaryUnassigned = false;
|
||||
List<MutableShardRouting> unassigned = allocation.routingNodes().unassigned();
|
||||
for (int i1 = 0; i1 < unassigned.size(); i1++) {
|
||||
MutableShardRouting shard = unassigned.get(i1);
|
||||
if (shard.shardId().equals(shardRouting.shardId())) {
|
||||
primaryUnassigned = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (primaryUnassigned) {
|
||||
assert shardRouting.unassigned() || shardRouting.active();
|
||||
if (shardRouting.unassigned()) {
|
||||
// primary is unassigned, means we are going to do recovery from gateway
|
||||
// count *just the primary* currently doing recovery on the node and check against concurrent_recoveries
|
||||
int primariesInRecovery = 0;
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package org.elasticsearch.test.unit.cluster.routing.allocation;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.metadata.MetaData;
|
||||
import org.elasticsearch.cluster.routing.RoutingTable;
|
||||
|
@ -45,7 +44,6 @@ public class PreferPrimaryAllocationTests extends ElasticsearchTestCase {
|
|||
|
||||
private final ESLogger logger = Loggers.getLogger(PreferPrimaryAllocationTests.class);
|
||||
|
||||
@LuceneTestCase.AwaitsFix(bugUrl = "seems like unassigned is cleared so throttling for primaries is not properly working")
|
||||
@Test
|
||||
public void testPreferPrimaryAllocationOverReplicas() {
|
||||
logger.info("create an allocation with 1 initial recoveries");
|
||||
|
|
Loading…
Reference in New Issue