YARN-3251. Fixed a deadlock in CapacityScheduler when computing absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)

This commit is contained in:
Wangda Tan 2015-02-26 17:05:25 -08:00
parent 5b3d9bf636
commit 881084fe5c
2 changed files with 20 additions and 7 deletions

View File

@ -619,6 +619,9 @@ Release 2.6.0 - 2014-11-18
identifiers to be tampered and thus causing app submission failures in identifiers to be tampered and thus causing app submission failures in
secure mode. (Jian He via vinodkv) secure mode. (Jian He via vinodkv)
YARN-3251. Fixed a deadlock in CapacityScheduler when computing
absoluteMaxAvailableCapacity in LeafQueue (Craig Welch via wangda)
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
YARN-1707. Introduce APIs to add/remove/resize queues in the YARN-1707. Introduce APIs to add/remove/resize queues in the

View File

@ -115,6 +115,8 @@ public class LeafQueue extends AbstractCSQueue {
private final QueueHeadroomInfo queueHeadroomInfo = new QueueHeadroomInfo(); private final QueueHeadroomInfo queueHeadroomInfo = new QueueHeadroomInfo();
private volatile float absoluteMaxAvailCapacity;
public LeafQueue(CapacitySchedulerContext cs, public LeafQueue(CapacitySchedulerContext cs,
String queueName, CSQueue parent, CSQueue old) throws IOException { String queueName, CSQueue parent, CSQueue old) throws IOException {
super(cs, queueName, parent, old); super(cs, queueName, parent, old);
@ -134,6 +136,10 @@ public class LeafQueue extends AbstractCSQueue {
float absoluteMaxCapacity = float absoluteMaxCapacity =
CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent); CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
// Initially set to absoluteMaxCapacity; refreshed with a more accurate
// max-available value on each call to assignContainers
absoluteMaxAvailCapacity = absoluteMaxCapacity;
int userLimit = cs.getConfiguration().getUserLimit(getQueuePath()); int userLimit = cs.getConfiguration().getUserLimit(getQueuePath());
float userLimitFactor = float userLimitFactor =
cs.getConfiguration().getUserLimitFactor(getQueuePath()); cs.getConfiguration().getUserLimitFactor(getQueuePath());
@ -720,8 +726,18 @@ public class LeafQueue extends AbstractCSQueue {
} }
@Override @Override
public synchronized CSAssignment assignContainers(Resource clusterResource, public CSAssignment assignContainers(Resource clusterResource,
FiCaSchedulerNode node, boolean needToUnreserve) { FiCaSchedulerNode node, boolean needToUnreserve) {
// Compute absoluteMaxAvailCapacity before taking this queue's lock: we
// must not hold a lock on a queue and on its parent at the same time, as
// that can deadlock with concurrent calls that walk down the tree
// (getQueueInfo...)
absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity(
resourceCalculator, clusterResource, this);
return assignContainersInternal(clusterResource, node, needToUnreserve);
}
private synchronized CSAssignment assignContainersInternal(
Resource clusterResource, FiCaSchedulerNode node, boolean needToUnreserve) {
if(LOG.isDebugEnabled()) { if(LOG.isDebugEnabled()) {
LOG.debug("assignContainers: node=" + node.getNodeName() LOG.debug("assignContainers: node=" + node.getNodeName()
@ -1012,12 +1028,6 @@ public class LeafQueue extends AbstractCSQueue {
computeUserLimit(application, clusterResource, required, computeUserLimit(application, clusterResource, required,
queueUser, requestedLabels); queueUser, requestedLabels);
//Max avail capacity needs to take into account usage by ancestor-siblings
//which are greater than their base capacity, so we are interested in "max avail"
//capacity
float absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity(
resourceCalculator, clusterResource, this);
Resource queueMaxCap = // Queue Max-Capacity Resource queueMaxCap = // Queue Max-Capacity
Resources.multiplyAndNormalizeDown( Resources.multiplyAndNormalizeDown(
resourceCalculator, resourceCalculator,