YARN-1398. Fixed a deadlock in ResourceManager between users requesting queue-acls and completing containers. Contributed by Vinod Kumar Vavilapalli.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1570415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2014-02-21 02:20:20 +00:00
parent 4da6de1ca3
commit 772ead791c
2 changed files with 11 additions and 6 deletions

View File

@ -321,6 +321,9 @@ Release 2.4.0 - UNRELEASED
YARN-713. Fixed ResourceManager to not crash while building tokens when DNS YARN-713. Fixed ResourceManager to not crash while building tokens when DNS
issues happen intermittently. (Jian He via vinodkv) issues happen intermittently. (Jian He via vinodkv)
YARN-1398. Fixed a deadlock in ResourceManager between users requesting
queue-acls and completing containers. (vinodkv)
Release 2.3.1 - UNRELEASED Release 2.3.1 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -50,7 +50,6 @@
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@ -1410,12 +1409,14 @@ public void completedContainer(Resource clusterResource,
FiCaSchedulerApp application, FiCaSchedulerNode node, RMContainer rmContainer, FiCaSchedulerApp application, FiCaSchedulerNode node, RMContainer rmContainer,
ContainerStatus containerStatus, RMContainerEventType event, CSQueue childQueue) { ContainerStatus containerStatus, RMContainerEventType event, CSQueue childQueue) {
if (application != null) { if (application != null) {
boolean removed = false;
// Careful! Locking order is important! // Careful! Locking order is important!
synchronized (this) { synchronized (this) {
Container container = rmContainer.getContainer(); Container container = rmContainer.getContainer();
boolean removed = false;
// Inform the application & the node // Inform the application & the node
// Note: It's safe to assume that all state changes to RMContainer // Note: It's safe to assume that all state changes to RMContainer
// happen under scheduler's lock... // happen under scheduler's lock...
@ -1441,13 +1442,14 @@ public void completedContainer(Resource clusterResource,
" absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() +
" used=" + usedResources + " used=" + usedResources +
" cluster=" + clusterResource); " cluster=" + clusterResource);
// Inform the parent queue
getParent().completedContainer(clusterResource, application,
node, rmContainer, null, event, this);
} }
} }
if (removed) {
// Inform the parent queue _outside_ of the leaf-queue lock
getParent().completedContainer(clusterResource, application, node,
rmContainer, null, event, this);
}
} }
} }