YARN-8127. Resource leak when async scheduling is enabled. Contributed by Tao Yang.

(cherry picked from commit 7eb783e2634d8c11fb646f1f2fdf597336325312)
This commit is contained in:
Weiwei Yang 2018-04-11 17:15:25 +08:00 committed by Eric Payne
parent 6d11f7ae8c
commit dc03afc7df
2 changed files with 100 additions and 0 deletions

View File

@@ -344,6 +344,16 @@ private boolean commonCheckContainerAllocation(
return false;
// If allocate from reserved container, make sure node is still reserved
if (allocation.getAllocateFromReservedContainer() != null
&& reservedContainerOnNode == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Try to allocate from reserved container " + allocation
.getContainerId() + ", but node is not reserved");
return false;
// Do we have enough space on this node?
Resource availableResource = Resources.clone(

View File

@@ -579,6 +579,96 @@ public Boolean answer(InvocationOnMock invocation) throws Exception {
// Testcase for YARN-8127 (resource leak when async scheduling is enabled).
// Scenario: a 2G container is reserved on nm1 while only 1G is unallocated,
// the other non-AM container on nm1 is then completed to free space, and the
// proposal that allocates from the reserved container is committed three
// times in a row. The test then checks the number of running containers on
// nm1 to verify how the duplicated proposals were handled.
@Test (timeout = 30000)
public void testCommitDuplicatedAllocateFromReservedProposals()
throws Exception {
// Disable async-scheduling so that the complex scenario can be simulated
// manually through explicit scheduler events.
Configuration disableAsyncConf = new Configuration(conf);
CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false);
// Init RM & NMs: two nodes are registered with 8 GB each; only the handle
// of the first one (nm1) is kept.
final MockRM rm = new MockRM(disableAsyncConf);
final MockNM nm1 = rm.registerNode("", 8 * GB);
rm.registerNode("", 8 * GB);
// Init scheduler & nodes: keep looping while the scheduler's node tracker
// reports fewer than 2 nodes.
while (
((CapacityScheduler) rm.getRMContext().getScheduler()).getNodeTracker()
.nodeCount() < 2) {
((AbstractYarnScheduler) rm.getRMContext().getScheduler())
final CapacityScheduler cs =
(CapacityScheduler) rm.getRMContext().getScheduler();
final SchedulerNode sn1 = cs.getSchedulerNode(nm1.getNodeId());
// Launch the app and register its AM through nm1.
RMApp app = rm.submitApp(1 * GB, "app", "user", null, false, "default",
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, null, null, true, true);
MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
FiCaSchedulerApp schedulerApp =
// The app asks for one 6G container.
// Afterwards nm1 runs 2 containers (container_01 for the AM, and
// container_02), leaving 1G unallocated on the node.
allocateAndLaunchContainers(am, nm1, rm, 1,
Resources.createResource(6 * GB), 0, 2);
Assert.assertEquals(2, sn1.getNumContainers());
Assert.assertEquals(1 * GB, sn1.getUnallocatedResource().getMemorySize());
// The app asks for five 2G containers.
// After a node update, one 2G container is expected to be reserved for
// the app (only 1G is unallocated on nm1 at this point).
.newInstance(Priority.newInstance(0), "*",
Resources.createResource(2 * GB), 5)), null);
cs.handle(new NodeUpdateSchedulerEvent(sn1.getRMNode()));
Assert.assertEquals(1, schedulerApp.getReservedContainers().size());
// RM kills the 6G container_02: every running container on nm1 whose
// container id is not 1 is completed, after which 7G must be unallocated.
for (RMContainer rmContainer : sn1.getCopiedListOfRunningContainers()) {
if (rmContainer.getContainerId().getContainerId() != 1) {
cs.completedContainer(rmContainer, ContainerStatus
ContainerState.COMPLETE, "",
Assert.assertEquals(7 * GB, sn1.getUnallocatedResource().getMemorySize());
final CapacityScheduler spyCs = Mockito.spy(cs);
// Intercept CapacityScheduler#tryCommit on the spy: when the commit
// request allocates from a reserved container, submit the same proposal
// to the real scheduler (cs) three times, to simulate the duplicated
// proposals described in YARN-8127.
Mockito.doAnswer(new Answer<Object>() {
public Boolean answer(InvocationOnMock invocation) throws Exception {
ResourceCommitRequest request =
(ResourceCommitRequest) invocation.getArguments()[1];
if (request.getFirstAllocatedOrReservedContainer()
.getAllocateFromReservedContainer() != null) {
for (int i=0; i<3; i++) {
cs.tryCommit((Resource) invocation.getArguments()[0],
(ResourceCommitRequest) invocation.getArguments()[1]);
// nm1 must hold exactly 2 running containers here.
// NOTE(review): presumably the AM plus a single container allocated from
// the reservation, i.e. the duplicates were rejected — confirm.
Assert.assertEquals(2, sn1.getCopiedListOfRunningContainers().size());
Assert.assertEquals(5 * GB,
return true;
// Send a node update through the spied scheduler so that the intercepted
// tryCommit path above is exercised.
spyCs.handle(new NodeUpdateSchedulerEvent(sn1.getRMNode()));
private void allocateAndLaunchContainers(MockAM am, MockNM nm, MockRM rm,
int nContainer, Resource resource, int priority, int startContainerId)
throws Exception {