From cbf20264838f536382a9d8c4cd2144faf6875c3a Mon Sep 17 00:00:00 2001 From: Weiwei Yang Date: Fri, 20 Jul 2018 22:32:11 +0800 Subject: [PATCH] YARN-8528. Final states in ContainerAllocation might be modified externally causing unexpected allocation results. Contributed by Xintong Song. --- .../allocator/ContainerAllocation.java | 2 +- .../allocator/RegularContainerAllocator.java | 10 ++-- .../capacity/TestCapacityScheduler.java | 48 +++++++++++++++++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java index f4085085004..b9b9bcff2f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocation.java @@ -56,7 +56,7 @@ public class ContainerAllocation { RMContainer containerToBeUnreserved; private Resource resourceToBeAllocated = Resources.none(); - AllocationState state; + private AllocationState state; NodeType containerNodeType = NodeType.NODE_LOCAL; NodeType requestLocalityType = null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index 99a5b84b61f..8f49b418c74 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -263,7 +263,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator { reservedContainer, schedulingMode, resourceLimits); if (null == reservedContainer) { - if (result.state == AllocationState.PRIORITY_SKIPPED) { + if (result.getAllocationState() == AllocationState.PRIORITY_SKIPPED) { // Don't count 'skipped nodes' as a scheduling opportunity! application.subtractSchedulingOpportunity(schedulerKey); } @@ -487,8 +487,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator { // When a returned allocation is LOCALITY_SKIPPED, since we're in // off-switch request now, we will skip this app w.r.t priorities - if (allocation.state == AllocationState.LOCALITY_SKIPPED) { - allocation.state = AllocationState.APP_SKIPPED; + if (allocation.getAllocationState() == AllocationState.LOCALITY_SKIPPED) { + allocation = ContainerAllocation.APP_SKIPPED; } allocation.requestLocalityType = requestLocalityType; @@ -836,8 +836,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator { result = tryAllocateOnNode(clusterResource, node, schedulingMode, resourceLimits, schedulerKey, reservedContainer); - if (AllocationState.ALLOCATED == result.state - || AllocationState.RESERVED == result.state) { + if (AllocationState.ALLOCATED == result.getAllocationState() + || AllocationState.RESERVED == result.getAllocationState()) { result = doAllocation(result, node, schedulerKey, reservedContainer); break; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 0b54010c276..79cdcfe08d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -134,6 +134,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AllocationState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ResourceCommitRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -4930,4 +4932,50 @@ public class TestCapacityScheduler extends CapacitySchedulerTestBase { spyCs.handle(new NodeUpdateSchedulerEvent( spyCs.getNode(nm.getNodeId()).getRMNode())); } + + // Testcase for YARN-8528 + // This is to test whether ContainerAllocation constants are holding correct + // values during scheduling. + @Test + public void testContainerAllocationLocalitySkipped() throws Exception { + Assert.assertEquals(AllocationState.APP_SKIPPED, + ContainerAllocation.APP_SKIPPED.getAllocationState()); + Assert.assertEquals(AllocationState.LOCALITY_SKIPPED, + ContainerAllocation.LOCALITY_SKIPPED.getAllocationState()); + Assert.assertEquals(AllocationState.PRIORITY_SKIPPED, + ContainerAllocation.PRIORITY_SKIPPED.getAllocationState()); + Assert.assertEquals(AllocationState.QUEUE_SKIPPED, + ContainerAllocation.QUEUE_SKIPPED.getAllocationState()); + + // init RM & NMs & Nodes + final MockRM rm = new MockRM(new CapacitySchedulerConfiguration()); + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + rm.start(); + final MockNM nm1 = rm.registerNode("h1:1234", 4 * GB); + final MockNM nm2 = rm.registerNode("h2:1234", 6 * GB); // maximum-allocation-mb = 6GB + + // submit app and request resource + // container2 is larger than nm1 total resource, will trigger locality skip + final RMApp app = rm.submitApp(1 * GB, "app", "user"); + final MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1); + am.addRequests(new String[] {"*"}, 5 * GB, 1, 1, 2); + am.schedule(); + + // container1 (am) should be acquired, container2 should not + RMNode node1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(node1)); + ContainerId cid = ContainerId.newContainerId(am.getApplicationAttemptId(), 1l); + Assert.assertEquals(cs.getRMContainer(cid).getState(), RMContainerState.ACQUIRED); + cid = ContainerId.newContainerId(am.getApplicationAttemptId(), 2l); + Assert.assertNull(cs.getRMContainer(cid)); + + Assert.assertEquals(AllocationState.APP_SKIPPED, + ContainerAllocation.APP_SKIPPED.getAllocationState()); + Assert.assertEquals(AllocationState.LOCALITY_SKIPPED, + ContainerAllocation.LOCALITY_SKIPPED.getAllocationState()); + Assert.assertEquals(AllocationState.PRIORITY_SKIPPED, + ContainerAllocation.PRIORITY_SKIPPED.getAllocationState()); + Assert.assertEquals(AllocationState.QUEUE_SKIPPED, + ContainerAllocation.QUEUE_SKIPPED.getAllocationState()); + } }