YARN-8528. Final states in ContainerAllocation might be modified externally causing unexpected allocation results. Contributed by Xintong Song.

This commit is contained in:
Weiwei Yang 2018-07-20 22:32:11 +08:00
parent 823d576a66
commit 004e1f248e
3 changed files with 54 additions and 6 deletions

View File

@ -56,7 +56,7 @@ public class ContainerAllocation {
RMContainer containerToBeUnreserved;
private Resource resourceToBeAllocated = Resources.none();
AllocationState state;
private AllocationState state;
NodeType containerNodeType = NodeType.NODE_LOCAL;
NodeType requestLocalityType = null;

View File

@ -263,7 +263,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
reservedContainer, schedulingMode, resourceLimits);
if (null == reservedContainer) {
if (result.state == AllocationState.PRIORITY_SKIPPED) {
if (result.getAllocationState() == AllocationState.PRIORITY_SKIPPED) {
// Don't count 'skipped nodes' as a scheduling opportunity!
application.subtractSchedulingOpportunity(schedulerKey);
}
@ -487,8 +487,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// When a returned allocation is LOCALITY_SKIPPED, since we're in
// off-switch request now, we will skip this app w.r.t priorities
if (allocation.state == AllocationState.LOCALITY_SKIPPED) {
allocation.state = AllocationState.APP_SKIPPED;
if (allocation.getAllocationState() == AllocationState.LOCALITY_SKIPPED) {
allocation = ContainerAllocation.APP_SKIPPED;
}
allocation.requestLocalityType = requestLocalityType;
@ -836,8 +836,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
result = tryAllocateOnNode(clusterResource, node, schedulingMode,
resourceLimits, schedulerKey, reservedContainer);
if (AllocationState.ALLOCATED == result.state
|| AllocationState.RESERVED == result.state) {
if (AllocationState.ALLOCATED == result.getAllocationState()
|| AllocationState.RESERVED == result.getAllocationState()) {
result = doAllocation(result, node, schedulerKey, reservedContainer);
break;
}

View File

@ -134,6 +134,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AllocationState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.ResourceCommitRequest;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
@ -4930,4 +4932,50 @@ public class TestCapacityScheduler extends CapacitySchedulerTestBase {
spyCs.handle(new NodeUpdateSchedulerEvent(
spyCs.getNode(nm.getNodeId()).getRMNode()));
}
/**
 * Testcase for YARN-8528.
 *
 * <p>Checks that the shared {@link ContainerAllocation} constants report
 * their own {@link AllocationState} both before and after a scheduling
 * pass in which a request cannot be satisfied on the node being updated.
 */
@Test
public void testContainerAllocationLocalitySkipped() throws Exception {
  // Baseline: the constants must be correct before any scheduling happens.
  assertContainerAllocationConstantsIntact();

  // init RM & NMs & Nodes
  final MockRM rm = new MockRM(new CapacitySchedulerConfiguration());
  try {
    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
    rm.start();
    final MockNM nm1 = rm.registerNode("h1:1234", 4 * GB);
    // maximum-allocation-mb = 6GB
    final MockNM nm2 = rm.registerNode("h2:1234", 6 * GB);

    // submit app and request resource
    // container2 is larger than nm1 total resource, will trigger locality
    // skip
    final RMApp app = rm.submitApp(1 * GB, "app", "user");
    final MockAM am = MockRM.launchAndRegisterAM(app, rm, nm1);
    am.addRequests(new String[] {"*"}, 5 * GB, 1, 1, 2);
    am.schedule();

    // container1 (am) should be acquired, container2 should not
    RMNode node1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    cs.handle(new NodeUpdateSchedulerEvent(node1));
    ContainerId cid =
        ContainerId.newContainerId(am.getApplicationAttemptId(), 1L);
    // JUnit's contract is assertEquals(expected, actual).
    Assert.assertEquals(RMContainerState.ACQUIRED,
        cs.getRMContainer(cid).getState());
    cid = ContainerId.newContainerId(am.getApplicationAttemptId(), 2L);
    Assert.assertNull(cs.getRMContainer(cid));

    // The scheduling pass above must not have altered the shared constants.
    assertContainerAllocationConstantsIntact();
  } finally {
    // Always shut the RM down so its services do not leak into other tests.
    rm.stop();
  }
}

/**
 * Asserts that each of the shared skip-type {@link ContainerAllocation}
 * constants exercised by this test reports the {@link AllocationState}
 * matching its name.
 */
private void assertContainerAllocationConstantsIntact() {
  Assert.assertEquals(AllocationState.APP_SKIPPED,
      ContainerAllocation.APP_SKIPPED.getAllocationState());
  Assert.assertEquals(AllocationState.LOCALITY_SKIPPED,
      ContainerAllocation.LOCALITY_SKIPPED.getAllocationState());
  Assert.assertEquals(AllocationState.PRIORITY_SKIPPED,
      ContainerAllocation.PRIORITY_SKIPPED.getAllocationState());
  Assert.assertEquals(AllocationState.QUEUE_SKIPPED,
      ContainerAllocation.QUEUE_SKIPPED.getAllocationState());
}
}