From c322e749d6340fc983043f76b40d4095c1aa78b6 Mon Sep 17 00:00:00 2001
From: Wangda Tan
Date: Wed, 3 Aug 2016 11:29:12 -0700
Subject: [PATCH] YARN-5342. Improve non-exclusive node partition resource
 allocation in Capacity Scheduler. (Sunil G via wangda)

---
 .../allocator/RegularContainerAllocator.java  | 14 +++++++++++---
 .../capacity/TestApplicationPriority.java     |  2 ++
 .../TestNodeLabelContainerAllocation.java     | 17 ++++++++---------
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
index ad6ada84b36..5f35f8c0619 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -660,14 +661,21 @@ ContainerAllocation doAllocation(ContainerAllocation allocationResult,
       }
 
       // Non-exclusive scheduling opportunity is different: we need reset
-      // it every time to make sure non-labeled resource request will be
+      // it when:
+      // - It allocated on the default partition
+      //
+      // This is to make sure non-labeled resource request will be
       // most likely allocated on non-labeled nodes first.
-      application.resetMissedNonPartitionedRequestSchedulingOpportunity(priority);
+      if (StringUtils.equals(node.getPartition(),
+          RMNodeLabelsManager.NO_LABEL)) {
+        application
+            .resetMissedNonPartitionedRequestSchedulingOpportunity(priority);
+      }
     }
 
     return allocationResult;
   }
-
+
   private ContainerAllocation allocate(Resource clusterResource,
       FiCaSchedulerNode node, SchedulingMode schedulingMode,
       ResourceLimits resourceLimits, Priority priority,
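The hunk above is the core of the patch: before it, every successful allocation
reset the missed-opportunity counter for non-partitioned requests, even when the
allocation happened on a labeled node; after it, only an allocation on the default
(empty) partition does. A toy model of the new condition follows; it is a sketch,
not the real doAllocation() path, and the counter and partition string are
stand-ins for state kept by the application attempt:

import org.apache.commons.lang.StringUtils;

public class NonExclusiveResetSketch {
  // RMNodeLabelsManager.NO_LABEL is the empty string; hard-coded here so the
  // sketch compiles without the resourcemanager classes on the classpath.
  static final String NO_LABEL = "";

  // missedOpportunities stands in for the per-priority counter that
  // resetMissedNonPartitionedRequestSchedulingOpportunity() clears.
  static int afterAllocation(String nodePartition, int missedOpportunities) {
    if (StringUtils.equals(nodePartition, NO_LABEL)) {
      return 0; // allocated on the default partition: counting starts over
    }
    return missedOpportunities; // labeled node: keep the accumulated count
  }

  public static void main(String[] args) {
    System.out.println(afterAllocation("", 3));  // 0: reset, as before the patch
    System.out.println(afterAllocation("y", 3)); // 3: preserved, the new behavior
  }
}

Keeping the count across labeled-node allocations means a default-partition request
reaches its non-exclusive scheduling threshold sooner, instead of being pushed back
every time some other request lands on a labeled node.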
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java
index 2a1c642ce81..b8b46c6f0e3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java
@@ -47,6 +47,7 @@
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
@@ -280,6 +281,7 @@ public void testPriorityWithPendingApplications() throws Exception {
     // If app3 (highest priority among rest) gets active, it indicates that
     // priority is working with pendingApplications.
     rm.killApp(app1.getApplicationId());
+    rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED);
 
     // kick the scheduler, app3 (high among pending) gets free space
     MockAM am3 = MockRM.launchAM(app3, rm, nm1);
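The single added line here closes a timing hole rather than changing behavior:
MockRM.killApp() returns once the kill event is dispatched, so the launchAM() call
that follows could previously race with app1's teardown. The test flow above, with
the steps annotated (a restatement for clarity, not new API):

// Kill is asynchronous: killApp() returns as soon as the event is dispatched.
rm.killApp(app1.getApplicationId());
// Block until the attempt is actually KILLED, so app1's resources have been
// released before the test expects app3 to be activated.
rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED);
// Only now is it safe to expect free space for the next pending application.
MockAM am3 = MockRM.launchAM(app3, rm, nm1);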
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
index dc0a17d712d..13090ca44d8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
@@ -690,8 +690,6 @@ public RMNodeLabelsManager createNodeLabelManager() {
     rm1.start();
     MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); // label = y
     MockNM nm2 = rm1.registerNode("h2:1234", 100 * GB); // label = <empty>
-
-    ContainerId nextContainerId;
 
     // launch an app to queue b1 (label = y), AM container should be launched in nm3
     RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
@@ -699,13 +697,14 @@ public RMNodeLabelsManager createNodeLabelManager() {
 
     // request containers from am2, priority=1 asks for "" and priority=2 asks
     // for "y", "y" container should be allocated first
-    nextContainerId =
-        ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
     am1.allocate("*", 1 * GB, 1, 1, new ArrayList<ContainerId>(), "");
     am1.allocate("*", 1 * GB, 1, 2, new ArrayList<ContainerId>(), "y");
-    Assert.assertTrue(rm1.waitForState(nm1, nextContainerId,
-        RMContainerState.ALLOCATED, 10 * 1000));
-
+
+    // Do a node heartbeat once
+    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+    cs.handle(new NodeUpdateSchedulerEvent(
+        rm1.getRMContext().getRMNodes().get(nm1.getNodeId())));
+
     // Check pending resource for am2, priority=1 doesn't get allocated before
     // priority=2 allocated
     checkPendingResource(rm1, 1, am1.getApplicationAttemptId(), 1 * GB);
@@ -1579,11 +1578,11 @@ public RMNodeLabelsManager createNodeLabelManager() {
         cs.getApplicationAttempt(am3.getApplicationAttemptId()));
     checkNumOfContainersInAnAppOnGivenNode(0, nm1.getNodeId(),
         cs.getApplicationAttempt(am4.getApplicationAttemptId()));
-
+
     // Test case 7
     // After c allocated, d will go first because it has less used_capacity(x)
     // than c
-    doNMHeartbeat(rm, nm1.getNodeId(), 2);
+    doNMHeartbeat(rm, nm1.getNodeId(), 1);
     checkNumOfContainersInAnAppOnGivenNode(2, nm1.getNodeId(),
         cs.getApplicationAttempt(am1.getApplicationAttemptId()));
     checkNumOfContainersInAnAppOnGivenNode(3, nm1.getNodeId(),
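Both test adjustments in this file follow from the allocator change. Replacing the
waitForState() poll with a direct NodeUpdateSchedulerEvent drives exactly one
scheduling pass, which is what the pending-resource assertion needs; and test case 7
now expects a single heartbeat, consistent with the opportunity counter no longer
being reset by allocations on the labeled node. A sketch of a one-pass driver in the
spirit of the test's doNMHeartbeat() helper (the class and method names here are
illustrative, not part of the test):

import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;

public class OnePassHeartbeat {
  // Push exactly `times` node updates through the scheduler, synchronously.
  // Each handle() call is one scheduling pass over the node, so a test can
  // assert precisely what the Nth pass allocated, with no timeouts involved.
  static void drive(MockRM rm, MockNM nm, int times) {
    CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
    RMNode rmNode = rm.getRMContext().getRMNodes().get(nm.getNodeId());
    for (int i = 0; i < times; i++) {
      cs.handle(new NodeUpdateSchedulerEvent(rmNode));
    }
  }
}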