From e3a01049807fa7694a3ee3b086ff70653c1a611f Mon Sep 17 00:00:00 2001 From: Wangda Tan Date: Fri, 15 Sep 2017 21:25:21 -0700 Subject: [PATCH] YARN-7149. Cross-queue preemption sometimes starves an underserved queue. (Eric Payne via wangda) Change-Id: Ib269991dbebce160378e8372ee6d24849c4a5ed6 (cherry picked from commit 3dfa937a1fadfc62947755872515f549b3b15e6a) --- .../scheduler/capacity/UsersManager.java | 4 +- .../capacity/TestContainerAllocation.java | 50 +++++++++++++++++++ .../scheduler/capacity/TestLeafQueue.java | 8 +-- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java index 5f7d185c8d2..33f30b00412 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java @@ -731,7 +731,9 @@ public class UsersManager implements AbstractUsersManager { * should be higher than queue-hard-limit * ulMin */ float usersSummedByWeight = activeUsersTimesWeights; - Resource resourceUsed = totalResUsageForActiveUsers.getUsed(nodePartition); + Resource resourceUsed = Resources.add( + totalResUsageForActiveUsers.getUsed(nodePartition), + required); // For non-activeUser calculation, consider all users count. 
if (!activeUser) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index dd6b25b78c8..906febfde74 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.SecurityUtilTestHelper; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.Container; @@ -887,4 +888,53 @@ public class TestContainerAllocation { rm1.close(); } + + + + @Test(timeout = 60000) + public void testUserLimitAllocationMultipleContainers() throws Exception { + CapacitySchedulerConfiguration newConf = + (CapacitySchedulerConfiguration) TestUtils + .getConfigurationWithMultipleQueues(conf); + newConf.setUserLimit("root.c", 50); + MockRM rm1 = new MockRM(newConf); + + rm1.getRMContext().setNodeLabelManager(mgr); + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 1000 * GB); + + // launch app from 1st user to queue C, AM container should be launched in nm1 + RMApp app1 = rm1.submitApp(2 * GB, "app", "user1", null, "c"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // launch app from 2nd 
user to queue C, AM container should be launched in nm1 + RMApp app2 = rm1.submitApp(2 * GB, "app", "user2", null, "c"); + MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1); + + // Each application asks 1000 * 5GB containers + am1.allocate("*", 5 * GB, 1000, null); + am1.allocate("h1", 5 * GB, 1000, null); + am1.allocate(NetworkTopology.DEFAULT_RACK, 5 * GB, 1000, null); + + // Each application asks 1000 * 5GB containers + am2.allocate("*", 5 * GB, 1000, null); + am2.allocate("h1", 5 * GB, 1000, null); + am2.allocate(NetworkTopology.DEFAULT_RACK, 5 * GB, 1000, null); + + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + FiCaSchedulerApp schedulerApp1 = + cs.getApplicationAttempt(am1.getApplicationAttemptId()); + FiCaSchedulerApp schedulerApp2 = + cs.getApplicationAttempt(am2.getApplicationAttemptId()); + + // One node update should allocate many containers to both apps at once; + // expect 101 live containers for app1 and 100 for app2 + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + Assert.assertEquals(101, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(100, schedulerApp2.getLiveContainers().size()); + + rm1.close(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index d45f756a2e9..a32352b3af2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -1252,7 +1252,7 @@ public class TestLeafQueue { //app4 is user 0 //maxqueue 16G, userlimit 7G, used 8G, headroom 5G //(8G used is 6G from this test case - app4, 2 from last test case, app_1) - assertEquals(0*GB, app_4.getHeadroom().getMemorySize()); + assertEquals(1*GB, app_4.getHeadroom().getMemorySize()); } @Test @@ -1436,7 +1436,7 @@ public class TestLeafQueue { assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); // TODO, fix headroom in the future patch - assertEquals(0*GB, app_0.getHeadroom().getMemorySize()); + assertEquals(1*GB, app_0.getHeadroom().getMemorySize()); // User limit = 2G, 2 in use assertEquals(0*GB, app_1.getHeadroom().getMemorySize()); // the application is not yet active @@ -1449,8 +1449,8 @@ public class TestLeafQueue { assertEquals(3*GB, a.getUsedResources().getMemorySize()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(0*GB, app_0.getHeadroom().getMemorySize()); // 4G - 3G - assertEquals(0*GB, app_1.getHeadroom().getMemorySize()); // 4G - 3G + assertEquals(1*GB, app_0.getHeadroom().getMemorySize()); // 4G - 3G + assertEquals(1*GB, app_1.getHeadroom().getMemorySize()); // 4G - 3G // Submit requests for app_1 and set max-cap a.setMaxCapacity(.1f);