YARN-7149. Cross-queue preemption sometimes starves an underserved queue. (Eric Payne via wangda)

Change-Id: Ib269991dbebce160378e8372ee6d24849c4a5ed6
(cherry picked from commit 3dfa937a1fadfc62947755872515f549b3b15e6a)
This commit is contained in:
Wangda Tan 2017-09-15 21:25:21 -07:00
parent 03b6e95fa5
commit e3a0104980
3 changed files with 57 additions and 5 deletions

View File

@ -731,7 +731,9 @@ public class UsersManager implements AbstractUsersManager {
* should be higher than queue-hard-limit * ulMin * should be higher than queue-hard-limit * ulMin
*/ */
float usersSummedByWeight = activeUsersTimesWeights; float usersSummedByWeight = activeUsersTimesWeights;
Resource resourceUsed = totalResUsageForActiveUsers.getUsed(nodePartition); Resource resourceUsed = Resources.add(
totalResUsageForActiveUsers.getUsed(nodePartition),
required);
// For non-activeUser calculation, consider all users count. // For non-activeUser calculation, consider all users count.
if (!activeUser) { if (!activeUser) {

View File

@ -24,6 +24,7 @@ import java.util.List;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.security.SecurityUtilTestHelper; import org.apache.hadoop.security.SecurityUtilTestHelper;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
@ -887,4 +888,53 @@ public class TestContainerAllocation {
rm1.close(); rm1.close();
} }
/**
 * Verifies that a single node heartbeat can allocate many containers to two
 * users sharing queue "c" when the queue's user limit is set to 50.
 *
 * Two applications (user1 and user2) each request 1000 containers of 5GB on
 * a single 1000GB node. After one node update the test expects app1 to hold
 * 101 live containers and app2 to hold 100.
 *
 * @throws Exception if the mock cluster fails to start or allocate
 */
@Test(timeout = 60000)
public void testUserLimitAllocationMultipleContainers() throws Exception {
  CapacitySchedulerConfiguration newConf =
      (CapacitySchedulerConfiguration) TestUtils
          .getConfigurationWithMultipleQueues(conf);
  newConf.setUserLimit("root.c", 50);
  MockRM rm1 = new MockRM(newConf);
  // Close the RM in a finally block so that a failed assertion (or any
  // exception during setup) does not leave it running for later tests.
  try {
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    MockNM nm1 = rm1.registerNode("h1:1234", 1000 * GB);

    // launch app from 1st user to queue C, AM container should be launched
    // in nm1
    RMApp app1 = rm1.submitApp(2 * GB, "app", "user1", null, "c");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);

    // launch app from 2nd user to queue C, AM container should be launched
    // in nm1
    RMApp app2 = rm1.submitApp(2 * GB, "app", "user2", null, "c");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);

    // app1 asks for 1000 * 5GB containers (off-switch, node and rack level)
    am1.allocate("*", 5 * GB, 1000, null);
    am1.allocate("h1", 5 * GB, 1000, null);
    am1.allocate(NetworkTopology.DEFAULT_RACK, 5 * GB, 1000, null);

    // app2 asks for 1000 * 5GB containers (off-switch, node and rack level)
    am2.allocate("*", 5 * GB, 1000, null);
    am2.allocate("h1", 5 * GB, 1000, null);
    am2.allocate(NetworkTopology.DEFAULT_RACK, 5 * GB, 1000, null);

    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    FiCaSchedulerApp schedulerApp1 =
        cs.getApplicationAttempt(am1.getApplicationAttemptId());
    FiCaSchedulerApp schedulerApp2 =
        cs.getApplicationAttempt(am2.getApplicationAttemptId());

    // A single node update is expected to hand out containers to both apps.
    // NOTE(review): the counts presumably include each app's AM container,
    // i.e. 100 + AM for app1 and 99 + AM for app2 - confirm.
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    Assert.assertEquals(101, schedulerApp1.getLiveContainers().size());
    Assert.assertEquals(100, schedulerApp2.getLiveContainers().size());
  } finally {
    rm1.close();
  }
}
} }

View File

@ -1252,7 +1252,7 @@ public class TestLeafQueue {
//app4 is user 0 //app4 is user 0
//maxqueue 16G, userlimit 7G, used 8G, headroom 5G //maxqueue 16G, userlimit 7G, used 8G, headroom 5G
//(8G used is 6G from this test case - app4, 2 from last test case, app_1) //(8G used is 6G from this test case - app4, 2 from last test case, app_1)
assertEquals(0*GB, app_4.getHeadroom().getMemorySize()); assertEquals(1*GB, app_4.getHeadroom().getMemorySize());
} }
@Test @Test
@ -1436,7 +1436,7 @@ public class TestLeafQueue {
assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize());
// TODO, fix headroom in the future patch // TODO, fix headroom in the future patch
assertEquals(0*GB, app_0.getHeadroom().getMemorySize()); assertEquals(1*GB, app_0.getHeadroom().getMemorySize());
// User limit = 2G, 2 in use // User limit = 2G, 2 in use
assertEquals(0*GB, app_1.getHeadroom().getMemorySize()); assertEquals(0*GB, app_1.getHeadroom().getMemorySize());
// the application is not yet active // the application is not yet active
@ -1449,8 +1449,8 @@ public class TestLeafQueue {
assertEquals(3*GB, a.getUsedResources().getMemorySize()); assertEquals(3*GB, a.getUsedResources().getMemorySize());
assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize());
assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize());
assertEquals(0*GB, app_0.getHeadroom().getMemorySize()); // 4G - 3G assertEquals(1*GB, app_0.getHeadroom().getMemorySize()); // 4G - 3G
assertEquals(0*GB, app_1.getHeadroom().getMemorySize()); // 4G - 3G assertEquals(1*GB, app_1.getHeadroom().getMemorySize()); // 4G - 3G
// Submit requests for app_1 and set max-cap // Submit requests for app_1 and set max-cap
a.setMaxCapacity(.1f); a.setMaxCapacity(.1f);