YARN-4477. FairScheduler: Handle condition which can result in an infinite loop in attemptScheduling. (Tao Jie via asuresh)

This commit is contained in:
Arun Suresh 2015-12-21 22:41:09 -08:00
parent 0087734cc1
commit e88422df45
3 changed files with 48 additions and 5 deletions

View File

@ -1173,6 +1173,9 @@ Release 2.8.0 - UNRELEASED
YARN-4454. NM to nodelabel mapping going wrong after RM restart.
(Bibin A Chundatt via wangda)
YARN-4477. FairScheduler: Handle condition which can result in an
infinite loop in attemptScheduling. (Tao Jie via asuresh)
Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -458,8 +458,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
* the container is {@code alreadyReserved} on the node, simply
* update relevant bookkeeping. This dispatches to relevant handlers
* in {@link FSSchedulerNode}.
* @return whether reservation was possible with the current threshold limits
*/
private void reserve(Priority priority, FSSchedulerNode node,
private boolean reserve(Priority priority, FSSchedulerNode node,
Container container, NodeType type, boolean alreadyReserved) {
if (!reservationExceedsThreshold(node, type)) {
@ -477,7 +478,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
node.reserveResource(this, priority, rmContainer);
setReservation(node);
}
return true;
}
return false;
}
private boolean reservationExceedsThreshold(FSSchedulerNode node,
@ -627,10 +630,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
return container.getResource();
}
if (isReservable(container)) {
// The desired container won't fit here, so reserve
reserve(request.getPriority(), node, container, type, reserved);
if (isReservable(container) &&
reserve(request.getPriority(), node, container, type, reserved)) {
return FairScheduler.CONTAINER_RESERVED;
} else {
if (LOG.isDebugEnabled()) {

View File

@ -981,6 +981,43 @@ public class TestFairScheduler extends FairSchedulerTestBase {
scheduler.getSchedulerApp(attId).getNumReservations(null, true));
}
/**
 * Regression test for YARN-4477 (possible infinite loop in
 * attemptScheduling). Runs the scheduler with
 * {@code RESERVABLE_NODES} set to 0 and {@code ASSIGN_MULTIPLE} enabled,
 * adds two nodes created with {@code Resources.createResource(4096, 4)},
 * submits a single scheduling request to queue1, and sends one node update
 * per node. It then expects queue1 to report 8192 of memory in use and the
 * application attempt to hold no reservations. The JUnit timeout makes the
 * test fail instead of hanging if a node update never returns.
 */
@Test (timeout = 5000)
public void testReservationThresholdWithAssignMultiple() throws Exception {
// Set reservable-nodes to 0 so that any reservation attempt exceeds the
// threshold, and enable assign-multiple.
conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0f);
conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true);
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Add two nodes with identical capacity
RMNode node1 =
MockNodes
.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
RMNode node2 =
MockNodes
.newNodeInfo(2, Resources.createResource(4096, 4), 1, "127.0.0.2");
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
// Create one request and assign containers on each node in turn.
// NOTE(review): presumably this asks for 10 containers of 1024 memory each,
// i.e. more than both nodes can hold - confirm against
// createSchedulingRequest in the test base class.
ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 10);
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node1));
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node2));
// Verify capacity allocation: queue1 should be using the memory of both nodes
assertEquals(8192, scheduler.getQueueManager().getQueue("queue1").
getResourceUsage().getMemory());
// Verify the application attempt holds no reservations
assertEquals(0,
scheduler.getSchedulerApp(attId).getNumReservations(null, true));
}
@Test (timeout = 500000)
public void testContainerReservationAttemptExceedingQueueMax()
throws Exception {
@ -4152,6 +4189,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
@Test
public void testQueueMaxAMShareWithContainerReservation() throws Exception {
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f);
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>");