YARN-4477. FairScheduler: Handle condition which can result in an infinite loop in attemptScheduling. (Tao Jie via asuresh)

(cherry picked from commit e88422df45)
This commit is contained in:
Arun Suresh 2015-12-21 22:41:09 -08:00
parent fdfcffb3d1
commit 31893468b1
3 changed files with 48 additions and 5 deletions

View File

@ -1121,6 +1121,9 @@ Release 2.8.0 - UNRELEASED
YARN-4454. NM to nodelabel mapping going wrong after RM restart. YARN-4454. NM to nodelabel mapping going wrong after RM restart.
(Bibin A Chundatt via wangda) (Bibin A Chundatt via wangda)
YARN-4477. FairScheduler: Handle condition which can result in an
infinite loop in attemptScheduling. (Tao Jie via asuresh)
Release 2.7.3 - UNRELEASED Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -458,8 +458,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
* the container is {@code alreadyReserved} on the node, simply * the container is {@code alreadyReserved} on the node, simply
* update relevant bookeeping. This dispatches ro relevant handlers * update relevant bookeeping. This dispatches ro relevant handlers
* in {@link FSSchedulerNode}.. * in {@link FSSchedulerNode}..
* @return whether reservation was possible with the current threshold limits
*/ */
private void reserve(Priority priority, FSSchedulerNode node, private boolean reserve(Priority priority, FSSchedulerNode node,
Container container, NodeType type, boolean alreadyReserved) { Container container, NodeType type, boolean alreadyReserved) {
if (!reservationExceedsThreshold(node, type)) { if (!reservationExceedsThreshold(node, type)) {
@ -477,7 +478,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
node.reserveResource(this, priority, rmContainer); node.reserveResource(this, priority, rmContainer);
setReservation(node); setReservation(node);
} }
return true;
} }
return false;
} }
private boolean reservationExceedsThreshold(FSSchedulerNode node, private boolean reservationExceedsThreshold(FSSchedulerNode node,
@ -627,10 +630,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
return container.getResource(); return container.getResource();
} }
if (isReservable(container)) { // The desired container won't fit here, so reserve
// The desired container won't fit here, so reserve if (isReservable(container) &&
reserve(request.getPriority(), node, container, type, reserved); reserve(request.getPriority(), node, container, type, reserved)) {
return FairScheduler.CONTAINER_RESERVED; return FairScheduler.CONTAINER_RESERVED;
} else { } else {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {

View File

@ -981,6 +981,43 @@ public class TestFairScheduler extends FairSchedulerTestBase {
scheduler.getSchedulerApp(attId).getNumReservations(null, true)); scheduler.getSchedulerApp(attId).getNumReservations(null, true));
} }
/**
 * Exercises scheduling with assign-multiple enabled while the
 * reservable-nodes setting is 0, so that reservations exceed the threshold.
 * Two nodes are added, one scheduling request is submitted to "queue1", and
 * a node update is handled for each node. The test then checks that
 * "queue1" reports 8192 memory in use and that the application attempt
 * holds no reservations.
 *
 * NOTE(review): the 5000 ms timeout presumably guards against the
 * infinite loop in attemptScheduling described by YARN-4477, turning a
 * hang into a test failure -- confirm against the scheduler code.
 */
@Test (timeout = 5000)
public void testReservationThresholdWithAssignMultiple() throws Exception {
// Set reservable-nodes to 0 so that any reservation attempt exceeds the
// threshold, and enable assign-multiple.
conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0f);
conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true);
// Bring the scheduler up with the configuration set above.
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Add two nodes, each built from Resources.createResource(4096, 4)
// (presumably 4096 MB of memory and 4 vcores -- TODO confirm).
RMNode node1 =
MockNodes
.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
RMNode node2 =
MockNodes
.newNodeInfo(2, Resources.createResource(4096, 4), 1, "127.0.0.2");
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
// Create one scheduling request in "queue1" for "user1" (presumably 1024 MB
// per container at priority 10 -- verify against createSchedulingRequest),
// then handle a node update for each node so containers can be assigned.
ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 10);
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node1));
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node2));
// Verify capacity allocation: 8192 is the combined 4096 + 4096 of the two
// nodes, i.e. the queue is expected to be using all of both nodes.
assertEquals(8192, scheduler.getQueueManager().getQueue("queue1").
getResourceUsage().getMemory());
// Verify that the application attempt holds no reservations.
assertEquals(0,
scheduler.getSchedulerApp(attId).getNumReservations(null, true));
}
@Test (timeout = 500000) @Test (timeout = 500000)
public void testContainerReservationAttemptExceedingQueueMax() public void testContainerReservationAttemptExceedingQueueMax()
throws Exception { throws Exception {
@ -4152,6 +4189,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
@Test @Test
public void testQueueMaxAMShareWithContainerReservation() throws Exception { public void testQueueMaxAMShareWithContainerReservation() throws Exception {
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f);
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>"); out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>"); out.println("<allocations>");