From e88422df45550f788ae8dd73aec84bde28012aeb Mon Sep 17 00:00:00 2001 From: Arun Suresh Date: Mon, 21 Dec 2015 22:41:09 -0800 Subject: [PATCH] YARN-4477. FairScheduler: Handle condition which can result in an infinite loop in attemptScheduling. (Tao Jie via asuresh) --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../scheduler/fair/FSAppAttempt.java | 12 +++--- .../scheduler/fair/TestFairScheduler.java | 38 +++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c306c04d17a..ab4634a04ef 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1173,6 +1173,9 @@ Release 2.8.0 - UNRELEASED YARN-4454. NM to nodelabel mapping going wrong after RM restart. (Bibin A Chundatt via wangda) + YARN-4477. FairScheduler: Handle condition which can result in an + infinite loop in attemptScheduling. (Tao Jie via asuresh) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index 3778cbaa448..5f753dd6bd9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -458,8 +458,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt * the container is {@code alreadyReserved} on the node, simply * update relevant bookeeping. 
This dispatches ro relevant handlers * in {@link FSSchedulerNode}.. + * @return whether reservation was possible with the current threshold limits */ - private void reserve(Priority priority, FSSchedulerNode node, + private boolean reserve(Priority priority, FSSchedulerNode node, Container container, NodeType type, boolean alreadyReserved) { if (!reservationExceedsThreshold(node, type)) { @@ -477,7 +478,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt node.reserveResource(this, priority, rmContainer); setReservation(node); } + return true; } + return false; } private boolean reservationExceedsThreshold(FSSchedulerNode node, @@ -627,10 +630,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt return container.getResource(); } - if (isReservable(container)) { - // The desired container won't fit here, so reserve - reserve(request.getPriority(), node, container, type, reserved); - + // The desired container won't fit here, so try to reserve + if (isReservable(container) && + reserve(request.getPriority(), node, container, type, reserved)) { return FairScheduler.CONTAINER_RESERVED; } else { if (LOG.isDebugEnabled()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 2f48380c77c..430eba71077 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -981,6 +981,43 @@ public class 
TestFairScheduler extends FairSchedulerTestBase { scheduler.getSchedulerApp(attId).getNumReservations(null, true)); } + @Test (timeout = 5000) + public void testReservationThresholdWithAssignMultiple() throws Exception { + // set reservable-nodes to 0 so that any reservation exceeds the threshold + conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0f); + conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true); + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add two nodes + RMNode node1 = + MockNodes + .newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + RMNode node2 = + MockNodes + .newNodeInfo(2, Resources.createResource(4096, 4), 1, "127.0.0.2"); + NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); + scheduler.handle(nodeEvent2); + + // Create one request and assign containers + ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 10); + scheduler.update(); + scheduler.handle(new NodeUpdateSchedulerEvent(node1)); + scheduler.update(); + scheduler.handle(new NodeUpdateSchedulerEvent(node2)); + + // Verify capacity allocation + assertEquals(8192, scheduler.getQueueManager().getQueue("queue1"). 
+ getResourceUsage().getMemory()); + + // Verify that the app holds no reservations + assertEquals(0, + scheduler.getSchedulerApp(attId).getNumReservations(null, true)); + } + @Test (timeout = 500000) public void testContainerReservationAttemptExceedingQueueMax() throws Exception { @@ -4152,6 +4189,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { @Test public void testQueueMaxAMShareWithContainerReservation() throws Exception { conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f); PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); out.println(""); out.println("");