From 6fcbf9b848c63465d26a40387a9be212e708f80b Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Sat, 21 Jun 2014 07:30:07 +0000 Subject: [PATCH] YARN-2187. FairScheduler: Disable max-AM-share check by default. (Robert Kanter via kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1604321 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../fair/AllocationConfiguration.java | 2 +- .../fair/AllocationFileLoaderService.java | 2 +- .../scheduler/fair/FSLeafQueue.java | 3 + .../scheduler/fair/TestFairScheduler.java | 86 +++++++++++++++++++ .../src/site/apt/FairScheduler.apt.vm | 5 +- 6 files changed, 97 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 77929357871..52a61809fb9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -259,6 +259,9 @@ Release 2.5.0 - UNRELEASED NMLeveldbStateStoreService#loadLocalizationState() within finally block (Junping Du via jlowe) + YARN-2187. FairScheduler: Disable max-AM-share check by default. 
+ (Robert Kanter via kasha) + Release 2.4.1 - 2014-06-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java index 237cad29c19..d4ba88faf14 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java @@ -126,7 +126,7 @@ public class AllocationConfiguration { queueMaxAMShares = new HashMap(); userMaxAppsDefault = Integer.MAX_VALUE; queueMaxAppsDefault = Integer.MAX_VALUE; - queueMaxAMShareDefault = 1.0f; + queueMaxAMShareDefault = -1.0f; queueAcls = new HashMap>(); minSharePreemptionTimeouts = new HashMap(); defaultMinSharePreemptionTimeout = Long.MAX_VALUE; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java index 064bdfc817f..4cc88c140d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java @@ -221,7 +221,7 @@ public class AllocationFileLoaderService extends AbstractService { new HashMap>(); int userMaxAppsDefault = Integer.MAX_VALUE; int queueMaxAppsDefault = Integer.MAX_VALUE; - float queueMaxAMShareDefault = 1.0f; + float queueMaxAMShareDefault = -1.0f; long fairSharePreemptionTimeout = Long.MAX_VALUE; long defaultMinSharePreemptionTimeout = Long.MAX_VALUE; SchedulingPolicy defaultSchedPolicy = SchedulingPolicy.DEFAULT_POLICY; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index 21dbdc5faca..8f957382e6a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -308,6 +308,9 @@ public class FSLeafQueue extends FSQueue { public boolean canRunAppAM(Resource amResource) { float maxAMShare = scheduler.getAllocationConfiguration().getQueueMaxAMShare(getName()); + if (Math.abs(maxAMShare - -1.0f) < 0.0001) { + return true; + } Resource maxAMResource = Resources.multiply(getFairShare(), maxAMShare); Resource ifRunAMResource = Resources.add(amResourceUsage, amResource); return !policy diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 9d8b1d117d0..dd42aa0e183 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -2483,6 +2483,92 @@ public class TestFairScheduler extends FairSchedulerTestBase { 0, queue1.getAmResourceUsage().getMemory()); } + @Test + public void testQueueMaxAMShareDefault() throws Exception { + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println("<?xml version=\"1.0\"?>"); + out.println("<allocations>"); + out.println("<queue name=\"queue1\">"); + out.println("</queue>"); + out.println("<queue name=\"queue2\">"); + out.println("<maxAMShare>1.0</maxAMShare>"); + out.println("</queue>"); + out.println("<queue name=\"queue3\">"); + out.println("</queue>"); + out.println("<queue name=\"queue4\">"); + out.println("</queue>"); + out.println("<queue name=\"queue5\">"); + out.println("</queue>"); + out.println("</allocations>"); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + RMNode node = + MockNodes.newNodeInfo(1, Resources.createResource(8192, 20), + 0, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); + scheduler.handle(nodeEvent); + scheduler.update(); + + FSLeafQueue queue1 = + scheduler.getQueueManager().getLeafQueue("queue1", true); + assertEquals("Queue queue1's fair share should be 1366", + 1366,
queue1.getFairShare().getMemory()); + FSLeafQueue queue2 = + scheduler.getQueueManager().getLeafQueue("queue2", true); + assertEquals("Queue queue2's fair share should be 1366", + 1366, queue2.getFairShare().getMemory()); + FSLeafQueue queue3 = + scheduler.getQueueManager().getLeafQueue("queue3", true); + assertEquals("Queue queue3's fair share should be 1366", + 1366, queue3.getFairShare().getMemory()); + FSLeafQueue queue4 = + scheduler.getQueueManager().getLeafQueue("queue4", true); + assertEquals("Queue queue4's fair share should be 1366", + 1366, queue4.getFairShare().getMemory()); + FSLeafQueue queue5 = + scheduler.getQueueManager().getLeafQueue("queue5", true); + assertEquals("Queue queue5's fair share should be 1366", + 1366, queue5.getFairShare().getMemory()); + + Resource amResource1 = Resource.newInstance(2048, 1); + int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority(); + + // Exceeds queue limit, but default maxAMShare is -1.0 so it doesn't matter + ApplicationAttemptId attId1 = createAppAttemptId(1, 1); + createApplicationWithAMResource(attId1, "queue1", "test1", amResource1); + createSchedulingRequestExistingApplication(2048, 1, amPriority, attId1); + FSSchedulerApp app1 = scheduler.getSchedulerApp(attId1); + scheduler.update(); + scheduler.handle(updateEvent); + assertEquals("Application1's AM requests 2048 MB memory", + 2048, app1.getAMResource().getMemory()); + assertEquals("Application1's AM should be running", + 1, app1.getLiveContainers().size()); + assertEquals("Queue1's AM resource usage should be 2048 MB memory", + 2048, queue1.getAmResourceUsage().getMemory()); + + // Exceeds queue limit, and maxAMShare is 1.0 + ApplicationAttemptId attId2 = createAppAttemptId(2, 1); + createApplicationWithAMResource(attId2, "queue2", "test1", amResource1); + createSchedulingRequestExistingApplication(2048, 1, amPriority, attId2); + FSSchedulerApp app2 = scheduler.getSchedulerApp(attId2); + scheduler.update(); + 
scheduler.handle(updateEvent); + assertEquals("Application2's AM requests 2048 MB memory", + 2048, app2.getAMResource().getMemory()); + assertEquals("Application2's AM should not be running", + 0, app2.getLiveContainers().size()); + assertEquals("Queue2's AM resource usage should be 0 MB memory", + 0, queue2.getAmResourceUsage().getMemory()); + } + @Test public void testMaxRunningAppsHierarchicalQueues() throws Exception { conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm index 23faf27bf26..b9cda2c254d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm @@ -239,8 +239,9 @@ Allocation file format * maxAMShare: limit the fraction of the queue's fair share that can be used to run application masters. This property can only be used for leaf queues. - Default value is 1.0f, which means AMs in the leaf queue can take up to 100% - of both the memory and CPU fair share. + For example, if set to 1.0f, then AMs in the leaf queue can take up to 100% + of both the memory and CPU fair share. The default value is -1.0f, which + means that this check is disabled. * weight: to share the cluster non-proportionally with other queues. Weights default to 1, and a queue with weight 2 should receive approximately twice