From e6eccf20defcaf84a9566482371c7be196779c0d Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Wed, 5 Feb 2014 18:09:07 +0000 Subject: [PATCH] YARN-1499. Fair Scheduler changes for moving apps between queues (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1564856 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/SchedulerApplication.java | 6 +- .../fair/AllocationConfiguration.java | 3 +- .../scheduler/fair/FairScheduler.java | 119 ++++++++++++++- .../fair/MaxRunningAppsEnforcer.java | 51 ++++--- .../scheduler/fair/TestFairScheduler.java | 137 ++++++++++++++++++ .../fair/TestMaxRunningAppsEnforcer.java | 3 +- .../src/site/apt/FairScheduler.apt.vm | 34 ++++- 8 files changed, 326 insertions(+), 30 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b2f70e41afd..0cdfc6fa567 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -14,6 +14,9 @@ Trunk - Unreleased YARN-1504. RM changes for moving apps between queues (Sandy Ryza) + YARN-1499. Fair Scheduler changes for moving apps between queues (Sandy + Ryza) + IMPROVEMENTS OPTIMIZATIONS diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java index 1c4a5a638c5..4d6ca0eedbb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java @@ -25,7 +25,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; @Unstable public class SchedulerApplication { - private final Queue queue; + private Queue queue; private final String user; private SchedulerApplicationAttempt currentAttempt; @@ -37,6 +37,10 @@ public class SchedulerApplication { public Queue getQueue() { return queue; } + + public void setQueue(Queue queue) { + this.queue = queue; + } public String getUser() { return user; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java index 5a33dcb1829..6fc90f472c1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java @@ -39,7 +39,8 @@ public class AllocationConfiguration { // Minimum resource allocation for each queue private final Map minQueueResources; // Maximum amount of resources per queue - private final Map maxQueueResources; + @VisibleForTesting + final Map maxQueueResources; // Sharing weights for each queue private final Map queueWeights; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index b88ad503dd1..e057e740fb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -766,7 +766,9 @@ public class FairScheduler extends AbstractYarnScheduler { boolean wasRunnable = queue.removeApp(attempt); if (wasRunnable) { - maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt); + maxRunningEnforcer.untrackRunnableApp(attempt); + maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt, + attempt.getQueue()); } else { maxRunningEnforcer.untrackNonRunnableApp(attempt); } @@ -1355,4 +1357,119 @@ public class FairScheduler extends AbstractYarnScheduler { queue.collectSchedulerApplications(apps); return apps; } + + @Override + public synchronized String moveApplication(ApplicationId appId, + String queueName) throws YarnException { + SchedulerApplication app = applications.get(appId); + if (app == null) { + throw new YarnException("App to be moved " + appId + " not found."); + } + FSSchedulerApp attempt = (FSSchedulerApp) app.getCurrentAppAttempt(); + + FSLeafQueue oldQueue = (FSLeafQueue) app.getQueue(); + FSLeafQueue targetQueue = queueMgr.getLeafQueue(queueName, false); + if (targetQueue == null) { + throw new YarnException("Target queue " + queueName + + " not found or is not a leaf queue."); + } + if (targetQueue == oldQueue) { + return oldQueue.getQueueName(); + } + + if (oldQueue.getRunnableAppSchedulables().contains( + attempt.getAppSchedulable())) { + verifyMoveDoesNotViolateConstraints(attempt, oldQueue, targetQueue); + } + + executeMove(app, attempt, oldQueue, targetQueue); + return targetQueue.getQueueName(); + } + + private void verifyMoveDoesNotViolateConstraints(FSSchedulerApp app, + FSLeafQueue oldQueue, FSLeafQueue targetQueue) throws YarnException { + String queueName = targetQueue.getQueueName(); + ApplicationAttemptId appAttId = app.getApplicationAttemptId(); + // When checking maxResources and maxRunningApps, only need to consider + // queues before the lowest common ancestor of the two queues because the + // total running apps in queues above will not be changed. + FSQueue lowestCommonAncestor = findLowestCommonAncestorQueue(oldQueue, + targetQueue); + Resource consumption = app.getCurrentConsumption(); + + // Check whether the move would go over maxRunningApps or maxShare + FSQueue cur = targetQueue; + while (cur != lowestCommonAncestor) { + // maxRunningApps + if (cur.getNumRunnableApps() == allocConf.getQueueMaxApps(cur.getQueueName())) { + throw new YarnException("Moving app attempt " + appAttId + " to queue " + + queueName + " would violate queue maxRunningApps constraints on" + + " queue " + cur.getQueueName()); + } + + // maxShare + if (!Resources.fitsIn(Resources.add(cur.getResourceUsage(), consumption), + cur.getMaxShare())) { + throw new YarnException("Moving app attempt " + appAttId + " to queue " + + queueName + " would violate queue maxShare constraints on" + + " queue " + cur.getQueueName()); + } + + cur = cur.getParent(); + } + } + + /** + * Helper for moveApplication, which is synchronized, so all operations will + * be atomic. + */ + private void executeMove(SchedulerApplication app, FSSchedulerApp attempt, + FSLeafQueue oldQueue, FSLeafQueue newQueue) { + boolean wasRunnable = oldQueue.removeApp(attempt); + // if app was not runnable before, it may be runnable now + boolean nowRunnable = maxRunningEnforcer.canAppBeRunnable(newQueue, + attempt.getUser()); + if (wasRunnable && !nowRunnable) { + throw new IllegalStateException("Should have already verified that app " + + attempt.getApplicationId() + " would be runnable in new queue"); + } + + if (wasRunnable) { + maxRunningEnforcer.untrackRunnableApp(attempt); + } else if (nowRunnable) { + // App has changed from non-runnable to runnable + maxRunningEnforcer.untrackNonRunnableApp(attempt); + } + + attempt.move(newQueue); // This updates all the metrics + app.setQueue(newQueue); + newQueue.addApp(attempt, nowRunnable); + + if (nowRunnable) { + maxRunningEnforcer.trackRunnableApp(attempt); + } + if (wasRunnable) { + maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt, oldQueue); + } + } + + private FSQueue findLowestCommonAncestorQueue(FSQueue queue1, FSQueue queue2) { + // Because queue names include ancestors, separated by periods, we can find + // the lowest common ancestors by going from the start of the names until + // there's a character that doesn't match. + String name1 = queue1.getName(); + String name2 = queue2.getName(); + // We keep track of the last period we encounter to avoid returning root.apple + // when the queues are root.applepie and root.appletart + int lastPeriodIndex = -1; + for (int i = 0; i < Math.max(name1.length(), name2.length()); i++) { + if (name1.length() <= i || name2.length() <= i || + name1.charAt(i) != name2.charAt(i)) { + return queueMgr.getQueue(name1.substring(lastPeriodIndex)); + } else if (name1.charAt(i) == '.') { + lastPeriodIndex = i; + } + } + return queue1; // names are identical + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java index 862e44d0b98..359519a2f29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java @@ -105,26 +105,15 @@ public class MaxRunningAppsEnforcer { } /** - * Updates the relevant tracking variables after a runnable app with the given - * queue and user has been removed. Checks to see whether any other applications - * are now runnable and makes them so. + * Checks to see whether any other applications runnable now that the given + * application has been removed from the given queue. And makes them so. * * Runs in O(n log(n)) where n is the number of queues that are under the * highest queue that went from having no slack to having slack. */ - public void updateRunnabilityOnAppRemoval(FSSchedulerApp app) { + public void updateRunnabilityOnAppRemoval(FSSchedulerApp app, FSLeafQueue queue) { AllocationConfiguration allocConf = scheduler.getAllocationConfiguration(); - // Update usersRunnableApps - String user = app.getUser(); - int newUserNumRunning = usersNumRunnableApps.get(user) - 1; - if (newUserNumRunning == 0) { - usersNumRunnableApps.remove(user); - } else { - usersNumRunnableApps.put(user, newUserNumRunning); - } - - // Update runnable app bookkeeping for queues: // childqueueX might have no pending apps itself, but if a queue higher up // in the hierarchy parentqueueY has a maxRunningApps set, an app completion // in childqueueX could allow an app in some other distant child of @@ -133,16 +122,14 @@ public class MaxRunningAppsEnforcer { // the queue was already at its max before the removal. // Thus we find the ancestor queue highest in the tree for which the app // that was at its maxRunningApps before the removal. - FSLeafQueue queue = app.getQueue(); FSQueue highestQueueWithAppsNowRunnable = (queue.getNumRunnableApps() == allocConf.getQueueMaxApps(queue.getName()) - 1) ? queue : null; FSParentQueue parent = queue.getParent(); while (parent != null) { if (parent.getNumRunnableApps() == allocConf.getQueueMaxApps(parent - .getName())) { + .getName()) - 1) { highestQueueWithAppsNowRunnable = parent; } - parent.decrementRunnableApps(); parent = parent.getParent(); } @@ -157,7 +144,12 @@ public class MaxRunningAppsEnforcer { gatherPossiblyRunnableAppLists(highestQueueWithAppsNowRunnable, appsNowMaybeRunnable); } - if (newUserNumRunning == allocConf.getUserMaxApps(user) - 1) { + String user = app.getUser(); + Integer userNumRunning = usersNumRunnableApps.get(user); + if (userNumRunning == null) { + userNumRunning = 0; + } + if (userNumRunning == allocConf.getUserMaxApps(user) - 1) { List userWaitingApps = usersNonRunnableApps.get(user); if (userWaitingApps != null) { appsNowMaybeRunnable.add(userWaitingApps); @@ -208,6 +200,29 @@ public class MaxRunningAppsEnforcer { } } + /** + * Updates the relevant tracking variables after a runnable app with the given + * queue and user has been removed. + */ + public void untrackRunnableApp(FSSchedulerApp app) { + // Update usersRunnableApps + String user = app.getUser(); + int newUserNumRunning = usersNumRunnableApps.get(user) - 1; + if (newUserNumRunning == 0) { + usersNumRunnableApps.remove(user); + } else { + usersNumRunnableApps.put(user, newUserNumRunning); + } + + // Update runnable app bookkeeping for queues + FSLeafQueue queue = app.getQueue(); + FSParentQueue parent = queue.getParent(); + while (parent != null) { + parent.decrementRunnableApps(); + parent = parent.getParent(); + } + } + /** * Stops tracking the given non-runnable app */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 892438902de..a84db75c2d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -56,10 +57,12 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -2547,4 +2550,138 @@ public class TestFairScheduler { TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler( scheduler.getSchedulerApplications(), scheduler, "default"); } + + @Test + public void testMoveRunnableApp() throws Exception { + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + QueueManager queueMgr = scheduler.getQueueManager(); + FSLeafQueue oldQueue = queueMgr.getLeafQueue("queue1", true); + FSLeafQueue targetQueue = queueMgr.getLeafQueue("queue2", true); + + ApplicationAttemptId appAttId = + createSchedulingRequest(1024, 1, "queue1", "user1", 3); + ApplicationId appId = appAttId.getApplicationId(); + RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(1024)); + NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); + scheduler.handle(nodeEvent); + scheduler.handle(updateEvent); + + assertEquals(Resource.newInstance(1024, 1), oldQueue.getResourceUsage()); + scheduler.update(); + assertEquals(Resource.newInstance(3072, 3), oldQueue.getDemand()); + + scheduler.moveApplication(appId, "queue2"); + FSSchedulerApp app = scheduler.getSchedulerApp(appAttId); + assertSame(targetQueue, app.getQueue()); + assertFalse(oldQueue.getRunnableAppSchedulables() + .contains(app.getAppSchedulable())); + assertTrue(targetQueue.getRunnableAppSchedulables() + .contains(app.getAppSchedulable())); + assertEquals(Resource.newInstance(0, 0), oldQueue.getResourceUsage()); + assertEquals(Resource.newInstance(1024, 1), targetQueue.getResourceUsage()); + assertEquals(0, oldQueue.getNumRunnableApps()); + assertEquals(1, targetQueue.getNumRunnableApps()); + assertEquals(1, queueMgr.getRootQueue().getNumRunnableApps()); + + scheduler.update(); + assertEquals(Resource.newInstance(0, 0), oldQueue.getDemand()); + assertEquals(Resource.newInstance(3072, 3), targetQueue.getDemand()); + } + + @Test + public void testMoveNonRunnableApp() throws Exception { + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + QueueManager queueMgr = scheduler.getQueueManager(); + FSLeafQueue oldQueue = queueMgr.getLeafQueue("queue1", true); + FSLeafQueue targetQueue = queueMgr.getLeafQueue("queue2", true); + scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue1", 0); + scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue2", 0); + + ApplicationAttemptId appAttId = + createSchedulingRequest(1024, 1, "queue1", "user1", 3); + + assertEquals(0, oldQueue.getNumRunnableApps()); + scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); + assertEquals(0, oldQueue.getNumRunnableApps()); + assertEquals(0, targetQueue.getNumRunnableApps()); + assertEquals(0, queueMgr.getRootQueue().getNumRunnableApps()); + } + + @Test + public void testMoveMakesAppRunnable() throws Exception { + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + QueueManager queueMgr = scheduler.getQueueManager(); + FSLeafQueue oldQueue = queueMgr.getLeafQueue("queue1", true); + FSLeafQueue targetQueue = queueMgr.getLeafQueue("queue2", true); + scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue1", 0); + + ApplicationAttemptId appAttId = + createSchedulingRequest(1024, 1, "queue1", "user1", 3); + + FSSchedulerApp app = scheduler.getSchedulerApp(appAttId); + assertTrue(oldQueue.getNonRunnableAppSchedulables() + .contains(app.getAppSchedulable())); + + scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); + assertFalse(oldQueue.getNonRunnableAppSchedulables() + .contains(app.getAppSchedulable())); + assertFalse(targetQueue.getNonRunnableAppSchedulables() + .contains(app.getAppSchedulable())); + assertTrue(targetQueue.getRunnableAppSchedulables() + .contains(app.getAppSchedulable())); + assertEquals(1, targetQueue.getNumRunnableApps()); + assertEquals(1, queueMgr.getRootQueue().getNumRunnableApps()); + } + + @Test (expected = YarnException.class) + public void testMoveWouldViolateMaxAppsConstraints() throws Exception { + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + QueueManager queueMgr = scheduler.getQueueManager(); + queueMgr.getLeafQueue("queue2", true); + scheduler.getAllocationConfiguration().queueMaxApps.put("root.queue2", 0); + + ApplicationAttemptId appAttId = + createSchedulingRequest(1024, 1, "queue1", "user1", 3); + + scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); + } + + @Test (expected = YarnException.class) + public void testMoveWouldViolateMaxResourcesConstraints() throws Exception { + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + QueueManager queueMgr = scheduler.getQueueManager(); + FSLeafQueue oldQueue = queueMgr.getLeafQueue("queue1", true); + queueMgr.getLeafQueue("queue2", true); + scheduler.getAllocationConfiguration().maxQueueResources.put("root.queue2", + Resource.newInstance(1024, 1)); + + ApplicationAttemptId appAttId = + createSchedulingRequest(1024, 1, "queue1", "user1", 3); + RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(2048, 2)); + NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node); + NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node); + scheduler.handle(nodeEvent); + scheduler.handle(updateEvent); + scheduler.handle(updateEvent); + + assertEquals(Resource.newInstance(2048, 2), oldQueue.getResourceUsage()); + scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); + } + + @Test (expected = YarnException.class) + public void testMoveToNonexistentQueue() throws Exception { + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + scheduler.getQueueManager().getLeafQueue("queue1", true); + + ApplicationAttemptId appAttId = + createSchedulingRequest(1024, 1, "queue1", "user1", 3); + scheduler.moveApplication(appAttId.getApplicationId(), "queue2"); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java index 51daeec42e3..c1866f01cd1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java @@ -77,7 +77,8 @@ public class TestMaxRunningAppsEnforcer { private void removeApp(FSSchedulerApp app) { app.getQueue().removeApp(app); - maxAppsEnforcer.updateRunnabilityOnAppRemoval(app); + maxAppsEnforcer.untrackRunnableApp(app); + maxAppsEnforcer.updateRunnabilityOnAppRemoval(app, app.getQueue()); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm index 655de70c1d2..97436fb8e54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm @@ -349,16 +349,20 @@ Queue Access Control Lists (ACLs) * {Administration} - The fair scheduler provides support for administration at runtime through two mechanisms: + The fair scheduler provides support for administration at runtime through a few mechanisms: - * It is possible to modify minimum shares, limits, weights, preemption timeouts - and queue scheduling policies at runtime by editing the allocation file. The - scheduler will reload this file 10-15 seconds after it sees that it was - modified. +Modifying configuration at runtime - * Current applications, queues, and fair shares can be examined through the - ResourceManager's web interface, at - http:///cluster/scheduler. + It is possible to modify minimum shares, limits, weights, preemption timeouts + and queue scheduling policies at runtime by editing the allocation file. The + scheduler will reload this file 10-15 seconds after it sees that it was + modified. + +Monitoring through web UI + + Current applications, queues, and fair shares can be examined through the + ResourceManager's web interface, at + http:///cluster/scheduler. The following fields can be seen for each queue on the web interface: @@ -382,3 +386,17 @@ Queue Access Control Lists (ACLs) In addition to the information that the ResourceManager normally displays about each application, the web interface includes the application's fair share. +Moving applications between queues + + The Fair Scheduler supports moving a running application to a different queue. + This can be useful for moving an important application to a higher priority + queue, or for moving an unimportant application to a lower priority queue. + Apps can be moved by running "yarn application -movetoqueue appID -queue + targetQueueName". + + When an application is moved to a queue, its existing allocations become + counted with the new queue's allocations instead of the old for purposes + of determining fairness. An attempt to move an application to a queue will + fail if the addition of the app's resources to that queue would violate the + its maxRunningApps or maxResources constraints. +