diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index bc988c9e705..0686bc256e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -766,8 +766,18 @@ public class FSAppAttempt extends SchedulerApplicationAttempt // The desired container won't fit here, so reserve if (isReservable(capability) && reserve(request, node, reservedContainer, type, schedulerKey)) { + if (isWaitingForAMContainer()) { + updateAMDiagnosticMsg(capability, + " exceed the available resources of the node and the request is" + + " reserved"); + } return FairScheduler.CONTAINER_RESERVED; } else { + if (isWaitingForAMContainer()) { + updateAMDiagnosticMsg(capability, + " exceed the available resources of the node and the request cannot" + + " be reserved"); + } if (LOG.isDebugEnabled()) { LOG.debug("Couldn't creating reservation for " + getName() + ",at priority " + request.getPriority()); @@ -920,23 +930,31 @@ public class FSAppAttempt extends SchedulerApplicationAttempt ResourceRequest rackRequest = getResourceRequest(key, node.getRackName()); ResourceRequest nodeRequest = getResourceRequest(key, node.getNodeName()); - return - // There must be outstanding requests at the given priority: + boolean ret = true; + if (!(// There must be outstanding requests at the given priority: anyRequest != null && anyRequest.getNumContainers() > 0 && - // If locality relaxation is turned off at *-level, there must be a - // non-zero request for the node's rack: - (anyRequest.getRelaxLocality() || - (rackRequest != null && rackRequest.getNumContainers() > 0)) && - // If locality relaxation is turned off at rack-level, there must be a - // non-zero request at the node: - (rackRequest == null || rackRequest.getRelaxLocality() || - (nodeRequest != null && nodeRequest.getNumContainers() > 0)) && - // The requested container must be able to fit on the node: - Resources.lessThanOrEqual(RESOURCE_CALCULATOR, null, - anyRequest.getCapability(), - node.getRMNode().getTotalCapability()) && - // The requested container must fit in queue maximum share: - getQueue().fitsInMaxShare(anyRequest.getCapability()); + // If locality relaxation is turned off at *-level, there must be a + // non-zero request for the node's rack: + (anyRequest.getRelaxLocality() || + (rackRequest != null && rackRequest.getNumContainers() > 0)) && + // If locality relaxation is turned off at rack-level, there must be a + // non-zero request at the node: + (rackRequest == null || rackRequest.getRelaxLocality() || + (nodeRequest != null && nodeRequest.getNumContainers() > 0)) && + // The requested container must be able to fit on the node: + Resources.lessThanOrEqual(RESOURCE_CALCULATOR, null, + anyRequest.getCapability(), node.getRMNode().getTotalCapability()))) { + ret = false; + } else if (!getQueue().fitsInMaxShare(anyRequest.getCapability())) { + // The requested container must fit in queue maximum share + if (isWaitingForAMContainer()) { + updateAMDiagnosticMsg(anyRequest.getCapability(), + " exceeds current queue or its parents maximum resource allowed)."); + } + ret = false; + } + + return ret; } private boolean isValidReservation(FSSchedulerNode node) { @@ -1083,6 +1101,12 @@ public class FSAppAttempt extends SchedulerApplicationAttempt @Override public Resource assignContainer(FSSchedulerNode node) { if (isOverAMShareLimit()) { + if (isWaitingForAMContainer()) { + List ask = appSchedulingInfo.getAllResourceRequests(); + updateAMDiagnosticMsg(ask.get(0).getCapability(), " exceeds maximum " + + "AM resource allowed)."); + } + if (LOG.isDebugEnabled()) { LOG.debug("Skipping allocation because maxAMShare limit would " + "be exceeded"); @@ -1092,6 +1116,21 @@ public class FSAppAttempt extends SchedulerApplicationAttempt return assignContainer(node, false); } + /** + * Build the diagnostic message and update it. + * + * @param resource resource request + * @param reason the reason why AM doesn't get the resource + */ + private void updateAMDiagnosticMsg(Resource resource, String reason) { + StringBuilder diagnosticMessageBldr = new StringBuilder(); + diagnosticMessageBldr.append(" (Resource request: "); + diagnosticMessageBldr.append(resource); + diagnosticMessageBldr.append(reason); + updateAMContainerDiagnostics(AMState.INACTIVATED, + diagnosticMessageBldr.toString()); + } + /** * Preempt a running container according to the priority */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 94fdb7c37d5..1d04710ceb1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -710,7 +710,7 @@ public class FairScheduler extends } application.setCurrentAppAttempt(attempt); - boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user); + boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, attempt); queue.addApp(attempt, runnable); if (runnable) { maxRunningEnforcer.trackRunnableApp(attempt); @@ -1714,7 +1714,7 @@ public class FairScheduler extends boolean wasRunnable = oldQueue.removeApp(attempt); // if app was not runnable before, it may be runnable now boolean nowRunnable = maxRunningEnforcer.canAppBeRunnable(newQueue, - attempt.getUser()); + attempt); if (wasRunnable && !nowRunnable) { throw new IllegalStateException("Should have already verified that app " + attempt.getApplicationId() + " would be runnable in new queue"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java index 8592fa68740..02e2d97aaf3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java @@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFactory; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ListMultimap; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState; /** * Handles tracking and enforcement for user and queue maxRunningApps @@ -54,25 +55,64 @@ public class MaxRunningAppsEnforcer { /** * Checks whether making the application runnable would exceed any * maxRunningApps limits. + * + * @param queue the current queue + * @param attempt the app attempt being checked + * @return true if the application is runnable; false otherwise */ - public boolean canAppBeRunnable(FSQueue queue, String user) { + public boolean canAppBeRunnable(FSQueue queue, FSAppAttempt attempt) { + boolean ret = true; + if (exceedUserMaxApps(attempt.getUser())) { + attempt.updateAMContainerDiagnostics(AMState.INACTIVATED, + "The user \"" + attempt.getUser() + "\" has reached the maximum limit" + + " of runnable applications."); + ret = false; + } else if (exceedQueueMaxRunningApps(queue)) { + attempt.updateAMContainerDiagnostics(AMState.INACTIVATED, + "The queue \"" + queue.getName() + "\" has reached the maximum limit" + + " of runnable applications."); + ret = false; + } + + return ret; + } + + /** + * Checks whether the number of user runnable apps exceeds the limitation. + * + * @param user the user name + * @return true if the number hits the limit; false otherwise + */ + public boolean exceedUserMaxApps(String user) { AllocationConfiguration allocConf = scheduler.getAllocationConfiguration(); Integer userNumRunnable = usersNumRunnableApps.get(user); if (userNumRunnable == null) { userNumRunnable = 0; } if (userNumRunnable >= allocConf.getUserMaxApps(user)) { - return false; + return true; } + + return false; + } + + /** + * Recursively checks whether the number of queue runnable apps exceeds the + * limitation. + * + * @param queue the current queue + * @return true if the number hits the limit; false otherwise + */ + public boolean exceedQueueMaxRunningApps(FSQueue queue) { // Check queue and all parent queues while (queue != null) { if (queue.getNumRunnableApps() >= queue.getMaxRunningApps()) { - return false; + return true; } queue = queue.getParent(); } - return true; + return false; } /** @@ -198,7 +238,7 @@ public class MaxRunningAppsEnforcer { continue; } - if (canAppBeRunnable(next.getQueue(), next.getUser())) { + if (canAppBeRunnable(next.getQueue(), next)) { trackRunnableApp(next); FSAppAttempt appSched = next; next.getQueue().addApp(appSched, true); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java index 3f17081bc2b..f77df1ea399 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java @@ -68,9 +68,9 @@ public class TestMaxRunningAppsEnforcer { private FSAppAttempt addApp(FSLeafQueue queue, String user) { ApplicationId appId = ApplicationId.newInstance(0l, appNum++); ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 0); - boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, user); FSAppAttempt app = new FSAppAttempt(scheduler, attId, user, queue, null, rmContext); + boolean runnable = maxAppsEnforcer.canAppBeRunnable(queue, app); queue.addApp(app, runnable); if (runnable) { maxAppsEnforcer.trackRunnableApp(app);