From ea94980477c57d84945a33cdea2eb28af7ccecbc Mon Sep 17 00:00:00 2001 From: Jian He Date: Wed, 30 Jul 2014 04:00:54 +0000 Subject: [PATCH] Merge r1614538 from trunk. YARN-2354. DistributedShell may allocate more containers than client specified after AM restarts. Contributed by Li Lu git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1614539 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../distributedshell/ApplicationMaster.java | 11 ++++++----- .../distributedshell/TestDSFailedAppMaster.java | 6 ++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d6c026039fe..cb07e22ebe1 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -93,6 +93,9 @@ Release 2.6.0 - UNRELEASED YARN-1796. container-executor shouldn't require o-r permissions (atm) + YARN-2354. DistributedShell may allocate more containers than client + specified after AM restarts. (Li Lu via jianhe) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 5e1cbbcd932..9051d31089f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -208,7 +208,8 @@ public static enum DSEntity { // App Master configuration // No. of containers to run shell command on - private int numTotalContainers = 1; + @VisibleForTesting + protected int numTotalContainers = 1; // Memory to request for the container on which the shell command will run private int containerMemory = 10; // VirtualCores to request for the container on which the shell command will run @@ -594,8 +595,8 @@ public void run() throws YarnException, IOException { List previousAMRunningContainers = response.getContainersFromPreviousAttempts(); - LOG.info("Received " + previousAMRunningContainers.size() - + " previous AM's running containers on AM registration."); + LOG.info(appAttemptID + " received " + previousAMRunningContainers.size() + + " previous attempts' running containers on AM registration."); numAllocatedContainers.addAndGet(previousAMRunningContainers.size()); int numTotalContainersToRequest = @@ -610,7 +611,7 @@ public void run() throws YarnException, IOException { ContainerRequest containerAsk = setupContainerAskForRM(); amRMClient.addContainerRequest(containerAsk); } - numRequestedContainers.set(numTotalContainersToRequest); + numRequestedContainers.set(numTotalContainers); try { publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END); @@ -689,7 +690,7 @@ public void onContainersCompleted(List completedContainers) { LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size()); for (ContainerStatus containerStatus : completedContainers) { - LOG.info("Got container status for containerID=" + LOG.info(appAttemptID + " got container status for containerID=" + containerStatus.getContainerId() + ", state=" + containerStatus.getState() + ", exitStatus=" + containerStatus.getExitStatus() + ", diagnostics=" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java index db7419bc8e6..f3ab4b7538b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java @@ -36,9 +36,11 @@ public void run() throws YarnException, IOException { if (appAttemptID.getAttemptId() == 2) { // should reuse the earlier running container, so numAllocatedContainers // should be set to 1. And should ask no more containers, so - // numRequestedContainers should be set to 0. + // numRequestedContainers should be the same as numTotalContainers. + // The only container is the container requested by the AM in the first + // attempt. if (numAllocatedContainers.get() != 1 - || numRequestedContainers.get() != 0) { + || numRequestedContainers.get() != numTotalContainers) { LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get() + " and NumRequestedContainers is " + numAllocatedContainers.get() + ".Application Master failed. exiting");