YARN-2354. DistributedShell may allocate more containers than the client specified after AM restarts. Contributed by Li Lu.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1614538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jian He 2014-07-30 03:58:59 +00:00
parent c0b49ff107
commit 7e54b1c6d9
3 changed files with 13 additions and 7 deletions

View File

@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED
YARN-1796. container-executor shouldn't require o-r permissions (atm) YARN-1796. container-executor shouldn't require o-r permissions (atm)
YARN-2354. DistributedShell may allocate more containers than the client
specified after AM restarts. (Li Lu via jianhe)
Release 2.5.0 - UNRELEASED Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -208,7 +208,8 @@ public class ApplicationMaster {
// App Master configuration // App Master configuration
// No. of containers to run shell command on // No. of containers to run shell command on
private int numTotalContainers = 1; @VisibleForTesting
protected int numTotalContainers = 1;
// Memory to request for the container on which the shell command will run // Memory to request for the container on which the shell command will run
private int containerMemory = 10; private int containerMemory = 10;
// VirtualCores to request for the container on which the shell command will run // VirtualCores to request for the container on which the shell command will run
@ -594,8 +595,8 @@ public class ApplicationMaster {
List<Container> previousAMRunningContainers = List<Container> previousAMRunningContainers =
response.getContainersFromPreviousAttempts(); response.getContainersFromPreviousAttempts();
LOG.info("Received " + previousAMRunningContainers.size() LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
+ " previous AM's running containers on AM registration."); + " previous attempts' running containers on AM registration.");
numAllocatedContainers.addAndGet(previousAMRunningContainers.size()); numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
int numTotalContainersToRequest = int numTotalContainersToRequest =
@ -610,7 +611,7 @@ public class ApplicationMaster {
ContainerRequest containerAsk = setupContainerAskForRM(); ContainerRequest containerAsk = setupContainerAskForRM();
amRMClient.addContainerRequest(containerAsk); amRMClient.addContainerRequest(containerAsk);
} }
numRequestedContainers.set(numTotalContainersToRequest); numRequestedContainers.set(numTotalContainers);
try { try {
publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(), publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
DSEvent.DS_APP_ATTEMPT_END); DSEvent.DS_APP_ATTEMPT_END);
@ -689,7 +690,7 @@ public class ApplicationMaster {
LOG.info("Got response from RM for container ask, completedCnt=" LOG.info("Got response from RM for container ask, completedCnt="
+ completedContainers.size()); + completedContainers.size());
for (ContainerStatus containerStatus : completedContainers) { for (ContainerStatus containerStatus : completedContainers) {
LOG.info("Got container status for containerID=" LOG.info(appAttemptID + " got container status for containerID="
+ containerStatus.getContainerId() + ", state=" + containerStatus.getContainerId() + ", state="
+ containerStatus.getState() + ", exitStatus=" + containerStatus.getState() + ", exitStatus="
+ containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getExitStatus() + ", diagnostics="

View File

@ -36,9 +36,11 @@ public class TestDSFailedAppMaster extends ApplicationMaster {
if (appAttemptID.getAttemptId() == 2) { if (appAttemptID.getAttemptId() == 2) {
// should reuse the earlier running container, so numAllocatedContainers // should reuse the earlier running container, so numAllocatedContainers
// should be set to 1. And should ask no more containers, so // should be set to 1. And should ask no more containers, so
// numRequestedContainers should be set to 0. // numRequestedContainers should be the same as numTotalContainers.
// The only container is the container requested by the AM in the first
// attempt.
if (numAllocatedContainers.get() != 1 if (numAllocatedContainers.get() != 1
|| numRequestedContainers.get() != 0) { || numRequestedContainers.get() != numTotalContainers) {
LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get() LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
+ " and NumRequestedContainers is " + numAllocatedContainers.get() + " and NumRequestedContainers is " + numAllocatedContainers.get()
+ ".Application Master failed. exiting"); + ".Application Master failed. exiting");