YARN-2354. DistributedShell may allocate more containers than client specified after AM restarts. Contributed by Li Lu

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1614538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jian He 2014-07-30 03:58:59 +00:00
parent c0b49ff107
commit 7e54b1c6d9
3 changed files with 13 additions and 7 deletions

View File

@@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED
  YARN-1796. container-executor shouldn't require o-r permissions (atm)
+ YARN-2354. DistributedShell may allocate more containers than client
+ specified after AM restarts. (Li Lu via jianhe)
  Release 2.5.0 - UNRELEASED
  INCOMPATIBLE CHANGES

View File

@@ -208,7 +208,8 @@ public static enum DSEntity {
  // App Master configuration
  // No. of containers to run shell command on
- private int numTotalContainers = 1;
+ @VisibleForTesting
+ protected int numTotalContainers = 1;
  // Memory to request for the container on which the shell command will run
  private int containerMemory = 10;
  // VirtualCores to request for the container on which the shell command will run
@@ -594,8 +595,8 @@ public void run() throws YarnException, IOException {
  List<Container> previousAMRunningContainers =
  response.getContainersFromPreviousAttempts();
- LOG.info("Received " + previousAMRunningContainers.size()
- + " previous AM's running containers on AM registration.");
+ LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
+ + " previous attempts' running containers on AM registration.");
  numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
  int numTotalContainersToRequest =
@@ -610,7 +611,7 @@ public void run() throws YarnException, IOException {
  ContainerRequest containerAsk = setupContainerAskForRM();
  amRMClient.addContainerRequest(containerAsk);
  }
- numRequestedContainers.set(numTotalContainersToRequest);
+ numRequestedContainers.set(numTotalContainers);
  try {
  publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
  DSEvent.DS_APP_ATTEMPT_END);
@@ -689,7 +690,7 @@ public void onContainersCompleted(List<ContainerStatus> completedContainers) {
  LOG.info("Got response from RM for container ask, completedCnt="
  + completedContainers.size());
  for (ContainerStatus containerStatus : completedContainers) {
- LOG.info("Got container status for containerID="
+ LOG.info(appAttemptID + " got container status for containerID="
  + containerStatus.getContainerId() + ", state="
  + containerStatus.getState() + ", exitStatus="
  + containerStatus.getExitStatus() + ", diagnostics="

View File

@@ -36,9 +36,11 @@ public void run() throws YarnException, IOException {
  if (appAttemptID.getAttemptId() == 2) {
  // should reuse the earlier running container, so numAllocatedContainers
  // should be set to 1. And should ask no more containers, so
- // numRequestedContainers should be set to 0.
+ // numRequestedContainers should be the same as numTotalContainers.
+ // The only container is the container requested by the AM in the first
+ // attempt.
  if (numAllocatedContainers.get() != 1
- || numRequestedContainers.get() != 0) {
+ || numRequestedContainers.get() != numTotalContainers) {
  LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
  + " and NumRequestedContainers is " + numAllocatedContainers.get()
  + ".Application Master failed. exiting");