YARN-2354. DistributedShell may allocate more containers than client specified after AM restarts. Contributed by Li Lu.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1614538 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c0b49ff107
commit
7e54b1c6d9
|
@ -111,6 +111,9 @@ Release 2.6.0 - UNRELEASED
|
||||||
|
|
||||||
YARN-1796. container-executor shouldn't require o-r permissions (atm)
|
YARN-1796. container-executor shouldn't require o-r permissions (atm)
|
||||||
|
|
||||||
|
YARN-2354. DistributedShell may allocate more containers than client
|
||||||
|
specified after AM restarts. (Li Lu via jianhe)
|
||||||
|
|
||||||
Release 2.5.0 - UNRELEASED
|
Release 2.5.0 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -208,7 +208,8 @@ public class ApplicationMaster {
|
||||||
|
|
||||||
// App Master configuration
|
// App Master configuration
|
||||||
// No. of containers to run shell command on
|
// No. of containers to run shell command on
|
||||||
private int numTotalContainers = 1;
|
@VisibleForTesting
|
||||||
|
protected int numTotalContainers = 1;
|
||||||
// Memory to request for the container on which the shell command will run
|
// Memory to request for the container on which the shell command will run
|
||||||
private int containerMemory = 10;
|
private int containerMemory = 10;
|
||||||
// VirtualCores to request for the container on which the shell command will run
|
// VirtualCores to request for the container on which the shell command will run
|
||||||
|
@ -594,8 +595,8 @@ public class ApplicationMaster {
|
||||||
|
|
||||||
List<Container> previousAMRunningContainers =
|
List<Container> previousAMRunningContainers =
|
||||||
response.getContainersFromPreviousAttempts();
|
response.getContainersFromPreviousAttempts();
|
||||||
LOG.info("Received " + previousAMRunningContainers.size()
|
LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
|
||||||
+ " previous AM's running containers on AM registration.");
|
+ " previous attempts' running containers on AM registration.");
|
||||||
numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
|
numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
|
||||||
|
|
||||||
int numTotalContainersToRequest =
|
int numTotalContainersToRequest =
|
||||||
|
@ -610,7 +611,7 @@ public class ApplicationMaster {
|
||||||
ContainerRequest containerAsk = setupContainerAskForRM();
|
ContainerRequest containerAsk = setupContainerAskForRM();
|
||||||
amRMClient.addContainerRequest(containerAsk);
|
amRMClient.addContainerRequest(containerAsk);
|
||||||
}
|
}
|
||||||
numRequestedContainers.set(numTotalContainersToRequest);
|
numRequestedContainers.set(numTotalContainers);
|
||||||
try {
|
try {
|
||||||
publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
|
publishApplicationAttemptEvent(timelineClient, appAttemptID.toString(),
|
||||||
DSEvent.DS_APP_ATTEMPT_END);
|
DSEvent.DS_APP_ATTEMPT_END);
|
||||||
|
@ -689,7 +690,7 @@ public class ApplicationMaster {
|
||||||
LOG.info("Got response from RM for container ask, completedCnt="
|
LOG.info("Got response from RM for container ask, completedCnt="
|
||||||
+ completedContainers.size());
|
+ completedContainers.size());
|
||||||
for (ContainerStatus containerStatus : completedContainers) {
|
for (ContainerStatus containerStatus : completedContainers) {
|
||||||
LOG.info("Got container status for containerID="
|
LOG.info(appAttemptID + " got container status for containerID="
|
||||||
+ containerStatus.getContainerId() + ", state="
|
+ containerStatus.getContainerId() + ", state="
|
||||||
+ containerStatus.getState() + ", exitStatus="
|
+ containerStatus.getState() + ", exitStatus="
|
||||||
+ containerStatus.getExitStatus() + ", diagnostics="
|
+ containerStatus.getExitStatus() + ", diagnostics="
|
||||||
|
|
|
@ -36,9 +36,11 @@ public class TestDSFailedAppMaster extends ApplicationMaster {
|
||||||
if (appAttemptID.getAttemptId() == 2) {
|
if (appAttemptID.getAttemptId() == 2) {
|
||||||
// should reuse the earlier running container, so numAllocatedContainers
|
// should reuse the earlier running container, so numAllocatedContainers
|
||||||
// should be set to 1. And should ask no more containers, so
|
// should be set to 1. And should ask no more containers, so
|
||||||
// numRequestedContainers should be set to 0.
|
// numRequestedContainers should be the same as numTotalContainers.
|
||||||
|
// The only container is the container requested by the AM in the first
|
||||||
|
// attempt.
|
||||||
if (numAllocatedContainers.get() != 1
|
if (numAllocatedContainers.get() != 1
|
||||||
|| numRequestedContainers.get() != 0) {
|
|| numRequestedContainers.get() != numTotalContainers) {
|
||||||
LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
|
LOG.info("NumAllocatedContainers is " + numAllocatedContainers.get()
|
||||||
+ " and NumRequestedContainers is " + numAllocatedContainers.get()
|
+ " and NumRequestedContainers is " + numAllocatedContainers.get()
|
||||||
+ ".Application Master failed. exiting");
|
+ ".Application Master failed. exiting");
|
||||||
|
|
Loading…
Reference in New Issue