YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails. (jlowe via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1407607 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Turner Eagles 2012-11-09 19:55:37 +00:00
parent 8a5955f4ef
commit f7f9924ef8
2 changed files with 32 additions and 29 deletions

View File

@ -196,6 +196,9 @@ Release 0.23.5 - UNRELEASED
YARN-201. Fix CapacityScheduler to be less conservative for starved
off-switch requests. (jlowe via acmurthy)
YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails.
(jlowe via jeagles)
Release 0.23.4 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -81,38 +81,38 @@ public class TestApplicationCleanup {
new ArrayList<ContainerId>()).getAllocatedContainers(); new ArrayList<ContainerId>()).getAllocatedContainers();
int contReceived = conts.size(); int contReceived = conts.size();
int waitCount = 0; int waitCount = 0;
while (contReceived < request && waitCount++ < 20) { while (contReceived < request && waitCount++ < 200) {
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(100);
conts = am.allocate(new ArrayList<ResourceRequest>(), conts = am.allocate(new ArrayList<ResourceRequest>(),
new ArrayList<ContainerId>()).getAllocatedContainers(); new ArrayList<ContainerId>()).getAllocatedContainers();
contReceived += conts.size(); contReceived += conts.size();
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(2000);
} }
Assert.assertEquals(request, conts.size()); Assert.assertEquals(request, contReceived);
am.unregisterAppAttempt(); am.unregisterAppAttempt();
HeartbeatResponse resp = nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, HeartbeatResponse resp = nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1,
ContainerState.COMPLETE); ContainerState.COMPLETE);
am.waitForState(RMAppAttemptState.FINISHED); am.waitForState(RMAppAttemptState.FINISHED);
int cleanedConts = 0;
int cleanedApps = 0;
List<ContainerId> contsToClean = null;
List<ApplicationId> apps = null;
//currently only containers are cleaned via this //currently only containers are cleaned via this
//AM container is cleaned via container launcher //AM container is cleaned via container launcher
resp = nm1.nodeHeartbeat(true);
List<ContainerId> contsToClean = resp.getContainersToCleanupList();
List<ApplicationId> apps = resp.getApplicationsToCleanupList();
int cleanedConts = contsToClean.size();
int cleanedApps = apps.size();
waitCount = 0; waitCount = 0;
while ((cleanedConts < 2 || cleanedApps < 1) && waitCount++ < 20) { while ((cleanedConts < 2 || cleanedApps < 1) && waitCount++ < 200) {
contsToClean = resp.getContainersToCleanupList();
apps = resp.getApplicationsToCleanupList();
LOG.info("Waiting to get cleanup events.. cleanedConts: " LOG.info("Waiting to get cleanup events.. cleanedConts: "
+ cleanedConts + " cleanedApps: " + cleanedApps); + cleanedConts + " cleanedApps: " + cleanedApps);
Thread.sleep(100);
resp = nm1.nodeHeartbeat(true);
contsToClean = resp.getContainersToCleanupList();
apps = resp.getApplicationsToCleanupList();
cleanedConts += contsToClean.size(); cleanedConts += contsToClean.size();
cleanedApps += apps.size(); cleanedApps += apps.size();
Thread.sleep(1000);
resp = nm1.nodeHeartbeat(true);
} }
Assert.assertEquals(1, apps.size()); Assert.assertEquals(1, apps.size());
@ -170,20 +170,20 @@ public class TestApplicationCleanup {
new ArrayList<ContainerId>()).getAllocatedContainers(); new ArrayList<ContainerId>()).getAllocatedContainers();
int contReceived = conts.size(); int contReceived = conts.size();
int waitCount = 0; int waitCount = 0;
while (contReceived < request && waitCount++ < 20) { while (contReceived < request && waitCount++ < 200) {
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(100);
conts = am.allocate(new ArrayList<ResourceRequest>(), conts = am.allocate(new ArrayList<ResourceRequest>(),
new ArrayList<ContainerId>()).getAllocatedContainers(); new ArrayList<ContainerId>()).getAllocatedContainers();
dispatcher.await(); dispatcher.await();
contReceived += conts.size(); contReceived += conts.size();
LOG.info("Got " + contReceived + " containers. Waiting to get "
+ request);
Thread.sleep(2000);
} }
Assert.assertEquals(request, conts.size()); Assert.assertEquals(request, contReceived);
// Release a container. // Release a container.
ArrayList<ContainerId> release = new ArrayList<ContainerId>(); ArrayList<ContainerId> release = new ArrayList<ContainerId>();
release.add(conts.get(1).getId()); release.add(conts.get(0).getId());
am.allocate(new ArrayList<ResourceRequest>(), release); am.allocate(new ArrayList<ResourceRequest>(), release);
dispatcher.await(); dispatcher.await();
@ -194,7 +194,7 @@ public class TestApplicationCleanup {
new HashMap<ApplicationId, List<ContainerStatus>>(); new HashMap<ApplicationId, List<ContainerStatus>>();
ArrayList<ContainerStatus> containerStatusList = ArrayList<ContainerStatus> containerStatusList =
new ArrayList<ContainerStatus>(); new ArrayList<ContainerStatus>();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(1) containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
.getId(), ContainerState.RUNNING, "nothing", 0)); .getId(), ContainerState.RUNNING, "nothing", 0));
containerStatuses.put(app.getApplicationId(), containerStatusList); containerStatuses.put(app.getApplicationId(), containerStatusList);
@ -203,13 +203,13 @@ public class TestApplicationCleanup {
List<ContainerId> contsToClean = resp.getContainersToCleanupList(); List<ContainerId> contsToClean = resp.getContainersToCleanupList();
int cleanedConts = contsToClean.size(); int cleanedConts = contsToClean.size();
waitCount = 0; waitCount = 0;
while (cleanedConts < 1 && waitCount++ < 20) { while (cleanedConts < 1 && waitCount++ < 200) {
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
Thread.sleep(100);
resp = nm1.nodeHeartbeat(true); resp = nm1.nodeHeartbeat(true);
dispatcher.await(); dispatcher.await();
contsToClean = resp.getContainersToCleanupList(); contsToClean = resp.getContainersToCleanupList();
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
cleanedConts += contsToClean.size(); cleanedConts += contsToClean.size();
Thread.sleep(1000);
} }
LOG.info("Got cleanup for " + contsToClean.get(0)); LOG.info("Got cleanup for " + contsToClean.get(0));
Assert.assertEquals(1, cleanedConts); Assert.assertEquals(1, cleanedConts);
@ -220,7 +220,7 @@ public class TestApplicationCleanup {
+ "NM getting cleanup"); + "NM getting cleanup");
containerStatuses.clear(); containerStatuses.clear();
containerStatusList.clear(); containerStatusList.clear();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(1) containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
.getId(), ContainerState.RUNNING, "nothing", 0)); .getId(), ContainerState.RUNNING, "nothing", 0));
containerStatuses.put(app.getApplicationId(), containerStatusList); containerStatuses.put(app.getApplicationId(), containerStatusList);
@ -231,13 +231,13 @@ public class TestApplicationCleanup {
// The cleanup list won't be instantaneous as it is given out by scheduler // The cleanup list won't be instantaneous as it is given out by scheduler
// and not RMNodeImpl. // and not RMNodeImpl.
waitCount = 0; waitCount = 0;
while (cleanedConts < 1 && waitCount++ < 20) { while (cleanedConts < 1 && waitCount++ < 200) {
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
Thread.sleep(100);
resp = nm1.nodeHeartbeat(true); resp = nm1.nodeHeartbeat(true);
dispatcher.await(); dispatcher.await();
contsToClean = resp.getContainersToCleanupList(); contsToClean = resp.getContainersToCleanupList();
LOG.info("Waiting to get cleanup events.. cleanedConts: " + cleanedConts);
cleanedConts += contsToClean.size(); cleanedConts += contsToClean.size();
Thread.sleep(1000);
} }
LOG.info("Got cleanup for " + contsToClean.get(0)); LOG.info("Got cleanup for " + contsToClean.get(0));
Assert.assertEquals(1, cleanedConts); Assert.assertEquals(1, cleanedConts);