diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ad433648b57..026d54b2d8e 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -211,6 +211,9 @@ Release 2.6.0 - UNRELEASED YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas via aw) + YARN-2434. RM should not recover containers from previously failed attempt + when AM restart is not enabled (Jian He via jlowe) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 72ee7dbbe0a..ab56bb97212 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -273,6 +273,19 @@ public abstract class AbstractYarnScheduler SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt(); + if (!rmApp.getApplicationSubmissionContext() + .getKeepContainersAcrossApplicationAttempts()) { + // Do not recover containers for stopped attempt or previous attempt. 
+ if (schedulerAttempt.isStopped() + || !schedulerAttempt.getApplicationAttemptId().equals( + container.getContainerId().getApplicationAttemptId())) { + LOG.info("Skip recovering container " + container + + " for already stopped attempt."); + killOrphanContainerOnNode(nm, container); + continue; + } + } + // create container RMContainer rmContainer = recoverAndCreateContainer(container, nm); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index df64d4c32d2..d6af0d7307e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -513,6 +513,19 @@ public class TestWorkPreservingRMRestart { // just-recovered containers. assertNull(scheduler.getRMContainer(runningContainer.getContainerId())); assertNull(scheduler.getRMContainer(completedContainer.getContainerId())); + + rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1); + + MockNM nm2 = + new MockNM("127.1.1.1:4321", 8192, rm2.getResourceTrackerService()); + NMContainerStatus previousAttemptContainer = + TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 4, + ContainerState.RUNNING); + nm2.registerNode(Arrays.asList(previousAttemptContainer), null); + // Wait for RM to settle down on recovering containers. + Thread.sleep(3000); + // Check that containers from the previous failed attempt are not recovered. 
+ assertNull(scheduler.getRMContainer(previousAttemptContainer.getContainerId())); } // Apps already completed before RM restart. Restarted RM scheduler should not