svn merge -c 1619614 FIXES: YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled. Contributed by Jian He
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1619616 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
23b4c0b6c1
commit
fab20109ae
|
@ -211,6 +211,9 @@ Release 2.6.0 - UNRELEASED
|
||||||
YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas
|
YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas
|
||||||
via aw)
|
via aw)
|
||||||
|
|
||||||
|
YARN-2434. RM should not recover containers from previously failed attempt
|
||||||
|
when AM restart is not enabled (Jian He via jlowe)
|
||||||
|
|
||||||
Release 2.5.0 - 2014-08-11
|
Release 2.5.0 - 2014-08-11
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -273,6 +273,19 @@ public abstract class AbstractYarnScheduler
|
||||||
SchedulerApplicationAttempt schedulerAttempt =
|
SchedulerApplicationAttempt schedulerAttempt =
|
||||||
schedulerApp.getCurrentAppAttempt();
|
schedulerApp.getCurrentAppAttempt();
|
||||||
|
|
||||||
|
if (!rmApp.getApplicationSubmissionContext()
|
||||||
|
.getKeepContainersAcrossApplicationAttempts()) {
|
||||||
|
// Do not recover containers for stopped attempt or previous attempt.
|
||||||
|
if (schedulerAttempt.isStopped()
|
||||||
|
|| !schedulerAttempt.getApplicationAttemptId().equals(
|
||||||
|
container.getContainerId().getApplicationAttemptId())) {
|
||||||
|
LOG.info("Skip recovering container " + container
|
||||||
|
+ " for already stopped attempt.");
|
||||||
|
killOrphanContainerOnNode(nm, container);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// create container
|
// create container
|
||||||
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
|
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
|
||||||
|
|
||||||
|
|
|
@ -513,6 +513,19 @@ public class TestWorkPreservingRMRestart {
|
||||||
// just-recovered containers.
|
// just-recovered containers.
|
||||||
assertNull(scheduler.getRMContainer(runningContainer.getContainerId()));
|
assertNull(scheduler.getRMContainer(runningContainer.getContainerId()));
|
||||||
assertNull(scheduler.getRMContainer(completedContainer.getContainerId()));
|
assertNull(scheduler.getRMContainer(completedContainer.getContainerId()));
|
||||||
|
|
||||||
|
rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
|
||||||
|
|
||||||
|
MockNM nm2 =
|
||||||
|
new MockNM("127.1.1.1:4321", 8192, rm2.getResourceTrackerService());
|
||||||
|
NMContainerStatus previousAttemptContainer =
|
||||||
|
TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 4,
|
||||||
|
ContainerState.RUNNING);
|
||||||
|
nm2.registerNode(Arrays.asList(previousAttemptContainer), null);
|
||||||
|
// Wait for RM to settle down on recovering containers.
|
||||||
|
Thread.sleep(3000);
|
||||||
|
// Check that containers from the previous failed attempt are not recovered.
|
||||||
|
assertNull(scheduler.getRMContainer(previousAttemptContainer.getContainerId()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apps already completed before RM restart. Restarted RM scheduler should not
|
// Apps already completed before RM restart. Restarted RM scheduler should not
|
||||||
|
|
Loading…
Reference in New Issue