YARN-2359. Application hangs when it fails to launch AM container. (Zhihai Xu via kasha)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1616375 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Karthik Kambatla 2014-08-07 00:06:17 +00:00
parent efc73a0f13
commit 8feddc4c84
3 changed files with 37 additions and 1 deletions

View File

@ -137,6 +137,9 @@ Release 2.6.0 - UNRELEASED
YARN-2374. Fixed TestDistributedShell#testDSShell failure due to hostname
mismatch. (Varun Vasudev via jianhe)
YARN-2359. Application hangs when it fails to launch AM container.
(Zhihai Xu via kasha)
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -217,7 +217,13 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
RMAppAttemptEventType.KILL,
new FinalSavingTransition(new BaseFinalTransition(
RMAppAttemptState.KILLED), RMAppAttemptState.KILLED))
.addTransition(RMAppAttemptState.SCHEDULED,
RMAppAttemptState.FINAL_SAVING,
RMAppAttemptEventType.CONTAINER_FINISHED,
new FinalSavingTransition(
new AMContainerCrashedBeforeRunningTransition(),
RMAppAttemptState.FAILED))
// Transitions from ALLOCATED_SAVING State
.addTransition(RMAppAttemptState.ALLOCATED_SAVING,
RMAppAttemptState.ALLOCATED,

View File

@ -89,6 +89,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAlloca
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
@ -782,6 +783,32 @@ public class TestRMAppAttemptTransitions {
testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS);
}
/**
 * Checks the handling of a CONTAINER_FINISHED event that arrives while the
 * attempt is still SCHEDULED: the attempt is expected to pass through
 * FINAL_SAVING (externally still reported as SCHEDULED) and to end up FAILED
 * once the ATTEMPT_UPDATE_SAVED event has been delivered.
 */
@Test
public void testAMCrashAtScheduled() {
  // Bring the attempt into the SCHEDULED state.
  scheduleApplicationAttempt();

  // Build an abnormal "lost container" status for container #1 of this
  // attempt.
  final ContainerStatus lostStatus =
      SchedulerUtils.createAbnormalContainerStatus(
          BuilderUtils.newContainerId(
              applicationAttempt.getAppAttemptId(), 1),
          SchedulerUtils.LOST_CONTAINER);

  // Deliver CONTAINER_FINISHED while SCHEDULED; the attempt should now be
  // in FINAL_SAVING with SCHEDULED remembered as its previous state.
  final RMAppAttemptContainerFinishedEvent finishedEvent =
      new RMAppAttemptContainerFinishedEvent(
          applicationAttempt.getAppAttemptId(), lostStatus);
  applicationAttempt.handle(finishedEvent);

  // While the current state is FINAL_SAVING,
  // createApplicationAttemptState reports the previous state (SCHEDULED).
  final YarnApplicationAttemptState reportedState =
      applicationAttempt.createApplicationAttemptState();
  assertEquals(YarnApplicationAttemptState.SCHEDULED, reportedState);

  // Deliver ATTEMPT_UPDATE_SAVED and confirm the attempt moved to FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  final RMAppAttemptState finalState =
      applicationAttempt.getAppAttemptState();
  assertEquals(RMAppAttemptState.FAILED, finalState);
  verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
}
@Test
public void testAllocatedToKilled() {
Container amContainer = allocateApplicationAttempt();