YARN-9194. Invalid event: REGISTERED and LAUNCH_FAILED at FAILED, and NullPointerException happens in RM while shutdown a NM. (lujie via wangda)
Change-Id: I4359f59a73a278a941f4bb9d106dd38c9cb471fe
(cherry picked from commit 6d7eedfd28
)
This commit is contained in:
parent
4aee7946dc
commit
fe7cb2d84a
|
@ -437,9 +437,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
RMAppAttemptState.FAILED,
|
||||
EnumSet.of(
|
||||
RMAppAttemptEventType.LAUNCHED,
|
||||
RMAppAttemptEventType.LAUNCH_FAILED,
|
||||
RMAppAttemptEventType.EXPIRE,
|
||||
RMAppAttemptEventType.KILL,
|
||||
RMAppAttemptEventType.FAIL,
|
||||
RMAppAttemptEventType.REGISTERED,
|
||||
RMAppAttemptEventType.UNREGISTERED,
|
||||
RMAppAttemptEventType.STATUS_UPDATE,
|
||||
RMAppAttemptEventType.CONTAINER_ALLOCATED))
|
||||
|
@ -1203,10 +1205,16 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
}
|
||||
|
||||
// Set the masterContainer
|
||||
appAttempt.setMasterContainer(amContainerAllocation.getContainers()
|
||||
.get(0));
|
||||
Container amContainer = amContainerAllocation.getContainers().get(0);
|
||||
RMContainerImpl rmMasterContainer = (RMContainerImpl)appAttempt.scheduler
|
||||
.getRMContainer(appAttempt.getMasterContainer().getId());
|
||||
.getRMContainer(amContainer.getId());
|
||||
//while one NM is removed, the scheduler will clean the container,the
|
||||
//following CONTAINER_FINISHED event will handle the cleaned container.
|
||||
//so just return RMAppAttemptState.SCHEDULED
|
||||
if (rmMasterContainer == null) {
|
||||
return RMAppAttemptState.SCHEDULED;
|
||||
}
|
||||
appAttempt.setMasterContainer(amContainer);
|
||||
rmMasterContainer.setAMContainer(true);
|
||||
// The node set in NMTokenSecrentManager is used for marking whether the
|
||||
// NMToken has been issued for this node to the AM.
|
||||
|
|
|
@ -986,7 +986,7 @@ public class TestRMAppAttemptTransitions {
|
|||
public void testAttemptAddedAtFinalSaving() {
|
||||
submitApplicationAttempt();
|
||||
|
||||
// SUBNITED->FINAL_SAVING
|
||||
// SUBMITTED->FINAL_SAVING
|
||||
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
||||
.getAppAttemptId(), RMAppAttemptEventType.KILL));
|
||||
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
||||
|
@ -999,6 +999,56 @@ public class TestRMAppAttemptTransitions {
|
|||
applicationAttempt.getAppAttemptState());
|
||||
}
|
||||
|
||||
@Test(timeout = 10000)
|
||||
public void testAttemptRegisteredAtFailed() {
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
launchApplicationAttempt(amContainer);
|
||||
|
||||
//send CONTAINER_FINISHED event
|
||||
NodeId anyNodeId = NodeId.newInstance("host", 1234);
|
||||
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
|
||||
amContainer.getId(), ContainerState.COMPLETE, "", 0,
|
||||
amContainer.getResource()), anyNodeId));
|
||||
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
|
||||
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
|
||||
//send REGISTERED event
|
||||
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
||||
.getAppAttemptId(), RMAppAttemptEventType.REGISTERED));
|
||||
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAttemptLaunchFailedAtFailed() {
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
launchApplicationAttempt(amContainer);
|
||||
//send CONTAINER_FINISHED event
|
||||
NodeId anyNodeId = NodeId.newInstance("host", 1234);
|
||||
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
|
||||
amContainer.getId(), ContainerState.COMPLETE, "", 0,
|
||||
amContainer.getResource()), anyNodeId));
|
||||
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
|
||||
//send LAUNCH_FAILED event
|
||||
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
||||
.getAppAttemptId(), RMAppAttemptEventType.LAUNCH_FAILED));
|
||||
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAMCrashAtAllocated() {
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
|
@ -1598,6 +1648,34 @@ public class TestRMAppAttemptTransitions {
|
|||
assertTrue(found);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testContainerRemovedBeforeAllocate() {
|
||||
scheduleApplicationAttempt();
|
||||
|
||||
// Mock the allocation of AM container
|
||||
Container container = mock(Container.class);
|
||||
Resource resource = BuilderUtils.newResource(2048, 1);
|
||||
when(container.getId()).thenReturn(
|
||||
BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 1));
|
||||
when(container.getResource()).thenReturn(resource);
|
||||
Allocation allocation = mock(Allocation.class);
|
||||
when(allocation.getContainers()).
|
||||
thenReturn(Collections.singletonList(container));
|
||||
when(scheduler.allocate(any(ApplicationAttemptId.class), any(List.class),
|
||||
any(List.class), any(List.class), any(List.class), any(List.class),
|
||||
any(ContainerUpdates.class))).
|
||||
thenReturn(allocation);
|
||||
|
||||
//container removed, so return null
|
||||
when(scheduler.getRMContainer(container.getId())).
|
||||
thenReturn(null);
|
||||
|
||||
applicationAttempt.handle(
|
||||
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
|
||||
RMAppAttemptEventType.CONTAINER_ALLOCATED));
|
||||
assertEquals(RMAppAttemptState.SCHEDULED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue