YARN-9194. Invalid event: REGISTERED and LAUNCH_FAILED at FAILED, and NullPointerException happens in RM while shutting down a NM. (lujie via wangda)
Change-Id: I4359f59a73a278a941f4bb9d106dd38c9cb471fe
(cherry picked from commit 6d7eedfd28)
This commit is contained in:
parent
4aee7946dc
commit
fe7cb2d84a
|
@ -437,9 +437,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
RMAppAttemptState.FAILED,
|
RMAppAttemptState.FAILED,
|
||||||
EnumSet.of(
|
EnumSet.of(
|
||||||
RMAppAttemptEventType.LAUNCHED,
|
RMAppAttemptEventType.LAUNCHED,
|
||||||
|
RMAppAttemptEventType.LAUNCH_FAILED,
|
||||||
RMAppAttemptEventType.EXPIRE,
|
RMAppAttemptEventType.EXPIRE,
|
||||||
RMAppAttemptEventType.KILL,
|
RMAppAttemptEventType.KILL,
|
||||||
RMAppAttemptEventType.FAIL,
|
RMAppAttemptEventType.FAIL,
|
||||||
|
RMAppAttemptEventType.REGISTERED,
|
||||||
RMAppAttemptEventType.UNREGISTERED,
|
RMAppAttemptEventType.UNREGISTERED,
|
||||||
RMAppAttemptEventType.STATUS_UPDATE,
|
RMAppAttemptEventType.STATUS_UPDATE,
|
||||||
RMAppAttemptEventType.CONTAINER_ALLOCATED))
|
RMAppAttemptEventType.CONTAINER_ALLOCATED))
|
||||||
|
@ -1203,10 +1205,16 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the masterContainer
|
// Set the masterContainer
|
||||||
appAttempt.setMasterContainer(amContainerAllocation.getContainers()
|
Container amContainer = amContainerAllocation.getContainers().get(0);
|
||||||
.get(0));
|
|
||||||
RMContainerImpl rmMasterContainer = (RMContainerImpl)appAttempt.scheduler
|
RMContainerImpl rmMasterContainer = (RMContainerImpl)appAttempt.scheduler
|
||||||
.getRMContainer(appAttempt.getMasterContainer().getId());
|
.getRMContainer(amContainer.getId());
|
||||||
|
//while one NM is removed, the scheduler will clean the container,the
|
||||||
|
//following CONTAINER_FINISHED event will handle the cleaned container.
|
||||||
|
//so just return RMAppAttemptState.SCHEDULED
|
||||||
|
if (rmMasterContainer == null) {
|
||||||
|
return RMAppAttemptState.SCHEDULED;
|
||||||
|
}
|
||||||
|
appAttempt.setMasterContainer(amContainer);
|
||||||
rmMasterContainer.setAMContainer(true);
|
rmMasterContainer.setAMContainer(true);
|
||||||
// The node set in NMTokenSecrentManager is used for marking whether the
|
// The node set in NMTokenSecrentManager is used for marking whether the
|
||||||
// NMToken has been issued for this node to the AM.
|
// NMToken has been issued for this node to the AM.
|
||||||
|
|
|
@ -986,7 +986,7 @@ public class TestRMAppAttemptTransitions {
|
||||||
public void testAttemptAddedAtFinalSaving() {
|
public void testAttemptAddedAtFinalSaving() {
|
||||||
submitApplicationAttempt();
|
submitApplicationAttempt();
|
||||||
|
|
||||||
// SUBNITED->FINAL_SAVING
|
// SUBMITTED->FINAL_SAVING
|
||||||
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
||||||
.getAppAttemptId(), RMAppAttemptEventType.KILL));
|
.getAppAttemptId(), RMAppAttemptEventType.KILL));
|
||||||
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
||||||
|
@ -999,6 +999,56 @@ public class TestRMAppAttemptTransitions {
|
||||||
applicationAttempt.getAppAttemptState());
|
applicationAttempt.getAppAttemptState());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 10000)
|
||||||
|
public void testAttemptRegisteredAtFailed() {
|
||||||
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
launchApplicationAttempt(amContainer);
|
||||||
|
|
||||||
|
//send CONTAINER_FINISHED event
|
||||||
|
NodeId anyNodeId = NodeId.newInstance("host", 1234);
|
||||||
|
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||||
|
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
|
||||||
|
amContainer.getId(), ContainerState.COMPLETE, "", 0,
|
||||||
|
amContainer.getResource()), anyNodeId));
|
||||||
|
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
|
||||||
|
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||||
|
assertEquals(RMAppAttemptState.FAILED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
|
||||||
|
//send REGISTERED event
|
||||||
|
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
||||||
|
.getAppAttemptId(), RMAppAttemptEventType.REGISTERED));
|
||||||
|
|
||||||
|
assertEquals(RMAppAttemptState.FAILED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAttemptLaunchFailedAtFailed() {
|
||||||
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
launchApplicationAttempt(amContainer);
|
||||||
|
//send CONTAINER_FINISHED event
|
||||||
|
NodeId anyNodeId = NodeId.newInstance("host", 1234);
|
||||||
|
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||||
|
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
|
||||||
|
amContainer.getId(), ContainerState.COMPLETE, "", 0,
|
||||||
|
amContainer.getResource()), anyNodeId));
|
||||||
|
assertEquals(RMAppAttemptState.FINAL_SAVING,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||||
|
assertEquals(RMAppAttemptState.FAILED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
|
||||||
|
//send LAUNCH_FAILED event
|
||||||
|
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
|
||||||
|
.getAppAttemptId(), RMAppAttemptEventType.LAUNCH_FAILED));
|
||||||
|
|
||||||
|
assertEquals(RMAppAttemptState.FAILED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAMCrashAtAllocated() {
|
public void testAMCrashAtAllocated() {
|
||||||
Container amContainer = allocateApplicationAttempt();
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
@ -1598,6 +1648,34 @@ public class TestRMAppAttemptTransitions {
|
||||||
assertTrue(found);
|
assertTrue(found);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContainerRemovedBeforeAllocate() {
|
||||||
|
scheduleApplicationAttempt();
|
||||||
|
|
||||||
|
// Mock the allocation of AM container
|
||||||
|
Container container = mock(Container.class);
|
||||||
|
Resource resource = BuilderUtils.newResource(2048, 1);
|
||||||
|
when(container.getId()).thenReturn(
|
||||||
|
BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 1));
|
||||||
|
when(container.getResource()).thenReturn(resource);
|
||||||
|
Allocation allocation = mock(Allocation.class);
|
||||||
|
when(allocation.getContainers()).
|
||||||
|
thenReturn(Collections.singletonList(container));
|
||||||
|
when(scheduler.allocate(any(ApplicationAttemptId.class), any(List.class),
|
||||||
|
any(List.class), any(List.class), any(List.class), any(List.class),
|
||||||
|
any(ContainerUpdates.class))).
|
||||||
|
thenReturn(allocation);
|
||||||
|
|
||||||
|
//container removed, so return null
|
||||||
|
when(scheduler.getRMContainer(container.getId())).
|
||||||
|
thenReturn(null);
|
||||||
|
|
||||||
|
applicationAttempt.handle(
|
||||||
|
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
|
||||||
|
RMAppAttemptEventType.CONTAINER_ALLOCATED));
|
||||||
|
assertEquals(RMAppAttemptState.SCHEDULED,
|
||||||
|
applicationAttempt.getAppAttemptState());
|
||||||
|
}
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue