YARN-2416. InvalidStateTransitonException in ResourceManager if AMLauncher does not receive response for startContainers() call in time. Contributed by Jonathan Eagles

(cherry picked from commit 3efcd51c3b)
This commit is contained in:
Jason Lowe 2017-08-22 12:56:09 -05:00
parent b3ea11dfdb
commit fc7df25a11
2 changed files with 41 additions and 16 deletions

View File

@ -184,7 +184,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
new ExpiredTransition(); new ExpiredTransition();
private static final AttemptFailedTransition FAILED_TRANSITION = private static final AttemptFailedTransition FAILED_TRANSITION =
new AttemptFailedTransition(); new AttemptFailedTransition();
private static final AMRegisteredTransition REGISTERED_TRANSITION =
new AMRegisteredTransition();
private static final AMLaunchedTransition LAUNCHED_TRANSITION =
new AMLaunchedTransition();
private RMAppAttemptEvent eventCausingFinalSaving; private RMAppAttemptEvent eventCausingFinalSaving;
private RMAppAttemptState targetedFinalState; private RMAppAttemptState targetedFinalState;
private RMAppAttemptState recoveredFinalState; private RMAppAttemptState recoveredFinalState;
@ -314,7 +317,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
// Transitions from ALLOCATED State // Transitions from ALLOCATED State
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED, .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED,
RMAppAttemptEventType.LAUNCHED, new AMLaunchedTransition()) RMAppAttemptEventType.LAUNCHED, LAUNCHED_TRANSITION)
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING, .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING,
RMAppAttemptEventType.LAUNCH_FAILED, RMAppAttemptEventType.LAUNCH_FAILED,
new FinalSavingTransition(new LaunchFailedTransition(), new FinalSavingTransition(new LaunchFailedTransition(),
@ -328,6 +331,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
RMAppAttemptEventType.FAIL, RMAppAttemptEventType.FAIL,
new FinalSavingTransition(FAILED_TRANSITION, new FinalSavingTransition(FAILED_TRANSITION,
RMAppAttemptState.FAILED)) RMAppAttemptState.FAILED))
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.RUNNING,
RMAppAttemptEventType.REGISTERED, REGISTERED_TRANSITION)
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING, .addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING,
RMAppAttemptEventType.CONTAINER_FINISHED, RMAppAttemptEventType.CONTAINER_FINISHED,
new FinalSavingTransition( new FinalSavingTransition(
@ -335,7 +340,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
// Transitions from LAUNCHED State // Transitions from LAUNCHED State
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING, .addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING,
RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition()) RMAppAttemptEventType.REGISTERED, REGISTERED_TRANSITION)
.addTransition(RMAppAttemptState.LAUNCHED, .addTransition(RMAppAttemptState.LAUNCHED,
EnumSet.of(RMAppAttemptState.LAUNCHED, RMAppAttemptState.FINAL_SAVING), EnumSet.of(RMAppAttemptState.LAUNCHED, RMAppAttemptState.FINAL_SAVING),
RMAppAttemptEventType.CONTAINER_FINISHED, RMAppAttemptEventType.CONTAINER_FINISHED,
@ -357,6 +362,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
RMAppAttemptState.FAILED)) RMAppAttemptState.FAILED))
// Transitions from RUNNING State // Transitions from RUNNING State
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING,
RMAppAttemptEventType.LAUNCHED)
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.FINAL_SAVING, .addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.FINAL_SAVING,
RMAppAttemptEventType.UNREGISTERED, new AMUnregisteredTransition()) RMAppAttemptEventType.UNREGISTERED, new AMUnregisteredTransition())
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING, .addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING,
@ -421,6 +428,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
RMAppAttemptState.FAILED, RMAppAttemptState.FAILED,
RMAppAttemptState.FAILED, RMAppAttemptState.FAILED,
EnumSet.of( EnumSet.of(
RMAppAttemptEventType.LAUNCHED,
RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.EXPIRE,
RMAppAttemptEventType.KILL, RMAppAttemptEventType.KILL,
RMAppAttemptEventType.FAIL, RMAppAttemptEventType.FAIL,
@ -438,6 +446,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
new FinalTransition(RMAppAttemptState.FINISHED)) new FinalTransition(RMAppAttemptState.FINISHED))
.addTransition(RMAppAttemptState.FINISHING, RMAppAttemptState.FINISHING, .addTransition(RMAppAttemptState.FINISHING, RMAppAttemptState.FINISHING,
EnumSet.of( EnumSet.of(
RMAppAttemptEventType.LAUNCHED,
RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.UNREGISTERED,
RMAppAttemptEventType.STATUS_UPDATE, RMAppAttemptEventType.STATUS_UPDATE,
RMAppAttemptEventType.CONTAINER_ALLOCATED, RMAppAttemptEventType.CONTAINER_ALLOCATED,
@ -451,6 +460,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
RMAppAttemptState.FINISHED, RMAppAttemptState.FINISHED,
RMAppAttemptState.FINISHED, RMAppAttemptState.FINISHED,
EnumSet.of( EnumSet.of(
RMAppAttemptEventType.LAUNCHED,
RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.EXPIRE,
RMAppAttemptEventType.UNREGISTERED, RMAppAttemptEventType.UNREGISTERED,
RMAppAttemptEventType.CONTAINER_ALLOCATED, RMAppAttemptEventType.CONTAINER_ALLOCATED,
@ -1293,7 +1303,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
* 2) OR AMLivelinessMonitor expires this attempt (when am doesn't * 2) OR AMLivelinessMonitor expires this attempt (when am doesn't
* heart beat back). * heart beat back).
*/ */
(new AMLaunchedTransition()).transition(appAttempt, event); LAUNCHED_TRANSITION.transition(appAttempt, event);
return RMAppAttemptState.LAUNCHED; return RMAppAttemptState.LAUNCHED;
} }
} }
@ -1518,7 +1528,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
@Override @Override
public void transition(RMAppAttemptImpl appAttempt, public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) { RMAppAttemptEvent event) {
if (event.getType() == RMAppAttemptEventType.LAUNCHED) { if (event.getType() == RMAppAttemptEventType.LAUNCHED
|| event.getType() == RMAppAttemptEventType.REGISTERED) {
appAttempt.launchAMEndTime = System.currentTimeMillis(); appAttempt.launchAMEndTime = System.currentTimeMillis();
long delay = appAttempt.launchAMEndTime - long delay = appAttempt.launchAMEndTime -
appAttempt.launchAMStartTime; appAttempt.launchAMStartTime;
@ -1653,6 +1664,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
@Override @Override
public void transition(RMAppAttemptImpl appAttempt, public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) { RMAppAttemptEvent event) {
if (!RMAppAttemptState.LAUNCHED.equals(appAttempt.getState())) {
// registered received before launch
LAUNCHED_TRANSITION.transition(appAttempt, event);
}
long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime; long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime;
ClusterMetrics.getMetrics().addAMRegisterDelay(delay); ClusterMetrics.getMetrics().addAMRegisterDelay(delay);
RMAppAttemptRegistrationEvent registrationEvent RMAppAttemptRegistrationEvent registrationEvent

View File

@ -526,12 +526,9 @@ public class TestRMAppAttemptTransitions {
verifyApplicationAttemptFinished(RMAppAttemptState.FAILED); verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
} }
/** private void testAppAttemptLaunchedState(Container container,
* {@link RMAppAttemptState#LAUNCHED} RMAppAttemptState state) {
*/ assertEquals(state, applicationAttempt.getAppAttemptState());
private void testAppAttemptLaunchedState(Container container) {
assertEquals(RMAppAttemptState.LAUNCHED,
applicationAttempt.getAppAttemptState());
assertEquals(container, applicationAttempt.getMasterContainer()); assertEquals(container, applicationAttempt.getMasterContainer());
if (UserGroupInformation.isSecurityEnabled()) { if (UserGroupInformation.isSecurityEnabled()) {
// ClientTokenMasterKey has been registered in SecretManager, it's able to // ClientTokenMasterKey has been registered in SecretManager, it's able to
@ -686,13 +683,18 @@ public class TestRMAppAttemptTransitions {
} }
private void launchApplicationAttempt(Container container) { private void launchApplicationAttempt(Container container) {
launchApplicationAttempt(container, RMAppAttemptState.LAUNCHED);
}
private void launchApplicationAttempt(Container container,
RMAppAttemptState state) {
applicationAttempt.handle( applicationAttempt.handle(
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(), new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
RMAppAttemptEventType.LAUNCHED)); RMAppAttemptEventType.LAUNCHED));
testAppAttemptLaunchedState(container); testAppAttemptLaunchedState(container, state);
} }
private void runApplicationAttempt(Container container, private void runApplicationAttempt(Container container,
String host, String host,
int rpcPort, int rpcPort,
@ -723,7 +725,7 @@ public class TestRMAppAttemptTransitions {
when(submissionContext.getUnmanagedAM()).thenReturn(true); when(submissionContext.getUnmanagedAM()).thenReturn(true);
// submit AM and check it goes to LAUNCHED state // submit AM and check it goes to LAUNCHED state
scheduleApplicationAttempt(); scheduleApplicationAttempt();
testAppAttemptLaunchedState(null); testAppAttemptLaunchedState(null, RMAppAttemptState.LAUNCHED);
verify(amLivelinessMonitor, times(1)).register( verify(amLivelinessMonitor, times(1)).register(
applicationAttempt.getAppAttemptId()); applicationAttempt.getAppAttemptId());
@ -930,7 +932,15 @@ public class TestRMAppAttemptTransitions {
applicationAttempt.createApplicationAttemptState()); applicationAttempt.createApplicationAttemptState());
testAppAttemptFailedState(amContainer, diagnostics); testAppAttemptFailedState(amContainer, diagnostics);
} }
/**
 * Exercises the out-of-order case in which the AM's registration is handled
 * before the LAUNCHED event for the same attempt.
 *
 * The final helper call delivers a LAUNCHED event to the attempt and then
 * asserts that the attempt reports {@link RMAppAttemptState#RUNNING} and
 * still has the same master container, i.e. a late LAUNCHED event must leave
 * an already-registered attempt in RUNNING.
 */
@Test(timeout = 10000)
public void testAllocatedToRunning() {
// Presumably leaves the attempt in ALLOCATED with its AM container assigned;
// helper body is not visible here -- confirm against its definition.
Container amContainer = allocateApplicationAttempt();
// Register attempt event arrives before launched attempt event
runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false);
// Now send the delayed LAUNCHED event; the expected state is RUNNING rather
// than the LAUNCHED state used by the single-argument overload.
launchApplicationAttempt(amContainer, RMAppAttemptState.RUNNING);
}
@Test(timeout = 10000) @Test(timeout = 10000)
public void testCreateAppAttemptReport() { public void testCreateAppAttemptReport() {
RMAppAttemptState[] attemptStates = RMAppAttemptState.values(); RMAppAttemptState[] attemptStates = RMAppAttemptState.values();