YARN-2416. InvalidStateTransitonException in ResourceManager if AMLauncher does not receive response for startContainers() call in time. Contributed by Jonathan Eagles
This commit is contained in:
parent
4ec5acc704
commit
3efcd51c3b
|
@ -184,7 +184,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
new ExpiredTransition();
|
new ExpiredTransition();
|
||||||
private static final AttemptFailedTransition FAILED_TRANSITION =
|
private static final AttemptFailedTransition FAILED_TRANSITION =
|
||||||
new AttemptFailedTransition();
|
new AttemptFailedTransition();
|
||||||
|
private static final AMRegisteredTransition REGISTERED_TRANSITION =
|
||||||
|
new AMRegisteredTransition();
|
||||||
|
private static final AMLaunchedTransition LAUNCHED_TRANSITION =
|
||||||
|
new AMLaunchedTransition();
|
||||||
private RMAppAttemptEvent eventCausingFinalSaving;
|
private RMAppAttemptEvent eventCausingFinalSaving;
|
||||||
private RMAppAttemptState targetedFinalState;
|
private RMAppAttemptState targetedFinalState;
|
||||||
private RMAppAttemptState recoveredFinalState;
|
private RMAppAttemptState recoveredFinalState;
|
||||||
|
@ -314,7 +317,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
|
|
||||||
// Transitions from ALLOCATED State
|
// Transitions from ALLOCATED State
|
||||||
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED,
|
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.LAUNCHED,
|
||||||
RMAppAttemptEventType.LAUNCHED, new AMLaunchedTransition())
|
RMAppAttemptEventType.LAUNCHED, LAUNCHED_TRANSITION)
|
||||||
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING,
|
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING,
|
||||||
RMAppAttemptEventType.LAUNCH_FAILED,
|
RMAppAttemptEventType.LAUNCH_FAILED,
|
||||||
new FinalSavingTransition(new LaunchFailedTransition(),
|
new FinalSavingTransition(new LaunchFailedTransition(),
|
||||||
|
@ -328,6 +331,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
RMAppAttemptEventType.FAIL,
|
RMAppAttemptEventType.FAIL,
|
||||||
new FinalSavingTransition(FAILED_TRANSITION,
|
new FinalSavingTransition(FAILED_TRANSITION,
|
||||||
RMAppAttemptState.FAILED))
|
RMAppAttemptState.FAILED))
|
||||||
|
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.RUNNING,
|
||||||
|
RMAppAttemptEventType.REGISTERED, REGISTERED_TRANSITION)
|
||||||
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING,
|
.addTransition(RMAppAttemptState.ALLOCATED, RMAppAttemptState.FINAL_SAVING,
|
||||||
RMAppAttemptEventType.CONTAINER_FINISHED,
|
RMAppAttemptEventType.CONTAINER_FINISHED,
|
||||||
new FinalSavingTransition(
|
new FinalSavingTransition(
|
||||||
|
@ -335,7 +340,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
|
|
||||||
// Transitions from LAUNCHED State
|
// Transitions from LAUNCHED State
|
||||||
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING,
|
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.RUNNING,
|
||||||
RMAppAttemptEventType.REGISTERED, new AMRegisteredTransition())
|
RMAppAttemptEventType.REGISTERED, REGISTERED_TRANSITION)
|
||||||
.addTransition(RMAppAttemptState.LAUNCHED,
|
.addTransition(RMAppAttemptState.LAUNCHED,
|
||||||
EnumSet.of(RMAppAttemptState.LAUNCHED, RMAppAttemptState.FINAL_SAVING),
|
EnumSet.of(RMAppAttemptState.LAUNCHED, RMAppAttemptState.FINAL_SAVING),
|
||||||
RMAppAttemptEventType.CONTAINER_FINISHED,
|
RMAppAttemptEventType.CONTAINER_FINISHED,
|
||||||
|
@ -357,6 +362,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
RMAppAttemptState.FAILED))
|
RMAppAttemptState.FAILED))
|
||||||
|
|
||||||
// Transitions from RUNNING State
|
// Transitions from RUNNING State
|
||||||
|
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING,
|
||||||
|
RMAppAttemptEventType.LAUNCHED)
|
||||||
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.FINAL_SAVING,
|
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.FINAL_SAVING,
|
||||||
RMAppAttemptEventType.UNREGISTERED, new AMUnregisteredTransition())
|
RMAppAttemptEventType.UNREGISTERED, new AMUnregisteredTransition())
|
||||||
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING,
|
.addTransition(RMAppAttemptState.RUNNING, RMAppAttemptState.RUNNING,
|
||||||
|
@ -421,6 +428,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
RMAppAttemptState.FAILED,
|
RMAppAttemptState.FAILED,
|
||||||
RMAppAttemptState.FAILED,
|
RMAppAttemptState.FAILED,
|
||||||
EnumSet.of(
|
EnumSet.of(
|
||||||
|
RMAppAttemptEventType.LAUNCHED,
|
||||||
RMAppAttemptEventType.EXPIRE,
|
RMAppAttemptEventType.EXPIRE,
|
||||||
RMAppAttemptEventType.KILL,
|
RMAppAttemptEventType.KILL,
|
||||||
RMAppAttemptEventType.FAIL,
|
RMAppAttemptEventType.FAIL,
|
||||||
|
@ -438,6 +446,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
new FinalTransition(RMAppAttemptState.FINISHED))
|
new FinalTransition(RMAppAttemptState.FINISHED))
|
||||||
.addTransition(RMAppAttemptState.FINISHING, RMAppAttemptState.FINISHING,
|
.addTransition(RMAppAttemptState.FINISHING, RMAppAttemptState.FINISHING,
|
||||||
EnumSet.of(
|
EnumSet.of(
|
||||||
|
RMAppAttemptEventType.LAUNCHED,
|
||||||
RMAppAttemptEventType.UNREGISTERED,
|
RMAppAttemptEventType.UNREGISTERED,
|
||||||
RMAppAttemptEventType.STATUS_UPDATE,
|
RMAppAttemptEventType.STATUS_UPDATE,
|
||||||
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
||||||
|
@ -451,6 +460,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
RMAppAttemptState.FINISHED,
|
RMAppAttemptState.FINISHED,
|
||||||
RMAppAttemptState.FINISHED,
|
RMAppAttemptState.FINISHED,
|
||||||
EnumSet.of(
|
EnumSet.of(
|
||||||
|
RMAppAttemptEventType.LAUNCHED,
|
||||||
RMAppAttemptEventType.EXPIRE,
|
RMAppAttemptEventType.EXPIRE,
|
||||||
RMAppAttemptEventType.UNREGISTERED,
|
RMAppAttemptEventType.UNREGISTERED,
|
||||||
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
||||||
|
@ -1291,7 +1301,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
* 2) OR AMLivelinessMonitor expires this attempt (when am doesn't
|
* 2) OR AMLivelinessMonitor expires this attempt (when am doesn't
|
||||||
* heart beat back).
|
* heart beat back).
|
||||||
*/
|
*/
|
||||||
(new AMLaunchedTransition()).transition(appAttempt, event);
|
LAUNCHED_TRANSITION.transition(appAttempt, event);
|
||||||
return RMAppAttemptState.LAUNCHED;
|
return RMAppAttemptState.LAUNCHED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1516,7 +1526,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppAttemptImpl appAttempt,
|
public void transition(RMAppAttemptImpl appAttempt,
|
||||||
RMAppAttemptEvent event) {
|
RMAppAttemptEvent event) {
|
||||||
if (event.getType() == RMAppAttemptEventType.LAUNCHED) {
|
if (event.getType() == RMAppAttemptEventType.LAUNCHED
|
||||||
|
|| event.getType() == RMAppAttemptEventType.REGISTERED) {
|
||||||
appAttempt.launchAMEndTime = System.currentTimeMillis();
|
appAttempt.launchAMEndTime = System.currentTimeMillis();
|
||||||
long delay = appAttempt.launchAMEndTime -
|
long delay = appAttempt.launchAMEndTime -
|
||||||
appAttempt.launchAMStartTime;
|
appAttempt.launchAMStartTime;
|
||||||
|
@ -1651,6 +1662,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppAttemptImpl appAttempt,
|
public void transition(RMAppAttemptImpl appAttempt,
|
||||||
RMAppAttemptEvent event) {
|
RMAppAttemptEvent event) {
|
||||||
|
if (!RMAppAttemptState.LAUNCHED.equals(appAttempt.getState())) {
|
||||||
|
// REGISTERED event arrived before the LAUNCHED event; run the launch transition first
|
||||||
|
LAUNCHED_TRANSITION.transition(appAttempt, event);
|
||||||
|
}
|
||||||
long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime;
|
long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime;
|
||||||
ClusterMetrics.getMetrics().addAMRegisterDelay(delay);
|
ClusterMetrics.getMetrics().addAMRegisterDelay(delay);
|
||||||
RMAppAttemptRegistrationEvent registrationEvent
|
RMAppAttemptRegistrationEvent registrationEvent
|
||||||
|
|
|
@ -526,12 +526,9 @@ public class TestRMAppAttemptTransitions {
|
||||||
verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
|
verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private void testAppAttemptLaunchedState(Container container,
|
||||||
* {@link RMAppAttemptState#LAUNCHED}
|
RMAppAttemptState state) {
|
||||||
*/
|
assertEquals(state, applicationAttempt.getAppAttemptState());
|
||||||
private void testAppAttemptLaunchedState(Container container) {
|
|
||||||
assertEquals(RMAppAttemptState.LAUNCHED,
|
|
||||||
applicationAttempt.getAppAttemptState());
|
|
||||||
assertEquals(container, applicationAttempt.getMasterContainer());
|
assertEquals(container, applicationAttempt.getMasterContainer());
|
||||||
if (UserGroupInformation.isSecurityEnabled()) {
|
if (UserGroupInformation.isSecurityEnabled()) {
|
||||||
// ClientTokenMasterKey has been registered in SecretManager, it's able to
|
// ClientTokenMasterKey has been registered in SecretManager, it's able to
|
||||||
|
@ -686,13 +683,18 @@ public class TestRMAppAttemptTransitions {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void launchApplicationAttempt(Container container) {
|
private void launchApplicationAttempt(Container container) {
|
||||||
|
launchApplicationAttempt(container, RMAppAttemptState.LAUNCHED);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void launchApplicationAttempt(Container container,
|
||||||
|
RMAppAttemptState state) {
|
||||||
applicationAttempt.handle(
|
applicationAttempt.handle(
|
||||||
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
|
new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
|
||||||
RMAppAttemptEventType.LAUNCHED));
|
RMAppAttemptEventType.LAUNCHED));
|
||||||
|
|
||||||
testAppAttemptLaunchedState(container);
|
testAppAttemptLaunchedState(container, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void runApplicationAttempt(Container container,
|
private void runApplicationAttempt(Container container,
|
||||||
String host,
|
String host,
|
||||||
int rpcPort,
|
int rpcPort,
|
||||||
|
@ -723,7 +725,7 @@ public class TestRMAppAttemptTransitions {
|
||||||
when(submissionContext.getUnmanagedAM()).thenReturn(true);
|
when(submissionContext.getUnmanagedAM()).thenReturn(true);
|
||||||
// submit AM and check it goes to LAUNCHED state
|
// submit AM and check it goes to LAUNCHED state
|
||||||
scheduleApplicationAttempt();
|
scheduleApplicationAttempt();
|
||||||
testAppAttemptLaunchedState(null);
|
testAppAttemptLaunchedState(null, RMAppAttemptState.LAUNCHED);
|
||||||
verify(amLivelinessMonitor, times(1)).register(
|
verify(amLivelinessMonitor, times(1)).register(
|
||||||
applicationAttempt.getAppAttemptId());
|
applicationAttempt.getAppAttemptId());
|
||||||
|
|
||||||
|
@ -930,7 +932,15 @@ public class TestRMAppAttemptTransitions {
|
||||||
applicationAttempt.createApplicationAttemptState());
|
applicationAttempt.createApplicationAttemptState());
|
||||||
testAppAttemptFailedState(amContainer, diagnostics);
|
testAppAttemptFailedState(amContainer, diagnostics);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 10000)
|
||||||
|
public void testAllocatedToRunning() {
|
||||||
|
Container amContainer = allocateApplicationAttempt();
|
||||||
|
// The REGISTERED attempt event arrives before the LAUNCHED attempt event
|
||||||
|
runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false);
|
||||||
|
launchApplicationAttempt(amContainer, RMAppAttemptState.RUNNING);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout = 10000)
|
@Test(timeout = 10000)
|
||||||
public void testCreateAppAttemptReport() {
|
public void testCreateAppAttemptReport() {
|
||||||
RMAppAttemptState[] attemptStates = RMAppAttemptState.values();
|
RMAppAttemptState[] attemptStates = RMAppAttemptState.values();
|
||||||
|
|
Loading…
Reference in New Issue