YARN-227. Application expiration difficult to debug for end-users (Jason Lowe via jeagles)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1453087 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Turner Eagles 2013-03-05 23:23:18 +00:00
parent 54d7d46552
commit b7134a90e2
3 changed files with 67 additions and 7 deletions

View File

@ -323,6 +323,9 @@ Release 0.23.7 - UNRELEASED
YARN-269. Resource Manager not logging the health_check_script result when
taking it out (Jason Lowe via kihwal)
YARN-227. Application expiration difficult to debug for end-users
(Jason Lowe via jeagles)
OPTIMIZATIONS
YARN-357. App submission should not be synchronized (daryn)

View File

@ -147,6 +147,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
private Configuration conf;
private static final ExpiredTransition EXPIRED_TRANSITION =
new ExpiredTransition();
private static final StateMachineFactory<RMAppAttemptImpl,
RMAppAttemptState,
RMAppAttemptEventType,
@ -243,7 +246,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
.addTransition(
RMAppAttemptState.LAUNCHED, RMAppAttemptState.FAILED,
RMAppAttemptEventType.EXPIRE,
new FinalTransition(RMAppAttemptState.FAILED))
EXPIRED_TRANSITION)
.addTransition(RMAppAttemptState.LAUNCHED, RMAppAttemptState.KILLED,
RMAppAttemptEventType.KILL,
new FinalTransition(RMAppAttemptState.KILLED))
@ -268,7 +271,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
.addTransition(
RMAppAttemptState.RUNNING, RMAppAttemptState.FAILED,
RMAppAttemptEventType.EXPIRE,
new FinalTransition(RMAppAttemptState.FAILED))
EXPIRED_TRANSITION)
.addTransition(
RMAppAttemptState.RUNNING, RMAppAttemptState.KILLED,
RMAppAttemptEventType.KILL,
@ -491,6 +494,13 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
}
}
/**
 * Points both the original and the proxied tracking URL of this attempt at
 * the application's page on the RM web app
 * ({@code <rm-webapp-host:port>/cluster/app/<applicationId>}).
 */
private void setTrackingUrlToRMAppPage() {
  final String rmAppPageUrl = pjoin(
      YarnConfiguration.getRMWebAppHostAndPort(conf),
      "cluster", "app", getAppAttemptId().getApplicationId());
  // Both URLs are set to the same RM page.
  origTrackingUrl = rmAppPageUrl;
  proxiedTrackingUrl = rmAppPageUrl;
}
@Override
public ClientToken getClientToken() {
return this.clientToken;
@ -992,7 +1002,23 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
}
}
}
/**
 * Transition used for EXPIRE events. It appends a "timed out" message to the
 * attempt's diagnostics, repoints the tracking URL at the RM application
 * page, and then runs the regular {@link FinalTransition} logic with
 * {@code FAILED} as the final state.
 */
private static class ExpiredTransition extends FinalTransition {

  public ExpiredTransition() {
    super(RMAppAttemptState.FAILED);
  }

  @Override
  public void transition(RMAppAttemptImpl appAttempt,
      RMAppAttemptEvent event) {
    // Record why the attempt is ending so it shows up in the diagnostics.
    final String timeoutMessage = "ApplicationMaster for attempt "
        + appAttempt.getAppAttemptId() + " timed out";
    appAttempt.diagnostics.append(timeoutMessage);

    // Replace the tracking URL with the RM app page before finishing.
    appAttempt.setTrackingUrlToRMAppPage();

    super.transition(appAttempt, event);
  }
}
private static class UnexpectedAMRegisteredTransition extends
BaseFinalTransition {
@ -1110,10 +1136,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
// When the AM dies, the trackingUrl is left pointing to the AM's URL,
// which shows up in the scheduler UI as a broken link. Direct the
// user to the app page on the RM so they can see the status and logs.
appAttempt.origTrackingUrl = pjoin(
YarnConfiguration.getRMWebAppHostAndPort(appAttempt.conf),
"cluster", "app", appAttempt.getAppAttemptId().getApplicationId());
appAttempt.proxiedTrackingUrl = appAttempt.origTrackingUrl;
appAttempt.setTrackingUrlToRMAppPage();
new FinalTransition(RMAppAttemptState.FAILED).transition(
appAttempt, containerFinishedEvent);

View File

@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
@ -665,6 +666,39 @@ public class TestRMAppAttemptTransitions {
assertEquals(rmAppPageUrl, applicationAttempt.getTrackingUrl());
}
/**
 * Sends EXPIRE to an attempt that has been allocated and launched, then
 * checks that it ends up FAILED with a "timed out" diagnostic and with both
 * tracking URLs pointing at the RM application page.
 */
@Test(timeout=10000)
public void testLaunchedExpire() {
  // Set up: allocate the AM container and launch the attempt.
  Container amContainer = allocateApplicationAttempt();
  launchApplicationAttempt(amContainer);

  // Act: deliver the EXPIRE event.
  RMAppAttemptEvent expireEvent = new RMAppAttemptEvent(
      applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE);
  applicationAttempt.handle(expireEvent);

  // Verify: final state and diagnostics.
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
  assertTrue("expire diagnostics missing",
      applicationAttempt.getDiagnostics().contains("timed out"));

  // Verify: both tracking URLs now reference the RM app page.
  String expectedUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
      applicationAttempt.getAppAttemptId().getApplicationId());
  assertEquals(expectedUrl, applicationAttempt.getOriginalTrackingUrl());
  assertEquals(expectedUrl, applicationAttempt.getTrackingUrl());
}
/**
 * Sends EXPIRE to an attempt that has been allocated, launched and run with
 * the tracking URL "oldtrackingurl", then checks that it ends up FAILED with
 * a "timed out" diagnostic and with both tracking URLs replaced by the RM
 * application page.
 */
@Test(timeout=20000)
public void testRunningExpire() {
  // Set up: allocate, launch, and run the attempt with an AM tracking URL.
  Container amContainer = allocateApplicationAttempt();
  launchApplicationAttempt(amContainer);
  runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl");

  // Act: deliver the EXPIRE event.
  RMAppAttemptEvent expireEvent = new RMAppAttemptEvent(
      applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE);
  applicationAttempt.handle(expireEvent);

  // Verify: final state and diagnostics.
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
  assertTrue("expire diagnostics missing",
      applicationAttempt.getDiagnostics().contains("timed out"));

  // Verify: the old AM URL has been replaced by the RM app page.
  String expectedUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app",
      applicationAttempt.getAppAttemptId().getApplicationId());
  assertEquals(expectedUrl, applicationAttempt.getOriginalTrackingUrl());
  assertEquals(expectedUrl, applicationAttempt.getTrackingUrl());
}
@Test
public void testUnregisterToKilledFinishing() {
Container amContainer = allocateApplicationAttempt();