From f9b48206ddae91835e19d965da566e5b4fc47c64 Mon Sep 17 00:00:00 2001 From: Robert Kanter Date: Wed, 28 Dec 2016 15:21:52 -0800 Subject: [PATCH] YARN-4882. Change the log level to DEBUG for recovering completed applications (templedf via rkanter) (cherry picked from commit f216276d2164c6564632c571fd3adbb03bc8b3e4) --- .../server/resourcemanager/RMAppManager.java | 13 ++++++-- .../resourcemanager/rmapp/RMAppImpl.java | 31 ++++++++++++++----- .../rmapp/attempt/RMAppAttemptImpl.java | 27 +++++++++++++--- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 003f1b2a050..d18a5f6858c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -480,8 +480,17 @@ public class RMAppManager implements EventHandler, Map appStates = state.getApplicationState(); LOG.info("Recovering " + appStates.size() + " applications"); - for (ApplicationStateData appState : appStates.values()) { - recoverApplication(appState, state); + + int count = 0; + + try { + for (ApplicationStateData appState : appStates.values()) { + recoverApplication(appState, state); + count += 1; + } + } finally { + LOG.info("Successfully recovered " + count + " out of " + + appStates.size() + " applications"); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 7b6da591f0a..71b5ab76915 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -127,6 +127,10 @@ public class RMAppImpl implements RMApp, Recoverable { private static final EnumSet COMPLETED_APP_STATES = EnumSet.of(RMAppState.FINISHED, RMAppState.FINISHING, RMAppState.FAILED, RMAppState.KILLED, RMAppState.FINAL_SAVING, RMAppState.KILLING); + private static final String STATE_CHANGE_MESSAGE = + "%s State change from %s to %s on event = %s"; + private static final String RECOVERY_MESSAGE = + "Recovering app: %s with %d attempts and final state = %s"; // Immutable fields private final ApplicationId applicationId; @@ -838,9 +842,16 @@ public class RMAppImpl implements RMApp, Recoverable { /* TODO fail the application on the failed transition */ } - if (oldState != getState()) { - LOG.info(appID + " State change from " + oldState + " to " - + getState() + " on event=" + event.getType()); + // Log at INFO if we're not recovering or not in a terminal state. + // Log at DEBUG otherwise. + if ((oldState != getState()) && + (((recoveredFinalState == null)) || + (event.getType() != RMAppEventType.RECOVER))) { + LOG.info(String.format(STATE_CHANGE_MESSAGE, appID, oldState, + getState(), event.getType())); + } else if ((oldState != getState()) && LOG.isDebugEnabled()) { + LOG.debug(String.format(STATE_CHANGE_MESSAGE, appID, oldState, + getState(), event.getType())); } } finally { this.writeLock.unlock(); @@ -852,9 +863,15 @@ public class RMAppImpl implements RMApp, Recoverable { ApplicationStateData appState = state.getApplicationState().get(getApplicationId()); this.recoveredFinalState = appState.getState(); - LOG.info("Recovering app: " + getApplicationId() + " with " + - + appState.getAttemptCount() + " attempts and final state = " - + this.recoveredFinalState ); + + if (recoveredFinalState == null) { + LOG.info(String.format(RECOVERY_MESSAGE, getApplicationId(), + appState.getAttemptCount(), "NONE")); + } else if (LOG.isDebugEnabled()) { + LOG.debug(String.format(RECOVERY_MESSAGE, getApplicationId(), + appState.getAttemptCount(), recoveredFinalState)); + } + this.diagnostics.append(null == appState.getDiagnostics() ? "" : appState .getDiagnostics()); this.storedFinishTime = appState.getFinishTime(); @@ -1944,4 +1961,4 @@ public class RMAppImpl implements RMApp, Recoverable { public void setApplicationPriority(Priority applicationPriority) { this.applicationPriority = applicationPriority; } -} \ No newline at end of file +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 9e0e0edcb08..b1bc69e69a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -114,6 +114,10 @@ import com.google.common.annotations.VisibleForTesting; @SuppressWarnings({"unchecked", "rawtypes"}) public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { + private static final String STATE_CHANGE_MESSAGE = + "%s State change from %s to %s on event = %s"; + private static final String RECOVERY_MESSAGE = + "Recovering attempt: %s with final state = %s"; private static final Log LOG = LogFactory.getLog(RMAppAttemptImpl.class); @@ -868,9 +872,16 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { /* TODO fail the application on the failed transition */ } - if (oldState != getAppAttemptState()) { - LOG.info(appAttemptID + " State change from " + oldState + " to " - + getAppAttemptState()); + // Log at INFO if we're not recovering or not in a terminal state. + // Log at DEBUG otherwise. + if ((oldState != getAppAttemptState()) && + ((recoveredFinalState == null) || + (event.getType() != RMAppAttemptEventType.RECOVER))) { + LOG.info(String.format(STATE_CHANGE_MESSAGE, appAttemptID, oldState, + getAppAttemptState(), event.getType())); + } else if ((oldState != getAppAttemptState()) && LOG.isDebugEnabled()) { + LOG.debug(String.format(STATE_CHANGE_MESSAGE, appAttemptID, oldState, + getAppAttemptState(), event.getType())); } } finally { this.writeLock.unlock(); @@ -907,8 +918,14 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { ApplicationAttemptStateData attemptState = appState.getAttempt(getAppAttemptId()); assert attemptState != null; - LOG.info("Recovering attempt: " + getAppAttemptId() + " with final state: " - + attemptState.getState()); + + if (attemptState.getState() == null) { + LOG.info(String.format(RECOVERY_MESSAGE, getAppAttemptId(), "NONE")); + } else if (LOG.isDebugEnabled()) { + LOG.debug(String.format(RECOVERY_MESSAGE, getAppAttemptId(), + attemptState.getState())); + } + diagnostics.append("Attempt recovered after RM restart"); diagnostics.append(attemptState.getDiagnostics()); this.amContainerExitStatus = attemptState.getAMContainerExitStatus();