YARN-2392. Add more diags about app retry limits on AM failures. Contributed by Steve Loughran
(cherry picked from commit 1970ca7cbc)
This commit is contained in:
parent
ccc834395b
commit
2e99210e4e
|
@ -241,6 +241,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
YARN-3467. Expose allocatedMB, allocatedVCores, and runningContainers metrics on
|
YARN-3467. Expose allocatedMB, allocatedVCores, and runningContainers metrics on
|
||||||
running Applications in RM Web UI. (Anubhav Dhoot via kasha)
|
running Applications in RM Web UI. (Anubhav Dhoot via kasha)
|
||||||
|
|
||||||
|
YARN-2392. Add more diags about app retry limits on AM failures. (Steve
|
||||||
|
Loughran via jianhe)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
||||||
|
|
|
@ -1014,9 +1014,19 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
+ " failed due to " + failedEvent.getDiagnostics()
|
+ " failed due to " + failedEvent.getDiagnostics()
|
||||||
+ ". Failing the application.";
|
+ ". Failing the application.";
|
||||||
} else if (this.isNumAttemptsBeyondThreshold) {
|
} else if (this.isNumAttemptsBeyondThreshold) {
|
||||||
msg = "Application " + this.getApplicationId() + " failed "
|
int globalLimit = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||||
+ this.maxAppAttempts + " times due to "
|
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||||
+ failedEvent.getDiagnostics() + ". Failing the application.";
|
msg = String.format(
|
||||||
|
"Application %s failed %d times%s%s due to %s. Failing the application.",
|
||||||
|
getApplicationId(),
|
||||||
|
maxAppAttempts,
|
||||||
|
(attemptFailuresValidityInterval <= 0 ? ""
|
||||||
|
: (" in previous " + attemptFailuresValidityInterval
|
||||||
|
+ " milliseconds")),
|
||||||
|
(globalLimit == maxAppAttempts) ? ""
|
||||||
|
: (" (global limit =" + globalLimit
|
||||||
|
+ "; local limit is =" + maxAppAttempts + ")"),
|
||||||
|
failedEvent.getDiagnostics());
|
||||||
}
|
}
|
||||||
return msg;
|
return msg;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1459,9 +1459,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
.append(status.getDiagnostics());
|
.append(status.getDiagnostics());
|
||||||
if (this.getTrackingUrl() != null) {
|
if (this.getTrackingUrl() != null) {
|
||||||
diagnosticsBuilder.append("For more detailed output,").append(
|
diagnosticsBuilder.append("For more detailed output,").append(
|
||||||
" check application tracking page: ").append(
|
" check the application tracking page: ").append(
|
||||||
this.getTrackingUrl()).append(
|
this.getTrackingUrl()).append(
|
||||||
" Then, click on links to logs of each attempt.\n");
|
" Then click on links to logs of each attempt.\n");
|
||||||
}
|
}
|
||||||
return diagnosticsBuilder.toString();
|
return diagnosticsBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue