Rebase onto latest trunk. minor conflicts

This commit is contained in:
Jian He 2017-08-30 22:48:35 -07:00
parent 59fe98cd50
commit bd96c4c235
2 changed files with 4 additions and 33 deletions

View File

@ -150,6 +150,8 @@ public class Apps {
}
// Check if should black list the node based on container exit status
@Private
@Unstable
public static boolean shouldCountTowardsNodeBlacklisting(int exitStatus) {
switch (exitStatus) {
case ContainerExitStatus.PREEMPTED:

View File

@ -107,6 +107,7 @@ import org.apache.hadoop.yarn.state.MultipleArcTransition;
import org.apache.hadoop.yarn.state.SingleArcTransition;
import org.apache.hadoop.yarn.state.StateMachine;
import org.apache.hadoop.yarn.state.StateMachineFactory;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.BoundedAppender;
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
@ -1539,38 +1540,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
}
}
private static boolean shouldCountTowardsNodeBlacklisting(int exitStatus) {
switch (exitStatus) {
case ContainerExitStatus.PREEMPTED:
case ContainerExitStatus.KILLED_BY_RESOURCEMANAGER:
case ContainerExitStatus.KILLED_BY_APPMASTER:
case ContainerExitStatus.KILLED_AFTER_APP_COMPLETION:
case ContainerExitStatus.ABORTED:
// Neither the app's fault nor the system's fault. This happens by design,
// so no need for skipping nodes
return false;
case ContainerExitStatus.DISKS_FAILED:
// This container is marked with this exit-status means that the node is
// already marked as unhealthy given that most of the disks failed. So, no
// need for any explicit skipping of nodes.
return false;
case ContainerExitStatus.KILLED_EXCEEDED_VMEM:
case ContainerExitStatus.KILLED_EXCEEDED_PMEM:
// No point in skipping the node as it's not the system's fault
return false;
case ContainerExitStatus.SUCCESS:
return false;
case ContainerExitStatus.INVALID:
// Ideally, this shouldn't be considered for skipping a node. But in
// reality, it seems like there are cases where we are not setting
// exit-code correctly and so it's better to be conservative. See
// YARN-4284.
return true;
default:
return true;
}
}
private static final class UnmanagedAMAttemptSavedTransition
extends AMLaunchedTransition {
@Override
@ -1954,7 +1923,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
containerFinishedEvent.getContainerStatus();
if (containerStatus != null) {
int exitStatus = containerStatus.getExitStatus();
if (shouldCountTowardsNodeBlacklisting(exitStatus)) {
if (Apps.shouldCountTowardsNodeBlacklisting(exitStatus)) {
appAttempt.addAMNodeToBlackList(nodeId);
}
} else {