YARN-4744. Too many signal to container failure in case of LCE. Contributed by Sidharta Seethana

(cherry picked from commit 059caf9989)
This commit is contained in:
Jason Lowe 2016-03-07 15:40:01 +00:00
parent 5f059e03f5
commit 53ec7c9243
9 changed files with 48 additions and 26 deletions

View File

@ -166,7 +166,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
// verify configuration/permissions and exit // verify configuration/permissions and exit
try { try {
PrivilegedOperation checkSetupOp = new PrivilegedOperation( PrivilegedOperation checkSetupOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.CHECK_SETUP, (String) null); PrivilegedOperation.OperationType.CHECK_SETUP);
PrivilegedOperationExecutor privilegedOperationExecutor = PrivilegedOperationExecutor privilegedOperationExecutor =
PrivilegedOperationExecutor.getInstance(conf); PrivilegedOperationExecutor.getInstance(conf);
@ -222,7 +222,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
verifyUsernamePattern(user); verifyUsernamePattern(user);
String runAsUser = getRunAsUser(user); String runAsUser = getRunAsUser(user);
PrivilegedOperation initializeContainerOp = new PrivilegedOperation( PrivilegedOperation initializeContainerOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.INITIALIZE_CONTAINER, (String) null); PrivilegedOperation.OperationType.INITIALIZE_CONTAINER);
List<String> prefixCommands = new ArrayList<>(); List<String> prefixCommands = new ArrayList<>();
addSchedPriorityCommand(prefixCommands); addSchedPriorityCommand(prefixCommands);

View File

@ -68,10 +68,16 @@ public class PrivilegedOperation {
private final OperationType opType; private final OperationType opType;
private final List<String> args; private final List<String> args;
private boolean failureLogging;
public PrivilegedOperation(OperationType opType, String arg) { public PrivilegedOperation(OperationType opType) {
this.opType = opType; this.opType = opType;
this.args = new ArrayList<String>(); this.args = new ArrayList<String>();
this.failureLogging = true;
}
public PrivilegedOperation(OperationType opType, String arg) {
this(opType);
if (arg != null) { if (arg != null) {
this.args.add(arg); this.args.add(arg);
@ -79,8 +85,7 @@ public class PrivilegedOperation {
} }
public PrivilegedOperation(OperationType opType, List<String> args) { public PrivilegedOperation(OperationType opType, List<String> args) {
this.opType = opType; this(opType);
this.args = new ArrayList<String>();
if (args != null) { if (args != null) {
this.args.addAll(args); this.args.addAll(args);
@ -97,6 +102,18 @@ public class PrivilegedOperation {
this.args.addAll(args); this.args.addAll(args);
} }
public void enableFailureLogging() {
this.failureLogging = true;
}
public void disableFailureLogging() {
this.failureLogging = false;
}
public boolean isFailureLoggingEnabled() {
return failureLogging;
}
public OperationType getOperationType() { public OperationType getOperationType() {
return opType; return opType;
} }

View File

@ -155,17 +155,20 @@ public class PrivilegedOperationExecutor {
LOG.debug(exec.getOutput()); LOG.debug(exec.getOutput());
} }
} catch (ExitCodeException e) { } catch (ExitCodeException e) {
StringBuilder logBuilder = new StringBuilder("Shell execution returned " if (operation.isFailureLoggingEnabled()) {
+ "exit code: ")
.append(exec.getExitCode())
.append(". Privileged Execution Operation Output: ")
.append(System.lineSeparator()).append(exec.getOutput());
logBuilder.append("Full command array for failed execution: ") StringBuilder logBuilder = new StringBuilder("Shell execution returned "
.append(System.lineSeparator()); + "exit code: ")
logBuilder.append(Arrays.toString(fullCommandArray)); .append(exec.getExitCode())
.append(". Privileged Execution Operation Output: ")
.append(System.lineSeparator()).append(exec.getOutput());
LOG.warn(logBuilder.toString()); logBuilder.append("Full command array for failed execution: ")
.append(System.lineSeparator());
logBuilder.append(Arrays.toString(fullCommandArray));
LOG.warn(logBuilder.toString());
}
//stderr from shell executor seems to be stuffed into the exception //stderr from shell executor seems to be stuffed into the exception
//'message' - so, we have to extract it and set it as the error out //'message' - so, we have to extract it and set it as the error out

View File

@ -247,7 +247,7 @@ class CGroupsHandlerImpl implements CGroupsHandler {
.append(controller.getName()).append('=').append(controllerPath); .append(controller.getName()).append('=').append(controllerPath);
PrivilegedOperation.OperationType opType = PrivilegedOperation PrivilegedOperation.OperationType opType = PrivilegedOperation
.OperationType.MOUNT_CGROUPS; .OperationType.MOUNT_CGROUPS;
PrivilegedOperation op = new PrivilegedOperation(opType, (String) null); PrivilegedOperation op = new PrivilegedOperation(opType);
op.appendArgs(hierarchy, cGroupKV.toString()); op.appendArgs(hierarchy, cGroupKV.toString());
LOG.info("Mounting controller " + controller.getName() + " at " + LOG.info("Mounting controller " + controller.getName() + " at " +

View File

@ -546,7 +546,7 @@ import java.util.regex.Pattern;
case TC_MODIFY_STATE: case TC_MODIFY_STATE:
case TC_READ_STATE: case TC_READ_STATE:
case TC_READ_STATS: case TC_READ_STATS:
operation = new PrivilegedOperation(opType, (String) null); operation = new PrivilegedOperation(opType);
commands = new ArrayList<>(); commands = new ArrayList<>();
break; break;
default: default:

View File

@ -67,7 +67,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
throws ContainerExecutionException { throws ContainerExecutionException {
Container container = ctx.getContainer(); Container container = ctx.getContainer();
PrivilegedOperation launchOp = new PrivilegedOperation( PrivilegedOperation launchOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.LAUNCH_CONTAINER, (String) null); PrivilegedOperation.OperationType.LAUNCH_CONTAINER);
//All of these arguments are expected to be available in the runtime context //All of these arguments are expected to be available in the runtime context
launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
@ -116,7 +116,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
throws ContainerExecutionException { throws ContainerExecutionException {
Container container = ctx.getContainer(); Container container = ctx.getContainer();
PrivilegedOperation signalOp = new PrivilegedOperation( PrivilegedOperation signalOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null); PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
ctx.getExecutionAttribute(USER), ctx.getExecutionAttribute(USER),
@ -125,6 +125,9 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
ctx.getExecutionAttribute(PID), ctx.getExecutionAttribute(PID),
Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue())); Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
//Some failures here are acceptable. Let the calling executor decide.
signalOp.disableFailureLogging();
try { try {
PrivilegedOperationExecutor executor = PrivilegedOperationExecutor PrivilegedOperationExecutor executor = PrivilegedOperationExecutor
.getInstance(conf); .getInstance(conf);
@ -133,8 +136,8 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
signalOp, null, container.getLaunchContext().getEnvironment(), signalOp, null, container.getLaunchContext().getEnvironment(),
false); false);
} catch (PrivilegedOperationException e) { } catch (PrivilegedOperationException e) {
LOG.warn("Signal container failed. Exception: ", e); //Don't log the failure here. Some kinds of signaling failures are
// acceptable. Let the calling executor decide what to do.
throw new ContainerExecutionException("Signal container failed", e throw new ContainerExecutionException("Signal container failed", e
.getExitCode(), e.getOutput(), e.getErrorOutput()); .getExitCode(), e.getOutput(), e.getErrorOutput());
} }

View File

@ -285,8 +285,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
String commandFile = dockerClient.writeCommandToTempFile(runCommand, String commandFile = dockerClient.writeCommandToTempFile(runCommand,
containerIdStr); containerIdStr);
PrivilegedOperation launchOp = new PrivilegedOperation( PrivilegedOperation launchOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER, (String) PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER);
null);
launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER), launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER),
Integer.toString(PrivilegedOperation Integer.toString(PrivilegedOperation
@ -326,7 +325,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
throws ContainerExecutionException { throws ContainerExecutionException {
Container container = ctx.getContainer(); Container container = ctx.getContainer();
PrivilegedOperation signalOp = new PrivilegedOperation( PrivilegedOperation signalOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null); PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER), signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
ctx.getExecutionAttribute(USER), ctx.getExecutionAttribute(USER),

View File

@ -69,7 +69,7 @@ public class TestPrivilegedOperationExecutor {
cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks"; cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks";
cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks"; cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks";
opDisallowed = new PrivilegedOperation opDisallowed = new PrivilegedOperation
(PrivilegedOperation.OperationType.DELETE_AS_USER, (String) null); (PrivilegedOperation.OperationType.DELETE_AS_USER);
opTasksNone = new PrivilegedOperation opTasksNone = new PrivilegedOperation
(PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP, (PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone); PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone);
@ -118,7 +118,7 @@ public class TestPrivilegedOperationExecutor {
PrivilegedOperationExecutor exec = PrivilegedOperationExecutor PrivilegedOperationExecutor exec = PrivilegedOperationExecutor
.getInstance(confWithExecutorPath); .getInstance(confWithExecutorPath);
PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation
.OperationType.TC_MODIFY_STATE, (String) null); .OperationType.TC_MODIFY_STATE);
String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op); String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op);
//No arguments added - so the resulting array should consist of //No arguments added - so the resulting array should consist of

View File

@ -89,7 +89,7 @@ public class TestCGroupsHandlerImpl {
cGroupsHandler = new CGroupsHandlerImpl(conf, cGroupsHandler = new CGroupsHandlerImpl(conf,
privilegedOperationExecutorMock); privilegedOperationExecutorMock);
PrivilegedOperation expectedOp = new PrivilegedOperation( PrivilegedOperation expectedOp = new PrivilegedOperation(
PrivilegedOperation.OperationType.MOUNT_CGROUPS, (String) null); PrivilegedOperation.OperationType.MOUNT_CGROUPS);
//This is expected to be of the form : //This is expected to be of the form :
//net_cls=<mount_path>/net_cls //net_cls=<mount_path>/net_cls
StringBuffer controllerKV = new StringBuffer(controller.getName()) StringBuffer controllerKV = new StringBuffer(controller.getName())