YARN-4744. Too many signal to container failure in case of LCE. Contributed by Sidharta Seethana
(cherry picked from commit 059caf9989)
This commit is contained in:
parent
5f059e03f5
commit
53ec7c9243
|
@ -166,7 +166,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
|||
// verify configuration/permissions and exit
|
||||
try {
|
||||
PrivilegedOperation checkSetupOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.CHECK_SETUP, (String) null);
|
||||
PrivilegedOperation.OperationType.CHECK_SETUP);
|
||||
PrivilegedOperationExecutor privilegedOperationExecutor =
|
||||
PrivilegedOperationExecutor.getInstance(conf);
|
||||
|
||||
|
@ -222,7 +222,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
|||
verifyUsernamePattern(user);
|
||||
String runAsUser = getRunAsUser(user);
|
||||
PrivilegedOperation initializeContainerOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.INITIALIZE_CONTAINER, (String) null);
|
||||
PrivilegedOperation.OperationType.INITIALIZE_CONTAINER);
|
||||
List<String> prefixCommands = new ArrayList<>();
|
||||
|
||||
addSchedPriorityCommand(prefixCommands);
|
||||
|
|
|
@ -68,10 +68,16 @@ public class PrivilegedOperation {
|
|||
|
||||
private final OperationType opType;
|
||||
private final List<String> args;
|
||||
private boolean failureLogging;
|
||||
|
||||
public PrivilegedOperation(OperationType opType, String arg) {
|
||||
public PrivilegedOperation(OperationType opType) {
|
||||
this.opType = opType;
|
||||
this.args = new ArrayList<String>();
|
||||
this.failureLogging = true;
|
||||
}
|
||||
|
||||
public PrivilegedOperation(OperationType opType, String arg) {
|
||||
this(opType);
|
||||
|
||||
if (arg != null) {
|
||||
this.args.add(arg);
|
||||
|
@ -79,8 +85,7 @@ public class PrivilegedOperation {
|
|||
}
|
||||
|
||||
public PrivilegedOperation(OperationType opType, List<String> args) {
|
||||
this.opType = opType;
|
||||
this.args = new ArrayList<String>();
|
||||
this(opType);
|
||||
|
||||
if (args != null) {
|
||||
this.args.addAll(args);
|
||||
|
@ -97,6 +102,18 @@ public class PrivilegedOperation {
|
|||
this.args.addAll(args);
|
||||
}
|
||||
|
||||
public void enableFailureLogging() {
|
||||
this.failureLogging = true;
|
||||
}
|
||||
|
||||
public void disableFailureLogging() {
|
||||
this.failureLogging = false;
|
||||
}
|
||||
|
||||
public boolean isFailureLoggingEnabled() {
|
||||
return failureLogging;
|
||||
}
|
||||
|
||||
public OperationType getOperationType() {
|
||||
return opType;
|
||||
}
|
||||
|
|
|
@ -155,17 +155,20 @@ public class PrivilegedOperationExecutor {
|
|||
LOG.debug(exec.getOutput());
|
||||
}
|
||||
} catch (ExitCodeException e) {
|
||||
StringBuilder logBuilder = new StringBuilder("Shell execution returned "
|
||||
+ "exit code: ")
|
||||
.append(exec.getExitCode())
|
||||
.append(". Privileged Execution Operation Output: ")
|
||||
.append(System.lineSeparator()).append(exec.getOutput());
|
||||
if (operation.isFailureLoggingEnabled()) {
|
||||
|
||||
logBuilder.append("Full command array for failed execution: ")
|
||||
.append(System.lineSeparator());
|
||||
logBuilder.append(Arrays.toString(fullCommandArray));
|
||||
StringBuilder logBuilder = new StringBuilder("Shell execution returned "
|
||||
+ "exit code: ")
|
||||
.append(exec.getExitCode())
|
||||
.append(". Privileged Execution Operation Output: ")
|
||||
.append(System.lineSeparator()).append(exec.getOutput());
|
||||
|
||||
LOG.warn(logBuilder.toString());
|
||||
logBuilder.append("Full command array for failed execution: ")
|
||||
.append(System.lineSeparator());
|
||||
logBuilder.append(Arrays.toString(fullCommandArray));
|
||||
|
||||
LOG.warn(logBuilder.toString());
|
||||
}
|
||||
|
||||
//stderr from shell executor seems to be stuffed into the exception
|
||||
//'message' - so, we have to extract it and set it as the error out
|
||||
|
|
|
@ -247,7 +247,7 @@ class CGroupsHandlerImpl implements CGroupsHandler {
|
|||
.append(controller.getName()).append('=').append(controllerPath);
|
||||
PrivilegedOperation.OperationType opType = PrivilegedOperation
|
||||
.OperationType.MOUNT_CGROUPS;
|
||||
PrivilegedOperation op = new PrivilegedOperation(opType, (String) null);
|
||||
PrivilegedOperation op = new PrivilegedOperation(opType);
|
||||
|
||||
op.appendArgs(hierarchy, cGroupKV.toString());
|
||||
LOG.info("Mounting controller " + controller.getName() + " at " +
|
||||
|
|
|
@ -546,7 +546,7 @@ import java.util.regex.Pattern;
|
|||
case TC_MODIFY_STATE:
|
||||
case TC_READ_STATE:
|
||||
case TC_READ_STATS:
|
||||
operation = new PrivilegedOperation(opType, (String) null);
|
||||
operation = new PrivilegedOperation(opType);
|
||||
commands = new ArrayList<>();
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -67,7 +67,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
throws ContainerExecutionException {
|
||||
Container container = ctx.getContainer();
|
||||
PrivilegedOperation launchOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.LAUNCH_CONTAINER, (String) null);
|
||||
PrivilegedOperation.OperationType.LAUNCH_CONTAINER);
|
||||
|
||||
//All of these arguments are expected to be available in the runtime context
|
||||
launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
||||
|
@ -116,7 +116,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
throws ContainerExecutionException {
|
||||
Container container = ctx.getContainer();
|
||||
PrivilegedOperation signalOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null);
|
||||
PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
|
||||
|
||||
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
||||
ctx.getExecutionAttribute(USER),
|
||||
|
@ -125,6 +125,9 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
ctx.getExecutionAttribute(PID),
|
||||
Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
|
||||
|
||||
//Some failures here are acceptable. Let the calling executor decide.
|
||||
signalOp.disableFailureLogging();
|
||||
|
||||
try {
|
||||
PrivilegedOperationExecutor executor = PrivilegedOperationExecutor
|
||||
.getInstance(conf);
|
||||
|
@ -133,8 +136,8 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
signalOp, null, container.getLaunchContext().getEnvironment(),
|
||||
false);
|
||||
} catch (PrivilegedOperationException e) {
|
||||
LOG.warn("Signal container failed. Exception: ", e);
|
||||
|
||||
//Don't log the failure here. Some kinds of signaling failures are
|
||||
// acceptable. Let the calling executor decide what to do.
|
||||
throw new ContainerExecutionException("Signal container failed", e
|
||||
.getExitCode(), e.getOutput(), e.getErrorOutput());
|
||||
}
|
||||
|
|
|
@ -285,8 +285,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
String commandFile = dockerClient.writeCommandToTempFile(runCommand,
|
||||
containerIdStr);
|
||||
PrivilegedOperation launchOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER, (String)
|
||||
null);
|
||||
PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER);
|
||||
|
||||
launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER),
|
||||
Integer.toString(PrivilegedOperation
|
||||
|
@ -326,7 +325,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
throws ContainerExecutionException {
|
||||
Container container = ctx.getContainer();
|
||||
PrivilegedOperation signalOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null);
|
||||
PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
|
||||
|
||||
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
||||
ctx.getExecutionAttribute(USER),
|
||||
|
|
|
@ -69,7 +69,7 @@ public class TestPrivilegedOperationExecutor {
|
|||
cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks";
|
||||
cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks";
|
||||
opDisallowed = new PrivilegedOperation
|
||||
(PrivilegedOperation.OperationType.DELETE_AS_USER, (String) null);
|
||||
(PrivilegedOperation.OperationType.DELETE_AS_USER);
|
||||
opTasksNone = new PrivilegedOperation
|
||||
(PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
|
||||
PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone);
|
||||
|
@ -118,7 +118,7 @@ public class TestPrivilegedOperationExecutor {
|
|||
PrivilegedOperationExecutor exec = PrivilegedOperationExecutor
|
||||
.getInstance(confWithExecutorPath);
|
||||
PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation
|
||||
.OperationType.TC_MODIFY_STATE, (String) null);
|
||||
.OperationType.TC_MODIFY_STATE);
|
||||
String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op);
|
||||
|
||||
//No arguments added - so the resulting array should consist of
|
||||
|
|
|
@ -89,7 +89,7 @@ public class TestCGroupsHandlerImpl {
|
|||
cGroupsHandler = new CGroupsHandlerImpl(conf,
|
||||
privilegedOperationExecutorMock);
|
||||
PrivilegedOperation expectedOp = new PrivilegedOperation(
|
||||
PrivilegedOperation.OperationType.MOUNT_CGROUPS, (String) null);
|
||||
PrivilegedOperation.OperationType.MOUNT_CGROUPS);
|
||||
//This is expected to be of the form :
|
||||
//net_cls=<mount_path>/net_cls
|
||||
StringBuffer controllerKV = new StringBuffer(controller.getName())
|
||||
|
|
Loading…
Reference in New Issue