YARN-4744. Too many signal to container failure in case of LCE. Contributed by Sidharta Seethana
(cherry picked from commit 059caf9989)
This commit is contained in:
parent
5f059e03f5
commit
53ec7c9243
|
@ -166,7 +166,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
||||||
// verify configuration/permissions and exit
|
// verify configuration/permissions and exit
|
||||||
try {
|
try {
|
||||||
PrivilegedOperation checkSetupOp = new PrivilegedOperation(
|
PrivilegedOperation checkSetupOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.CHECK_SETUP, (String) null);
|
PrivilegedOperation.OperationType.CHECK_SETUP);
|
||||||
PrivilegedOperationExecutor privilegedOperationExecutor =
|
PrivilegedOperationExecutor privilegedOperationExecutor =
|
||||||
PrivilegedOperationExecutor.getInstance(conf);
|
PrivilegedOperationExecutor.getInstance(conf);
|
||||||
|
|
||||||
|
@ -222,7 +222,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
||||||
verifyUsernamePattern(user);
|
verifyUsernamePattern(user);
|
||||||
String runAsUser = getRunAsUser(user);
|
String runAsUser = getRunAsUser(user);
|
||||||
PrivilegedOperation initializeContainerOp = new PrivilegedOperation(
|
PrivilegedOperation initializeContainerOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.INITIALIZE_CONTAINER, (String) null);
|
PrivilegedOperation.OperationType.INITIALIZE_CONTAINER);
|
||||||
List<String> prefixCommands = new ArrayList<>();
|
List<String> prefixCommands = new ArrayList<>();
|
||||||
|
|
||||||
addSchedPriorityCommand(prefixCommands);
|
addSchedPriorityCommand(prefixCommands);
|
||||||
|
|
|
@ -68,10 +68,16 @@ public class PrivilegedOperation {
|
||||||
|
|
||||||
private final OperationType opType;
|
private final OperationType opType;
|
||||||
private final List<String> args;
|
private final List<String> args;
|
||||||
|
private boolean failureLogging;
|
||||||
|
|
||||||
public PrivilegedOperation(OperationType opType, String arg) {
|
public PrivilegedOperation(OperationType opType) {
|
||||||
this.opType = opType;
|
this.opType = opType;
|
||||||
this.args = new ArrayList<String>();
|
this.args = new ArrayList<String>();
|
||||||
|
this.failureLogging = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PrivilegedOperation(OperationType opType, String arg) {
|
||||||
|
this(opType);
|
||||||
|
|
||||||
if (arg != null) {
|
if (arg != null) {
|
||||||
this.args.add(arg);
|
this.args.add(arg);
|
||||||
|
@ -79,8 +85,7 @@ public class PrivilegedOperation {
|
||||||
}
|
}
|
||||||
|
|
||||||
public PrivilegedOperation(OperationType opType, List<String> args) {
|
public PrivilegedOperation(OperationType opType, List<String> args) {
|
||||||
this.opType = opType;
|
this(opType);
|
||||||
this.args = new ArrayList<String>();
|
|
||||||
|
|
||||||
if (args != null) {
|
if (args != null) {
|
||||||
this.args.addAll(args);
|
this.args.addAll(args);
|
||||||
|
@ -97,6 +102,18 @@ public class PrivilegedOperation {
|
||||||
this.args.addAll(args);
|
this.args.addAll(args);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void enableFailureLogging() {
|
||||||
|
this.failureLogging = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void disableFailureLogging() {
|
||||||
|
this.failureLogging = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFailureLoggingEnabled() {
|
||||||
|
return failureLogging;
|
||||||
|
}
|
||||||
|
|
||||||
public OperationType getOperationType() {
|
public OperationType getOperationType() {
|
||||||
return opType;
|
return opType;
|
||||||
}
|
}
|
||||||
|
|
|
@ -155,17 +155,20 @@ public class PrivilegedOperationExecutor {
|
||||||
LOG.debug(exec.getOutput());
|
LOG.debug(exec.getOutput());
|
||||||
}
|
}
|
||||||
} catch (ExitCodeException e) {
|
} catch (ExitCodeException e) {
|
||||||
StringBuilder logBuilder = new StringBuilder("Shell execution returned "
|
if (operation.isFailureLoggingEnabled()) {
|
||||||
+ "exit code: ")
|
|
||||||
.append(exec.getExitCode())
|
|
||||||
.append(". Privileged Execution Operation Output: ")
|
|
||||||
.append(System.lineSeparator()).append(exec.getOutput());
|
|
||||||
|
|
||||||
logBuilder.append("Full command array for failed execution: ")
|
StringBuilder logBuilder = new StringBuilder("Shell execution returned "
|
||||||
.append(System.lineSeparator());
|
+ "exit code: ")
|
||||||
logBuilder.append(Arrays.toString(fullCommandArray));
|
.append(exec.getExitCode())
|
||||||
|
.append(". Privileged Execution Operation Output: ")
|
||||||
|
.append(System.lineSeparator()).append(exec.getOutput());
|
||||||
|
|
||||||
LOG.warn(logBuilder.toString());
|
logBuilder.append("Full command array for failed execution: ")
|
||||||
|
.append(System.lineSeparator());
|
||||||
|
logBuilder.append(Arrays.toString(fullCommandArray));
|
||||||
|
|
||||||
|
LOG.warn(logBuilder.toString());
|
||||||
|
}
|
||||||
|
|
||||||
//stderr from shell executor seems to be stuffed into the exception
|
//stderr from shell executor seems to be stuffed into the exception
|
||||||
//'message' - so, we have to extract it and set it as the error out
|
//'message' - so, we have to extract it and set it as the error out
|
||||||
|
|
|
@ -247,7 +247,7 @@ class CGroupsHandlerImpl implements CGroupsHandler {
|
||||||
.append(controller.getName()).append('=').append(controllerPath);
|
.append(controller.getName()).append('=').append(controllerPath);
|
||||||
PrivilegedOperation.OperationType opType = PrivilegedOperation
|
PrivilegedOperation.OperationType opType = PrivilegedOperation
|
||||||
.OperationType.MOUNT_CGROUPS;
|
.OperationType.MOUNT_CGROUPS;
|
||||||
PrivilegedOperation op = new PrivilegedOperation(opType, (String) null);
|
PrivilegedOperation op = new PrivilegedOperation(opType);
|
||||||
|
|
||||||
op.appendArgs(hierarchy, cGroupKV.toString());
|
op.appendArgs(hierarchy, cGroupKV.toString());
|
||||||
LOG.info("Mounting controller " + controller.getName() + " at " +
|
LOG.info("Mounting controller " + controller.getName() + " at " +
|
||||||
|
|
|
@ -546,7 +546,7 @@ import java.util.regex.Pattern;
|
||||||
case TC_MODIFY_STATE:
|
case TC_MODIFY_STATE:
|
||||||
case TC_READ_STATE:
|
case TC_READ_STATE:
|
||||||
case TC_READ_STATS:
|
case TC_READ_STATS:
|
||||||
operation = new PrivilegedOperation(opType, (String) null);
|
operation = new PrivilegedOperation(opType);
|
||||||
commands = new ArrayList<>();
|
commands = new ArrayList<>();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -67,7 +67,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
throws ContainerExecutionException {
|
throws ContainerExecutionException {
|
||||||
Container container = ctx.getContainer();
|
Container container = ctx.getContainer();
|
||||||
PrivilegedOperation launchOp = new PrivilegedOperation(
|
PrivilegedOperation launchOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.LAUNCH_CONTAINER, (String) null);
|
PrivilegedOperation.OperationType.LAUNCH_CONTAINER);
|
||||||
|
|
||||||
//All of these arguments are expected to be available in the runtime context
|
//All of these arguments are expected to be available in the runtime context
|
||||||
launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
launchOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
||||||
|
@ -116,7 +116,7 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
throws ContainerExecutionException {
|
throws ContainerExecutionException {
|
||||||
Container container = ctx.getContainer();
|
Container container = ctx.getContainer();
|
||||||
PrivilegedOperation signalOp = new PrivilegedOperation(
|
PrivilegedOperation signalOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null);
|
PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
|
||||||
|
|
||||||
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
||||||
ctx.getExecutionAttribute(USER),
|
ctx.getExecutionAttribute(USER),
|
||||||
|
@ -125,6 +125,9 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
ctx.getExecutionAttribute(PID),
|
ctx.getExecutionAttribute(PID),
|
||||||
Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
|
Integer.toString(ctx.getExecutionAttribute(SIGNAL).getValue()));
|
||||||
|
|
||||||
|
//Some failures here are acceptable. Let the calling executor decide.
|
||||||
|
signalOp.disableFailureLogging();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
PrivilegedOperationExecutor executor = PrivilegedOperationExecutor
|
PrivilegedOperationExecutor executor = PrivilegedOperationExecutor
|
||||||
.getInstance(conf);
|
.getInstance(conf);
|
||||||
|
@ -133,8 +136,8 @@ public class DefaultLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
signalOp, null, container.getLaunchContext().getEnvironment(),
|
signalOp, null, container.getLaunchContext().getEnvironment(),
|
||||||
false);
|
false);
|
||||||
} catch (PrivilegedOperationException e) {
|
} catch (PrivilegedOperationException e) {
|
||||||
LOG.warn("Signal container failed. Exception: ", e);
|
//Don't log the failure here. Some kinds of signaling failures are
|
||||||
|
// acceptable. Let the calling executor decide what to do.
|
||||||
throw new ContainerExecutionException("Signal container failed", e
|
throw new ContainerExecutionException("Signal container failed", e
|
||||||
.getExitCode(), e.getOutput(), e.getErrorOutput());
|
.getExitCode(), e.getOutput(), e.getErrorOutput());
|
||||||
}
|
}
|
||||||
|
|
|
@ -285,8 +285,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
String commandFile = dockerClient.writeCommandToTempFile(runCommand,
|
String commandFile = dockerClient.writeCommandToTempFile(runCommand,
|
||||||
containerIdStr);
|
containerIdStr);
|
||||||
PrivilegedOperation launchOp = new PrivilegedOperation(
|
PrivilegedOperation launchOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER, (String)
|
PrivilegedOperation.OperationType.LAUNCH_DOCKER_CONTAINER);
|
||||||
null);
|
|
||||||
|
|
||||||
launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER),
|
launchOp.appendArgs(runAsUser, ctx.getExecutionAttribute(USER),
|
||||||
Integer.toString(PrivilegedOperation
|
Integer.toString(PrivilegedOperation
|
||||||
|
@ -326,7 +325,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
throws ContainerExecutionException {
|
throws ContainerExecutionException {
|
||||||
Container container = ctx.getContainer();
|
Container container = ctx.getContainer();
|
||||||
PrivilegedOperation signalOp = new PrivilegedOperation(
|
PrivilegedOperation signalOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.SIGNAL_CONTAINER, (String) null);
|
PrivilegedOperation.OperationType.SIGNAL_CONTAINER);
|
||||||
|
|
||||||
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
signalOp.appendArgs(ctx.getExecutionAttribute(RUN_AS_USER),
|
||||||
ctx.getExecutionAttribute(USER),
|
ctx.getExecutionAttribute(USER),
|
||||||
|
|
|
@ -69,7 +69,7 @@ public class TestPrivilegedOperationExecutor {
|
||||||
cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks";
|
cGroupTasks2 = "net_cls/hadoop_yarn/container_01/tasks";
|
||||||
cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks";
|
cGroupTasks3 = "blkio/hadoop_yarn/container_01/tasks";
|
||||||
opDisallowed = new PrivilegedOperation
|
opDisallowed = new PrivilegedOperation
|
||||||
(PrivilegedOperation.OperationType.DELETE_AS_USER, (String) null);
|
(PrivilegedOperation.OperationType.DELETE_AS_USER);
|
||||||
opTasksNone = new PrivilegedOperation
|
opTasksNone = new PrivilegedOperation
|
||||||
(PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
|
(PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
|
||||||
PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone);
|
PrivilegedOperation.CGROUP_ARG_PREFIX + cGroupTasksNone);
|
||||||
|
@ -118,7 +118,7 @@ public class TestPrivilegedOperationExecutor {
|
||||||
PrivilegedOperationExecutor exec = PrivilegedOperationExecutor
|
PrivilegedOperationExecutor exec = PrivilegedOperationExecutor
|
||||||
.getInstance(confWithExecutorPath);
|
.getInstance(confWithExecutorPath);
|
||||||
PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation
|
PrivilegedOperation op = new PrivilegedOperation(PrivilegedOperation
|
||||||
.OperationType.TC_MODIFY_STATE, (String) null);
|
.OperationType.TC_MODIFY_STATE);
|
||||||
String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op);
|
String[] cmdArray = exec.getPrivilegedOperationExecutionCommand(null, op);
|
||||||
|
|
||||||
//No arguments added - so the resulting array should consist of
|
//No arguments added - so the resulting array should consist of
|
||||||
|
|
|
@ -89,7 +89,7 @@ public class TestCGroupsHandlerImpl {
|
||||||
cGroupsHandler = new CGroupsHandlerImpl(conf,
|
cGroupsHandler = new CGroupsHandlerImpl(conf,
|
||||||
privilegedOperationExecutorMock);
|
privilegedOperationExecutorMock);
|
||||||
PrivilegedOperation expectedOp = new PrivilegedOperation(
|
PrivilegedOperation expectedOp = new PrivilegedOperation(
|
||||||
PrivilegedOperation.OperationType.MOUNT_CGROUPS, (String) null);
|
PrivilegedOperation.OperationType.MOUNT_CGROUPS);
|
||||||
//This is expected to be of the form :
|
//This is expected to be of the form :
|
||||||
//net_cls=<mount_path>/net_cls
|
//net_cls=<mount_path>/net_cls
|
||||||
StringBuffer controllerKV = new StringBuffer(controller.getName())
|
StringBuffer controllerKV = new StringBuffer(controller.getName())
|
||||||
|
|
Loading…
Reference in New Issue