YARN-493. Fixed some shell related flaws in YARN on Windows. Contributed by Chris Nauroth.

HADOOP-9486. Promoted Windows and Shell related utils from YARN to Hadoop Common. Contributed by Chris Nauroth.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1469667 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-04-19 02:14:58 +00:00
parent 8e1c2823fc
commit 44bf8525a5
11 changed files with 320 additions and 201 deletions

View File

@ -163,6 +163,9 @@ Trunk (Unreleased)
HADOOP-9258 Add stricter tests to FileSystemContractTestBase (stevel)
HADOOP-9486. Promoted Windows and Shell related utils from YARN to Hadoop
Common. (Chris Nauroth via vinodkv)
BUG FIXES
HADOOP-9451. Fault single-layer config if node group topology is enabled.

View File

@ -123,6 +123,56 @@ abstract public class Shell {
: new String[] { "ln", "-s", target, link };
}
/** Return a command for determining if process with specified pid is alive. */
public static String[] getCheckProcessIsAliveCommand(String pid) {
return Shell.WINDOWS ?
new String[] { Shell.WINUTILS, "task", "isAlive", pid } :
new String[] { "kill", "-0", isSetsidAvailable ? "-" + pid : pid };
}
/** Return a command to send a signal to a given pid */
public static String[] getSignalKillCommand(int code, String pid) {
return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } :
new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid };
}
/**
* Returns a File referencing a script with the given basename, inside the
* given parent directory. The file extension is inferred by platform: ".cmd"
* on Windows, or ".sh" otherwise.
*
* @param parent File parent directory
* @param basename String script file basename
* @return File referencing the script in the directory
*/
public static File appendScriptExtension(File parent, String basename) {
return new File(parent, appendScriptExtension(basename));
}
/**
* Returns a script file name with the given basename. The file extension is
* inferred by platform: ".cmd" on Windows, or ".sh" otherwise.
*
* @param basename String script file basename
* @return String script file name
*/
public static String appendScriptExtension(String basename) {
return basename + (WINDOWS ? ".cmd" : ".sh");
}
/**
* Returns a command to run the given script. The script interpreter is
* inferred by platform: cmd on Windows or bash otherwise.
*
* @param script File script to run
* @return String[] command to run the script
*/
public static String[] getRunScriptCommand(File script) {
String absolutePath = script.getAbsolutePath();
return WINDOWS ? new String[] { "cmd", "/c", absolutePath } :
new String[] { "/bin/bash", absolutePath };
}
/** a Unix command to set permission */
public static final String SET_PERMISSION_COMMAND = "chmod";
/** a Unix command to set owner */
@ -243,6 +293,26 @@ abstract public class Shell {
return winUtilsPath;
}
public static final boolean isSetsidAvailable = isSetsidSupported();
private static boolean isSetsidSupported() {
if (Shell.WINDOWS) {
return false;
}
ShellCommandExecutor shexec = null;
boolean setsidSupported = true;
try {
String[] args = {"setsid", "bash", "-c", "echo $$"};
shexec = new ShellCommandExecutor(args);
shexec.execute();
} catch (IOException ioe) {
LOG.warn("setsid is not available on this machine. So not using it.");
setsidSupported = false;
} finally { // handle the exit code
LOG.info("setsid exited with exit code " + shexec.getExitCode());
}
return setsidSupported;
}
/** Token separator regex used to parse Shell tool outputs */
public static final String TOKEN_SEPARATOR_REGEX
= WINDOWS ? "[|\n\r]" : "[ \t\n\r\f]";

View File

@ -24,6 +24,10 @@
#define ERROR_TASK_NOT_ALIVE 1
// This exit code for killed processes is compatible with Unix, where a killed
// process exits with 128 + signal. For SIGKILL, this would be 128 + 9 = 137.
#define KILLED_PROCESS_EXIT_CODE 137
// List of different task related command line options supported by
// winutils.
typedef enum TaskCommandOptionType
@ -264,7 +268,7 @@ DWORD killTask(_TCHAR* jobObjName)
return err;
}
if(TerminateJobObject(jobObject, 1) == 0)
if(TerminateJobObject(jobObject, KILLED_PROCESS_EXIT_CODE) == 0)
{
return GetLastError();
}

View File

@ -34,6 +34,9 @@ Trunk - Unreleased
YARN-487. Modify path manipulation in LocalDirsHandlerService to let
TestDiskFailures pass on Windows. (Chris Nauroth via vinodkv)
YARN-493. Fixed some shell related flaws in YARN on Windows. (Chris Nauroth
via vinodkv)
BREAKDOWN OF HADOOP-8562 SUBTASKS
YARN-158. Yarn creating package-info.java must not depend on sh.

View File

@ -222,19 +222,6 @@ public abstract class ContainerExecutor implements Configurable {
}
/** Return a command for determining if process with specified pid is alive. */
protected static String[] getCheckProcessIsAliveCommand(String pid) {
return Shell.WINDOWS ?
new String[] { Shell.WINUTILS, "task", "isAlive", pid } :
new String[] { "kill", "-0", pid };
}
/** Return a command to send a signal to a given pid */
protected static String[] getSignalKillCommand(int code, String pid) {
return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } :
new String[] { "kill", "-" + code, pid };
}
/**
* Is the container still active?
* @param containerId
@ -303,26 +290,6 @@ public abstract class ContainerExecutor implements Configurable {
return pid;
}
public static final boolean isSetsidAvailable = isSetsidSupported();
private static boolean isSetsidSupported() {
if (Shell.WINDOWS) {
return true;
}
ShellCommandExecutor shexec = null;
boolean setsidSupported = true;
try {
String[] args = {"setsid", "bash", "-c", "echo $$"};
shexec = new ShellCommandExecutor(args);
shexec.execute();
} catch (IOException ioe) {
LOG.warn("setsid is not available on this machine. So not using it.");
setsidSupported = false;
} finally { // handle the exit code
LOG.info("setsid exited with exit code " + shexec.getExitCode());
}
return setsidSupported;
}
public static class DelayedProcessKiller extends Thread {
private final String user;
private final String pid;

View File

@ -50,6 +50,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.Conta
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
import org.apache.hadoop.yarn.util.ConverterUtils;
import com.google.common.annotations.VisibleForTesting;
public class DefaultContainerExecutor extends ContainerExecutor {
private static final Log LOG = LogFactory
@ -237,8 +239,9 @@ public class DefaultContainerExecutor extends ContainerExecutor {
protected abstract void writeLocalWrapperScript(Path launchDst, Path pidFile,
PrintStream pout);
protected LocalWrapperScriptBuilder(Path wrapperScriptPath) {
this.wrapperScriptPath = wrapperScriptPath;
protected LocalWrapperScriptBuilder(Path containerWorkDir) {
this.wrapperScriptPath = new Path(containerWorkDir,
Shell.appendScriptExtension("default_container_executor"));
}
}
@ -246,7 +249,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
extends LocalWrapperScriptBuilder {
public UnixLocalWrapperScriptBuilder(Path containerWorkDir) {
super(new Path(containerWorkDir, "default_container_executor.sh"));
super(containerWorkDir);
}
@Override
@ -260,7 +263,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
pout.println();
pout.println("echo $$ > " + pidFile.toString() + ".tmp");
pout.println("/bin/mv -f " + pidFile.toString() + ".tmp " + pidFile);
String exec = ContainerExecutor.isSetsidAvailable? "exec setsid" : "exec";
String exec = Shell.isSetsidAvailable? "exec setsid" : "exec";
pout.println(exec + " /bin/bash -c \"" +
launchDst.toUri().getPath().toString() + "\"");
}
@ -274,7 +277,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
public WindowsLocalWrapperScriptBuilder(String containerIdStr,
Path containerWorkDir) {
super(new Path(containerWorkDir, "default_container_executor.cmd"));
super(containerWorkDir);
this.containerIdStr = containerIdStr;
}
@ -297,18 +300,15 @@ public class DefaultContainerExecutor extends ContainerExecutor {
@Override
public boolean signalContainer(String user, String pid, Signal signal)
throws IOException {
final String sigpid = ContainerExecutor.isSetsidAvailable
? "-" + pid
: pid;
LOG.debug("Sending signal " + signal.getValue() + " to pid " + sigpid
LOG.debug("Sending signal " + signal.getValue() + " to pid " + pid
+ " as user " + user);
if (!containerIsAlive(sigpid)) {
if (!containerIsAlive(pid)) {
return false;
}
try {
killContainer(sigpid, signal);
killContainer(pid, signal);
} catch (IOException e) {
if (!containerIsAlive(sigpid)) {
if (!containerIsAlive(pid)) {
return false;
}
throw e;
@ -322,9 +322,11 @@ public class DefaultContainerExecutor extends ContainerExecutor {
* @param pid String pid
* @return boolean true if the process is alive
*/
private boolean containerIsAlive(String pid) throws IOException {
@VisibleForTesting
public static boolean containerIsAlive(String pid) throws IOException {
try {
new ShellCommandExecutor(getCheckProcessIsAliveCommand(pid)).execute();
new ShellCommandExecutor(Shell.getCheckProcessIsAliveCommand(pid))
.execute();
// successful execution means process is alive
return true;
}
@ -342,7 +344,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
* (for logging).
*/
private void killContainer(String pid, Signal signal) throws IOException {
new ShellCommandExecutor(getSignalKillCommand(signal.getValue(), pid))
new ShellCommandExecutor(Shell.getSignalKillCommand(signal.getValue(), pid))
.execute();
}

View File

@ -72,8 +72,8 @@ public class ContainerLaunch implements Callable<Integer> {
private static final Log LOG = LogFactory.getLog(ContainerLaunch.class);
public static final String CONTAINER_SCRIPT = Shell.WINDOWS ?
"launch_container.cmd" : "launch_container.sh";
public static final String CONTAINER_SCRIPT =
Shell.appendScriptExtension("launch_container");
public static final String FINAL_CONTAINER_TOKENS_FILE = "container_tokens";
private static final String PID_FILE_NAME_FMT = "%s.pid";

View File

@ -22,12 +22,13 @@ import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -40,6 +41,7 @@ import junit.framework.Assert;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@ -59,6 +61,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.util.BuilderUtils;
@ -81,6 +84,7 @@ public class TestNodeManagerShutdown {
.getRecordFactory(null);
static final String user = "nobody";
private FileContext localFS;
private ContainerId cId;
private CyclicBarrier syncBarrier = new CyclicBarrier(2);
@Before
@ -90,6 +94,9 @@ public class TestNodeManagerShutdown {
logsDir.mkdirs();
remoteLogsDir.mkdirs();
nmLocalDir.mkdirs();
// Construct the Container-id
cId = createContainerId();
}
@After
@ -115,9 +122,15 @@ public class TestNodeManagerShutdown {
nm.stop();
// Now verify the contents of the file
// Script generates a message when it receives a sigterm
// so we look for that
// Now verify the contents of the file. Script generates a message when it
// receives a sigterm so we look for that. We cannot perform this check on
// Windows, because the process is not notified when killed by winutils.
// There is no way for the process to trap and respond. Instead, we can
// verify that the job object with ID matching container ID no longer exists.
if (Shell.WINDOWS) {
Assert.assertFalse("Process is still alive!",
DefaultContainerExecutor.containerIsAlive(cId.toString()));
} else {
BufferedReader reader =
new BufferedReader(new FileReader(processStartFile));
@ -135,6 +148,7 @@ public class TestNodeManagerShutdown {
Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
reader.close();
}
}
@SuppressWarnings("unchecked")
@Test
@ -162,8 +176,6 @@ public class TestNodeManagerShutdown {
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
Container mockContainer = mock(Container.class);
// Construct the Container-id
ContainerId cId = createContainerId();
when(mockContainer.getId()).thenReturn(cId);
containerLaunchContext.setUser(user);
@ -184,9 +196,7 @@ public class TestNodeManagerShutdown {
localResources.put(destinationFile, localResource);
containerLaunchContext.setLocalResources(localResources);
containerLaunchContext.setUser(containerLaunchContext.getUser());
List<String> commands = new ArrayList<String>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
containerLaunchContext.setCommands(commands);
Resource resource = BuilderUtils.newResource(1024, 1);
when(mockContainer.getResource()).thenReturn(resource);
@ -234,16 +244,24 @@ public class TestNodeManagerShutdown {
* stopped by external means.
*/
private File createUnhaltingScriptFile() throws IOException {
File scriptFile = new File(tmpDir, "scriptFile.sh");
BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile));
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
PrintWriter fileWriter = new PrintWriter(scriptFile);
if (Shell.WINDOWS) {
fileWriter.println("@echo \"Running testscript for delayed kill\"");
fileWriter.println("@echo \"Writing pid to start file\"");
fileWriter.println("@echo " + cId + ">> " + processStartFile);
fileWriter.println("@pause");
} else {
fileWriter.write("#!/bin/bash\n\n");
fileWriter.write("echo \"Running testscript for delayed kill\"\n");
fileWriter.write("hello=\"Got SIGTERM\"\n");
fileWriter.write("umask 0\n");
fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n");
fileWriter.write("trap \"echo $hello >> " + processStartFile +
"\" SIGTERM\n");
fileWriter.write("echo \"Writing pid to start file\"\n");
fileWriter.write("echo $$ >> " + processStartFile + "\n");
fileWriter.write("while true; do\ndate >> /dev/null;\n done\n");
}
fileWriter.close();
return scriptFile;

View File

@ -76,15 +76,15 @@ public abstract class BaseContainerManagerTest {
public BaseContainerManagerTest() throws UnsupportedFileSystemException {
localFS = FileContext.getLocalFSFileContext();
localDir =
new File("target", this.getClass().getName() + "-localDir")
new File("target", this.getClass().getSimpleName() + "-localDir")
.getAbsoluteFile();
localLogDir =
new File("target", this.getClass().getName() + "-localLogDir")
new File("target", this.getClass().getSimpleName() + "-localLogDir")
.getAbsoluteFile();
remoteLogDir =
new File("target", this.getClass().getName() + "-remoteLogDir")
new File("target", this.getClass().getSimpleName() + "-remoteLogDir")
.getAbsoluteFile();
tmpDir = new File("target", this.getClass().getName() + "-tmpDir");
tmpDir = new File("target", this.getClass().getSimpleName() + "-tmpDir");
}
protected static Log LOG = LogFactory

View File

@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest;
@ -53,6 +54,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
@ -196,22 +198,29 @@ public class TestContainerManager extends BaseContainerManagerTest {
InterruptedException {
containerManager.start();
File scriptFile = new File(tmpDir, "scriptFile.sh");
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
PrintWriter fileWriter = new PrintWriter(scriptFile);
File processStartFile =
new File(tmpDir, "start_file.txt").getAbsoluteFile();
// ////// Construct the Container-id
ContainerId cId = createContainerId();
if (Shell.WINDOWS) {
fileWriter.println("@echo Hello World!> " + processStartFile);
fileWriter.println("@echo " + cId + ">> " + processStartFile);
fileWriter.println("@ping -n 100 127.0.0.1 >nul");
} else {
fileWriter.write("\numask 0"); // So that start file is readable by the test
fileWriter.write("\necho Hello World! > " + processStartFile);
fileWriter.write("\necho $$ >> " + processStartFile);
fileWriter.write("\nexec sleep 100");
}
fileWriter.close();
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
// ////// Construct the Container-id
ContainerId cId = createContainerId();
containerLaunchContext.setUser(user);
URL resource_alpha =
@ -230,14 +239,12 @@ public class TestContainerManager extends BaseContainerManagerTest {
localResources.put(destinationFile, rsrc_alpha);
containerLaunchContext.setLocalResources(localResources);
containerLaunchContext.setUser(containerLaunchContext.getUser());
List<String> commands = new ArrayList<String>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
containerLaunchContext.setCommands(commands);
Container mockContainer = mock(Container.class);
when(mockContainer.getId()).thenReturn(cId);
when(mockContainer.getResource()).thenReturn(
BuilderUtils.newResource(100 * 1024 * 1024, 1));
BuilderUtils.newResource(100, 1)); // MB
StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class);
startRequest.setContainerLaunchContext(containerLaunchContext);
startRequest.setContainer(mockContainer);
@ -264,12 +271,10 @@ public class TestContainerManager extends BaseContainerManagerTest {
// Assert that the process is alive
Assert.assertTrue("Process is not alive!",
exec.signalContainer(user,
pid, Signal.NULL));
DefaultContainerExecutor.containerIsAlive(pid));
// Once more
Assert.assertTrue("Process is not alive!",
exec.signalContainer(user,
pid, Signal.NULL));
DefaultContainerExecutor.containerIsAlive(pid));
StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class);
stopRequest.setContainerId(cId);
@ -283,38 +288,46 @@ public class TestContainerManager extends BaseContainerManagerTest {
gcsRequest.setContainerId(cId);
ContainerStatus containerStatus =
containerManager.getContainerStatus(gcsRequest).getStatus();
Assert.assertEquals(ExitCode.TERMINATED.getExitCode(),
containerStatus.getExitStatus());
int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() :
ExitCode.TERMINATED.getExitCode();
Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus());
// Assert that the process is not alive anymore
Assert.assertFalse("Process is still alive!",
exec.signalContainer(user,
pid, Signal.NULL));
DefaultContainerExecutor.containerIsAlive(pid));
}
private void testContainerLaunchAndExit(int exitCode) throws IOException, InterruptedException {
File scriptFile = new File(tmpDir, "scriptFile.sh");
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
PrintWriter fileWriter = new PrintWriter(scriptFile);
File processStartFile =
new File(tmpDir, "start_file.txt").getAbsoluteFile();
// ////// Construct the Container-id
ContainerId cId = createContainerId();
if (Shell.WINDOWS) {
fileWriter.println("@echo Hello World!> " + processStartFile);
fileWriter.println("@echo " + cId + ">> " + processStartFile);
if (exitCode != 0) {
fileWriter.println("@exit " + exitCode);
}
} else {
fileWriter.write("\numask 0"); // So that start file is readable by the test
fileWriter.write("\necho Hello World! > " + processStartFile);
fileWriter.write("\necho $$ >> " + processStartFile);
// Have script throw an exit code at the end
if (exitCode != 0) {
fileWriter.write("\nexit "+exitCode);
}
}
fileWriter.close();
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
// ////// Construct the Container-id
ContainerId cId = createContainerId();
containerLaunchContext.setUser(user);
URL resource_alpha =
@ -333,14 +346,12 @@ public class TestContainerManager extends BaseContainerManagerTest {
localResources.put(destinationFile, rsrc_alpha);
containerLaunchContext.setLocalResources(localResources);
containerLaunchContext.setUser(containerLaunchContext.getUser());
List<String> commands = new ArrayList<String>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
containerLaunchContext.setCommands(commands);
Container mockContainer = mock(Container.class);
when(mockContainer.getId()).thenReturn(cId);
when(mockContainer.getResource()).thenReturn(
BuilderUtils.newResource(100 * 1024 * 1024, 1));
BuilderUtils.newResource(100, 1)); // MB
StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class);
startRequest.setContainerLaunchContext(containerLaunchContext);

View File

@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
import org.apache.hadoop.yarn.util.BuilderUtils;
@ -88,13 +89,15 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
File shellFile = null;
File tempFile = null;
String badSymlink = "foo@zz%_#*&!-+= bar()";
String badSymlink = Shell.WINDOWS ? "foo@zz_#!-+bar.cmd" :
"foo@zz%_#*&!-+= bar()";
File symLinkFile = null;
try {
shellFile = new File(tmpDir, "hello.sh");
tempFile = new File(tmpDir, "temp.sh");
String timeoutCommand = "echo \"hello\"";
shellFile = Shell.appendScriptExtension(tmpDir, "hello");
tempFile = Shell.appendScriptExtension(tmpDir, "temp");
String timeoutCommand = Shell.WINDOWS ? "@echo \"hello\"" :
"echo \"hello\"";
PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile));
shellFile.setExecutable(true);
writer.println(timeoutCommand);
@ -109,7 +112,13 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
Map<String, String> env = new HashMap<String, String>();
List<String> commands = new ArrayList<String>();
if (Shell.WINDOWS) {
commands.add("cmd");
commands.add("/c");
commands.add("\"" + badSymlink + "\"");
} else {
commands.add("/bin/sh ./\\\"" + badSymlink + "\\\"");
}
ContainerLaunch.writeLaunchEnv(fos, env, resources, commands);
fos.flush();
@ -145,6 +154,19 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
// this is a dirty hack - but should be ok for a unittest.
@SuppressWarnings({ "rawtypes", "unchecked" })
public static void setNewEnvironmentHack(Map<String, String> newenv) throws Exception {
try {
Class<?> cl = Class.forName("java.lang.ProcessEnvironment");
Field field = cl.getDeclaredField("theEnvironment");
field.setAccessible(true);
Map<String, String> env = (Map<String, String>)field.get(null);
env.clear();
env.putAll(newenv);
Field ciField = cl.getDeclaredField("theCaseInsensitiveEnvironment");
ciField.setAccessible(true);
Map<String, String> cienv = (Map<String, String>)ciField.get(null);
cienv.clear();
cienv.putAll(newenv);
} catch (NoSuchFieldException e) {
Class[] classes = Collections.class.getDeclaredClasses();
Map<String, String> env = System.getenv();
for (Class cl : classes) {
@ -158,6 +180,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
}
}
}
}
/**
* See if environment variable is forwarded using sanitizeEnv.
@ -172,22 +195,6 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
envWithDummy.put(Environment.MALLOC_ARENA_MAX.name(), "99");
setNewEnvironmentHack(envWithDummy);
String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name());
File scriptFile = new File(tmpDir, "scriptFile.sh");
PrintWriter fileWriter = new PrintWriter(scriptFile);
File processStartFile =
new File(tmpDir, "env_vars.txt").getAbsoluteFile();
fileWriter.write("\numask 0"); // So that start file is readable by the test
fileWriter.write("\necho $" + Environment.MALLOC_ARENA_MAX.name() + " > " + processStartFile);
fileWriter.write("\necho $$ >> " + processStartFile);
fileWriter.write("\nexec sleep 100");
fileWriter.close();
assert(malloc != null && !"".equals(malloc));
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
Container mockContainer = mock(Container.class);
// ////// Construct the Container-id
ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
@ -200,6 +207,30 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
ContainerId cId =
recordFactory.newRecordInstance(ContainerId.class);
cId.setApplicationAttemptId(appAttemptId);
String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name());
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
PrintWriter fileWriter = new PrintWriter(scriptFile);
File processStartFile =
new File(tmpDir, "env_vars.txt").getAbsoluteFile();
if (Shell.WINDOWS) {
fileWriter.println("@echo " + Environment.MALLOC_ARENA_MAX.$() + "> " +
processStartFile);
fileWriter.println("@echo " + cId + ">> " + processStartFile);
fileWriter.println("@ping -n 100 127.0.0.1 >nul");
} else {
fileWriter.write("\numask 0"); // So that start file is readable by the test
fileWriter.write("\necho " + Environment.MALLOC_ARENA_MAX.$() + " > " +
processStartFile);
fileWriter.write("\necho $$ >> " + processStartFile);
fileWriter.write("\nexec sleep 100");
}
fileWriter.close();
assert(malloc != null && !"".equals(malloc));
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
when(mockContainer.getId()).thenReturn(cId);
containerLaunchContext.setUser(user);
@ -223,9 +254,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
// set up the rest of the container
containerLaunchContext.setUser(containerLaunchContext.getUser());
List<String> commands = new ArrayList<String>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
containerLaunchContext.setCommands(commands);
when(mockContainer.getResource()).thenReturn(
BuilderUtils.newResource(1024, 1));
@ -255,12 +284,10 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
// Assert that the process is alive
Assert.assertTrue("Process is not alive!",
exec.signalContainer(user,
pid, Signal.NULL));
DefaultContainerExecutor.containerIsAlive(pid));
// Once more
Assert.assertTrue("Process is not alive!",
exec.signalContainer(user,
pid, Signal.NULL));
DefaultContainerExecutor.containerIsAlive(pid));
StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class);
stopRequest.setContainerId(cId);
@ -274,38 +301,19 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
gcsRequest.setContainerId(cId);
ContainerStatus containerStatus =
containerManager.getContainerStatus(gcsRequest).getStatus();
Assert.assertEquals(ExitCode.TERMINATED.getExitCode(),
containerStatus.getExitStatus());
int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() :
ExitCode.TERMINATED.getExitCode();
Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus());
// Assert that the process is not alive anymore
Assert.assertFalse("Process is still alive!",
exec.signalContainer(user,
pid, Signal.NULL));
DefaultContainerExecutor.containerIsAlive(pid));
}
@Test
public void testDelayedKill() throws Exception {
containerManager.start();
File processStartFile =
new File(tmpDir, "pid.txt").getAbsoluteFile();
// setup a script that can handle sigterm gracefully
File scriptFile = new File(tmpDir, "testscript.sh");
PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile));
writer.println("#!/bin/bash\n\n");
writer.println("echo \"Running testscript for delayed kill\"");
writer.println("hello=\"Got SIGTERM\"");
writer.println("umask 0");
writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM");
writer.println("echo \"Writing pid to start file\"");
writer.println("echo $$ >> " + processStartFile);
writer.println("while true; do\nsleep 1s;\ndone");
writer.close();
scriptFile.setExecutable(true);
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
Container mockContainer = mock(Container.class);
// ////// Construct the Container-id
ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
@ -318,6 +326,33 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
ContainerId cId =
recordFactory.newRecordInstance(ContainerId.class);
cId.setApplicationAttemptId(appAttemptId);
File processStartFile =
new File(tmpDir, "pid.txt").getAbsoluteFile();
// setup a script that can handle sigterm gracefully
File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript");
PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile));
if (Shell.WINDOWS) {
writer.println("@echo \"Running testscript for delayed kill\"");
writer.println("@echo \"Writing pid to start file\"");
writer.println("@echo " + cId + "> " + processStartFile);
writer.println("@ping -n 100 127.0.0.1 >nul");
} else {
writer.println("#!/bin/bash\n\n");
writer.println("echo \"Running testscript for delayed kill\"");
writer.println("hello=\"Got SIGTERM\"");
writer.println("umask 0");
writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM");
writer.println("echo \"Writing pid to start file\"");
writer.println("echo $$ >> " + processStartFile);
writer.println("while true; do\nsleep 1s;\ndone");
}
writer.close();
scriptFile.setExecutable(true);
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
when(mockContainer.getId()).thenReturn(cId);
containerLaunchContext.setUser(user);
@ -341,8 +376,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
// set up the rest of the container
containerLaunchContext.setUser(containerLaunchContext.getUser());
List<String> commands = new ArrayList<String>();
commands.add(scriptFile.getAbsolutePath());
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
containerLaunchContext.setCommands(commands);
when(mockContainer.getResource()).thenReturn(
BuilderUtils.newResource(1024, 1));
@ -376,9 +410,15 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(),
containerStatus.getExitStatus());
// Now verify the contents of the file
// Script generates a message when it receives a sigterm
// so we look for that
// Now verify the contents of the file. Script generates a message when it
// receives a sigterm so we look for that. We cannot perform this check on
// Windows, because the process is not notified when killed by winutils.
// There is no way for the process to trap and respond. Instead, we can
// verify that the job object with ID matching container ID no longer exists.
if (Shell.WINDOWS) {
Assert.assertFalse("Process is still alive!",
DefaultContainerExecutor.containerIsAlive(cId.toString()));
} else {
BufferedReader reader =
new BufferedReader(new FileReader(processStartFile));
@ -396,5 +436,6 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
reader.close();
}
}
}