YARN-493. Fixed some shell related flaws in YARN on Windows. Contributed by Chris Nauroth.
HADOOP-9486. Promoted Windows and Shell related utils from YARN to Hadoop Common. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1469667 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8e1c2823fc
commit
44bf8525a5
|
@ -163,6 +163,9 @@ Trunk (Unreleased)
|
|||
|
||||
HADOOP-9258 Add stricter tests to FileSystemContractTestBase (stevel)
|
||||
|
||||
HADOOP-9486. Promoted Windows and Shell related utils from YARN to Hadoop
|
||||
Common. (Chris Nauroth via vinodkv)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-9451. Fault single-layer config if node group topology is enabled.
|
||||
|
|
|
@ -123,6 +123,56 @@ abstract public class Shell {
|
|||
: new String[] { "ln", "-s", target, link };
|
||||
}
|
||||
|
||||
/** Return a command for determining if process with specified pid is alive. */
|
||||
public static String[] getCheckProcessIsAliveCommand(String pid) {
|
||||
return Shell.WINDOWS ?
|
||||
new String[] { Shell.WINUTILS, "task", "isAlive", pid } :
|
||||
new String[] { "kill", "-0", isSetsidAvailable ? "-" + pid : pid };
|
||||
}
|
||||
|
||||
/** Return a command to send a signal to a given pid */
|
||||
public static String[] getSignalKillCommand(int code, String pid) {
|
||||
return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } :
|
||||
new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid };
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a File referencing a script with the given basename, inside the
|
||||
* given parent directory. The file extension is inferred by platform: ".cmd"
|
||||
* on Windows, or ".sh" otherwise.
|
||||
*
|
||||
* @param parent File parent directory
|
||||
* @param basename String script file basename
|
||||
* @return File referencing the script in the directory
|
||||
*/
|
||||
public static File appendScriptExtension(File parent, String basename) {
|
||||
return new File(parent, appendScriptExtension(basename));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a script file name with the given basename. The file extension is
|
||||
* inferred by platform: ".cmd" on Windows, or ".sh" otherwise.
|
||||
*
|
||||
* @param basename String script file basename
|
||||
* @return String script file name
|
||||
*/
|
||||
public static String appendScriptExtension(String basename) {
|
||||
return basename + (WINDOWS ? ".cmd" : ".sh");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a command to run the given script. The script interpreter is
|
||||
* inferred by platform: cmd on Windows or bash otherwise.
|
||||
*
|
||||
* @param script File script to run
|
||||
* @return String[] command to run the script
|
||||
*/
|
||||
public static String[] getRunScriptCommand(File script) {
|
||||
String absolutePath = script.getAbsolutePath();
|
||||
return WINDOWS ? new String[] { "cmd", "/c", absolutePath } :
|
||||
new String[] { "/bin/bash", absolutePath };
|
||||
}
|
||||
|
||||
/** a Unix command to set permission */
|
||||
public static final String SET_PERMISSION_COMMAND = "chmod";
|
||||
/** a Unix command to set owner */
|
||||
|
@ -243,6 +293,26 @@ abstract public class Shell {
|
|||
return winUtilsPath;
|
||||
}
|
||||
|
||||
public static final boolean isSetsidAvailable = isSetsidSupported();
|
||||
private static boolean isSetsidSupported() {
|
||||
if (Shell.WINDOWS) {
|
||||
return false;
|
||||
}
|
||||
ShellCommandExecutor shexec = null;
|
||||
boolean setsidSupported = true;
|
||||
try {
|
||||
String[] args = {"setsid", "bash", "-c", "echo $$"};
|
||||
shexec = new ShellCommandExecutor(args);
|
||||
shexec.execute();
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("setsid is not available on this machine. So not using it.");
|
||||
setsidSupported = false;
|
||||
} finally { // handle the exit code
|
||||
LOG.info("setsid exited with exit code " + shexec.getExitCode());
|
||||
}
|
||||
return setsidSupported;
|
||||
}
|
||||
|
||||
/** Token separator regex used to parse Shell tool outputs */
|
||||
public static final String TOKEN_SEPARATOR_REGEX
|
||||
= WINDOWS ? "[|\n\r]" : "[ \t\n\r\f]";
|
||||
|
|
|
@ -24,6 +24,10 @@
|
|||
|
||||
#define ERROR_TASK_NOT_ALIVE 1
|
||||
|
||||
// This exit code for killed processes is compatible with Unix, where a killed
|
||||
// process exits with 128 + signal. For SIGKILL, this would be 128 + 9 = 137.
|
||||
#define KILLED_PROCESS_EXIT_CODE 137
|
||||
|
||||
// List of different task related command line options supported by
|
||||
// winutils.
|
||||
typedef enum TaskCommandOptionType
|
||||
|
@ -264,7 +268,7 @@ DWORD killTask(_TCHAR* jobObjName)
|
|||
return err;
|
||||
}
|
||||
|
||||
if(TerminateJobObject(jobObject, 1) == 0)
|
||||
if(TerminateJobObject(jobObject, KILLED_PROCESS_EXIT_CODE) == 0)
|
||||
{
|
||||
return GetLastError();
|
||||
}
|
||||
|
|
|
@ -34,6 +34,9 @@ Trunk - Unreleased
|
|||
YARN-487. Modify path manipulation in LocalDirsHandlerService to let
|
||||
TestDiskFailures pass on Windows. (Chris Nauroth via vinodkv)
|
||||
|
||||
YARN-493. Fixed some shell related flaws in YARN on Windows. (Chris Nauroth
|
||||
via vinodkv)
|
||||
|
||||
BREAKDOWN OF HADOOP-8562 SUBTASKS
|
||||
|
||||
YARN-158. Yarn creating package-info.java must not depend on sh.
|
||||
|
|
|
@ -222,19 +222,6 @@ public abstract class ContainerExecutor implements Configurable {
|
|||
|
||||
}
|
||||
|
||||
/** Return a command for determining if process with specified pid is alive. */
|
||||
protected static String[] getCheckProcessIsAliveCommand(String pid) {
|
||||
return Shell.WINDOWS ?
|
||||
new String[] { Shell.WINUTILS, "task", "isAlive", pid } :
|
||||
new String[] { "kill", "-0", pid };
|
||||
}
|
||||
|
||||
/** Return a command to send a signal to a given pid */
|
||||
protected static String[] getSignalKillCommand(int code, String pid) {
|
||||
return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } :
|
||||
new String[] { "kill", "-" + code, pid };
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the container still active?
|
||||
* @param containerId
|
||||
|
@ -303,26 +290,6 @@ public abstract class ContainerExecutor implements Configurable {
|
|||
return pid;
|
||||
}
|
||||
|
||||
public static final boolean isSetsidAvailable = isSetsidSupported();
|
||||
private static boolean isSetsidSupported() {
|
||||
if (Shell.WINDOWS) {
|
||||
return true;
|
||||
}
|
||||
ShellCommandExecutor shexec = null;
|
||||
boolean setsidSupported = true;
|
||||
try {
|
||||
String[] args = {"setsid", "bash", "-c", "echo $$"};
|
||||
shexec = new ShellCommandExecutor(args);
|
||||
shexec.execute();
|
||||
} catch (IOException ioe) {
|
||||
LOG.warn("setsid is not available on this machine. So not using it.");
|
||||
setsidSupported = false;
|
||||
} finally { // handle the exit code
|
||||
LOG.info("setsid exited with exit code " + shexec.getExitCode());
|
||||
}
|
||||
return setsidSupported;
|
||||
}
|
||||
|
||||
public static class DelayedProcessKiller extends Thread {
|
||||
private final String user;
|
||||
private final String pid;
|
||||
|
|
|
@ -50,6 +50,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.Conta
|
|||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
public class DefaultContainerExecutor extends ContainerExecutor {
|
||||
|
||||
private static final Log LOG = LogFactory
|
||||
|
@ -237,8 +239,9 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
protected abstract void writeLocalWrapperScript(Path launchDst, Path pidFile,
|
||||
PrintStream pout);
|
||||
|
||||
protected LocalWrapperScriptBuilder(Path wrapperScriptPath) {
|
||||
this.wrapperScriptPath = wrapperScriptPath;
|
||||
protected LocalWrapperScriptBuilder(Path containerWorkDir) {
|
||||
this.wrapperScriptPath = new Path(containerWorkDir,
|
||||
Shell.appendScriptExtension("default_container_executor"));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -246,7 +249,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
extends LocalWrapperScriptBuilder {
|
||||
|
||||
public UnixLocalWrapperScriptBuilder(Path containerWorkDir) {
|
||||
super(new Path(containerWorkDir, "default_container_executor.sh"));
|
||||
super(containerWorkDir);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -260,7 +263,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
pout.println();
|
||||
pout.println("echo $$ > " + pidFile.toString() + ".tmp");
|
||||
pout.println("/bin/mv -f " + pidFile.toString() + ".tmp " + pidFile);
|
||||
String exec = ContainerExecutor.isSetsidAvailable? "exec setsid" : "exec";
|
||||
String exec = Shell.isSetsidAvailable? "exec setsid" : "exec";
|
||||
pout.println(exec + " /bin/bash -c \"" +
|
||||
launchDst.toUri().getPath().toString() + "\"");
|
||||
}
|
||||
|
@ -274,7 +277,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
public WindowsLocalWrapperScriptBuilder(String containerIdStr,
|
||||
Path containerWorkDir) {
|
||||
|
||||
super(new Path(containerWorkDir, "default_container_executor.cmd"));
|
||||
super(containerWorkDir);
|
||||
this.containerIdStr = containerIdStr;
|
||||
}
|
||||
|
||||
|
@ -297,18 +300,15 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
@Override
|
||||
public boolean signalContainer(String user, String pid, Signal signal)
|
||||
throws IOException {
|
||||
final String sigpid = ContainerExecutor.isSetsidAvailable
|
||||
? "-" + pid
|
||||
: pid;
|
||||
LOG.debug("Sending signal " + signal.getValue() + " to pid " + sigpid
|
||||
LOG.debug("Sending signal " + signal.getValue() + " to pid " + pid
|
||||
+ " as user " + user);
|
||||
if (!containerIsAlive(sigpid)) {
|
||||
if (!containerIsAlive(pid)) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
killContainer(sigpid, signal);
|
||||
killContainer(pid, signal);
|
||||
} catch (IOException e) {
|
||||
if (!containerIsAlive(sigpid)) {
|
||||
if (!containerIsAlive(pid)) {
|
||||
return false;
|
||||
}
|
||||
throw e;
|
||||
|
@ -322,9 +322,11 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
* @param pid String pid
|
||||
* @return boolean true if the process is alive
|
||||
*/
|
||||
private boolean containerIsAlive(String pid) throws IOException {
|
||||
@VisibleForTesting
|
||||
public static boolean containerIsAlive(String pid) throws IOException {
|
||||
try {
|
||||
new ShellCommandExecutor(getCheckProcessIsAliveCommand(pid)).execute();
|
||||
new ShellCommandExecutor(Shell.getCheckProcessIsAliveCommand(pid))
|
||||
.execute();
|
||||
// successful execution means process is alive
|
||||
return true;
|
||||
}
|
||||
|
@ -342,7 +344,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
|
|||
* (for logging).
|
||||
*/
|
||||
private void killContainer(String pid, Signal signal) throws IOException {
|
||||
new ShellCommandExecutor(getSignalKillCommand(signal.getValue(), pid))
|
||||
new ShellCommandExecutor(Shell.getSignalKillCommand(signal.getValue(), pid))
|
||||
.execute();
|
||||
}
|
||||
|
||||
|
|
|
@ -72,8 +72,8 @@ public class ContainerLaunch implements Callable<Integer> {
|
|||
|
||||
private static final Log LOG = LogFactory.getLog(ContainerLaunch.class);
|
||||
|
||||
public static final String CONTAINER_SCRIPT = Shell.WINDOWS ?
|
||||
"launch_container.cmd" : "launch_container.sh";
|
||||
public static final String CONTAINER_SCRIPT =
|
||||
Shell.appendScriptExtension("launch_container");
|
||||
public static final String FINAL_CONTAINER_TOKENS_FILE = "container_tokens";
|
||||
|
||||
private static final String PID_FILE_NAME_FMT = "%s.pid";
|
||||
|
|
|
@ -22,12 +22,13 @@ import static org.mockito.Mockito.mock;
|
|||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -40,6 +41,7 @@ import junit.framework.Assert;
|
|||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
|
@ -59,6 +61,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
|
|||
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||
import org.apache.hadoop.yarn.util.BuilderUtils;
|
||||
|
@ -81,6 +84,7 @@ public class TestNodeManagerShutdown {
|
|||
.getRecordFactory(null);
|
||||
static final String user = "nobody";
|
||||
private FileContext localFS;
|
||||
private ContainerId cId;
|
||||
private CyclicBarrier syncBarrier = new CyclicBarrier(2);
|
||||
|
||||
@Before
|
||||
|
@ -90,6 +94,9 @@ public class TestNodeManagerShutdown {
|
|||
logsDir.mkdirs();
|
||||
remoteLogsDir.mkdirs();
|
||||
nmLocalDir.mkdirs();
|
||||
|
||||
// Construct the Container-id
|
||||
cId = createContainerId();
|
||||
}
|
||||
|
||||
@After
|
||||
|
@ -115,9 +122,15 @@ public class TestNodeManagerShutdown {
|
|||
|
||||
nm.stop();
|
||||
|
||||
// Now verify the contents of the file
|
||||
// Script generates a message when it receives a sigterm
|
||||
// so we look for that
|
||||
// Now verify the contents of the file. Script generates a message when it
|
||||
// receives a sigterm so we look for that. We cannot perform this check on
|
||||
// Windows, because the process is not notified when killed by winutils.
|
||||
// There is no way for the process to trap and respond. Instead, we can
|
||||
// verify that the job object with ID matching container ID no longer exists.
|
||||
if (Shell.WINDOWS) {
|
||||
Assert.assertFalse("Process is still alive!",
|
||||
DefaultContainerExecutor.containerIsAlive(cId.toString()));
|
||||
} else {
|
||||
BufferedReader reader =
|
||||
new BufferedReader(new FileReader(processStartFile));
|
||||
|
||||
|
@ -135,6 +148,7 @@ public class TestNodeManagerShutdown {
|
|||
Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Test
|
||||
|
@ -162,8 +176,6 @@ public class TestNodeManagerShutdown {
|
|||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
Container mockContainer = mock(Container.class);
|
||||
// Construct the Container-id
|
||||
ContainerId cId = createContainerId();
|
||||
when(mockContainer.getId()).thenReturn(cId);
|
||||
|
||||
containerLaunchContext.setUser(user);
|
||||
|
@ -184,9 +196,7 @@ public class TestNodeManagerShutdown {
|
|||
localResources.put(destinationFile, localResource);
|
||||
containerLaunchContext.setLocalResources(localResources);
|
||||
containerLaunchContext.setUser(containerLaunchContext.getUser());
|
||||
List<String> commands = new ArrayList<String>();
|
||||
commands.add("/bin/bash");
|
||||
commands.add(scriptFile.getAbsolutePath());
|
||||
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
|
||||
containerLaunchContext.setCommands(commands);
|
||||
Resource resource = BuilderUtils.newResource(1024, 1);
|
||||
when(mockContainer.getResource()).thenReturn(resource);
|
||||
|
@ -234,16 +244,24 @@ public class TestNodeManagerShutdown {
|
|||
* stopped by external means.
|
||||
*/
|
||||
private File createUnhaltingScriptFile() throws IOException {
|
||||
File scriptFile = new File(tmpDir, "scriptFile.sh");
|
||||
BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile));
|
||||
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
|
||||
PrintWriter fileWriter = new PrintWriter(scriptFile);
|
||||
if (Shell.WINDOWS) {
|
||||
fileWriter.println("@echo \"Running testscript for delayed kill\"");
|
||||
fileWriter.println("@echo \"Writing pid to start file\"");
|
||||
fileWriter.println("@echo " + cId + ">> " + processStartFile);
|
||||
fileWriter.println("@pause");
|
||||
} else {
|
||||
fileWriter.write("#!/bin/bash\n\n");
|
||||
fileWriter.write("echo \"Running testscript for delayed kill\"\n");
|
||||
fileWriter.write("hello=\"Got SIGTERM\"\n");
|
||||
fileWriter.write("umask 0\n");
|
||||
fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n");
|
||||
fileWriter.write("trap \"echo $hello >> " + processStartFile +
|
||||
"\" SIGTERM\n");
|
||||
fileWriter.write("echo \"Writing pid to start file\"\n");
|
||||
fileWriter.write("echo $$ >> " + processStartFile + "\n");
|
||||
fileWriter.write("while true; do\ndate >> /dev/null;\n done\n");
|
||||
}
|
||||
|
||||
fileWriter.close();
|
||||
return scriptFile;
|
||||
|
|
|
@ -76,15 +76,15 @@ public abstract class BaseContainerManagerTest {
|
|||
public BaseContainerManagerTest() throws UnsupportedFileSystemException {
|
||||
localFS = FileContext.getLocalFSFileContext();
|
||||
localDir =
|
||||
new File("target", this.getClass().getName() + "-localDir")
|
||||
new File("target", this.getClass().getSimpleName() + "-localDir")
|
||||
.getAbsoluteFile();
|
||||
localLogDir =
|
||||
new File("target", this.getClass().getName() + "-localLogDir")
|
||||
new File("target", this.getClass().getSimpleName() + "-localLogDir")
|
||||
.getAbsoluteFile();
|
||||
remoteLogDir =
|
||||
new File("target", this.getClass().getName() + "-remoteLogDir")
|
||||
new File("target", this.getClass().getSimpleName() + "-remoteLogDir")
|
||||
.getAbsoluteFile();
|
||||
tmpDir = new File("target", this.getClass().getName() + "-tmpDir");
|
||||
tmpDir = new File("target", this.getClass().getSimpleName() + "-tmpDir");
|
||||
}
|
||||
|
||||
protected static Log LOG = LogFactory
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory;
|
|||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||
import org.apache.hadoop.util.Shell;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest;
|
||||
|
@ -53,6 +54,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
|
|||
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
|
||||
|
@ -196,22 +198,29 @@ public class TestContainerManager extends BaseContainerManagerTest {
|
|||
InterruptedException {
|
||||
containerManager.start();
|
||||
|
||||
File scriptFile = new File(tmpDir, "scriptFile.sh");
|
||||
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
|
||||
PrintWriter fileWriter = new PrintWriter(scriptFile);
|
||||
File processStartFile =
|
||||
new File(tmpDir, "start_file.txt").getAbsoluteFile();
|
||||
|
||||
// ////// Construct the Container-id
|
||||
ContainerId cId = createContainerId();
|
||||
|
||||
if (Shell.WINDOWS) {
|
||||
fileWriter.println("@echo Hello World!> " + processStartFile);
|
||||
fileWriter.println("@echo " + cId + ">> " + processStartFile);
|
||||
fileWriter.println("@ping -n 100 127.0.0.1 >nul");
|
||||
} else {
|
||||
fileWriter.write("\numask 0"); // So that start file is readable by the test
|
||||
fileWriter.write("\necho Hello World! > " + processStartFile);
|
||||
fileWriter.write("\necho $$ >> " + processStartFile);
|
||||
fileWriter.write("\nexec sleep 100");
|
||||
}
|
||||
fileWriter.close();
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
|
||||
// ////// Construct the Container-id
|
||||
ContainerId cId = createContainerId();
|
||||
|
||||
containerLaunchContext.setUser(user);
|
||||
|
||||
URL resource_alpha =
|
||||
|
@ -230,14 +239,12 @@ public class TestContainerManager extends BaseContainerManagerTest {
|
|||
localResources.put(destinationFile, rsrc_alpha);
|
||||
containerLaunchContext.setLocalResources(localResources);
|
||||
containerLaunchContext.setUser(containerLaunchContext.getUser());
|
||||
List<String> commands = new ArrayList<String>();
|
||||
commands.add("/bin/bash");
|
||||
commands.add(scriptFile.getAbsolutePath());
|
||||
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
|
||||
containerLaunchContext.setCommands(commands);
|
||||
Container mockContainer = mock(Container.class);
|
||||
when(mockContainer.getId()).thenReturn(cId);
|
||||
when(mockContainer.getResource()).thenReturn(
|
||||
BuilderUtils.newResource(100 * 1024 * 1024, 1));
|
||||
BuilderUtils.newResource(100, 1)); // MB
|
||||
StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class);
|
||||
startRequest.setContainerLaunchContext(containerLaunchContext);
|
||||
startRequest.setContainer(mockContainer);
|
||||
|
@ -264,12 +271,10 @@ public class TestContainerManager extends BaseContainerManagerTest {
|
|||
|
||||
// Assert that the process is alive
|
||||
Assert.assertTrue("Process is not alive!",
|
||||
exec.signalContainer(user,
|
||||
pid, Signal.NULL));
|
||||
DefaultContainerExecutor.containerIsAlive(pid));
|
||||
// Once more
|
||||
Assert.assertTrue("Process is not alive!",
|
||||
exec.signalContainer(user,
|
||||
pid, Signal.NULL));
|
||||
DefaultContainerExecutor.containerIsAlive(pid));
|
||||
|
||||
StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class);
|
||||
stopRequest.setContainerId(cId);
|
||||
|
@ -283,38 +288,46 @@ public class TestContainerManager extends BaseContainerManagerTest {
|
|||
gcsRequest.setContainerId(cId);
|
||||
ContainerStatus containerStatus =
|
||||
containerManager.getContainerStatus(gcsRequest).getStatus();
|
||||
Assert.assertEquals(ExitCode.TERMINATED.getExitCode(),
|
||||
containerStatus.getExitStatus());
|
||||
int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() :
|
||||
ExitCode.TERMINATED.getExitCode();
|
||||
Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus());
|
||||
|
||||
// Assert that the process is not alive anymore
|
||||
Assert.assertFalse("Process is still alive!",
|
||||
exec.signalContainer(user,
|
||||
pid, Signal.NULL));
|
||||
DefaultContainerExecutor.containerIsAlive(pid));
|
||||
}
|
||||
|
||||
private void testContainerLaunchAndExit(int exitCode) throws IOException, InterruptedException {
|
||||
|
||||
File scriptFile = new File(tmpDir, "scriptFile.sh");
|
||||
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
|
||||
PrintWriter fileWriter = new PrintWriter(scriptFile);
|
||||
File processStartFile =
|
||||
new File(tmpDir, "start_file.txt").getAbsoluteFile();
|
||||
|
||||
// ////// Construct the Container-id
|
||||
ContainerId cId = createContainerId();
|
||||
|
||||
if (Shell.WINDOWS) {
|
||||
fileWriter.println("@echo Hello World!> " + processStartFile);
|
||||
fileWriter.println("@echo " + cId + ">> " + processStartFile);
|
||||
if (exitCode != 0) {
|
||||
fileWriter.println("@exit " + exitCode);
|
||||
}
|
||||
} else {
|
||||
fileWriter.write("\numask 0"); // So that start file is readable by the test
|
||||
fileWriter.write("\necho Hello World! > " + processStartFile);
|
||||
fileWriter.write("\necho $$ >> " + processStartFile);
|
||||
|
||||
// Have script throw an exit code at the end
|
||||
if (exitCode != 0) {
|
||||
fileWriter.write("\nexit "+exitCode);
|
||||
}
|
||||
}
|
||||
|
||||
fileWriter.close();
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
|
||||
// ////// Construct the Container-id
|
||||
ContainerId cId = createContainerId();
|
||||
|
||||
containerLaunchContext.setUser(user);
|
||||
|
||||
URL resource_alpha =
|
||||
|
@ -333,14 +346,12 @@ public class TestContainerManager extends BaseContainerManagerTest {
|
|||
localResources.put(destinationFile, rsrc_alpha);
|
||||
containerLaunchContext.setLocalResources(localResources);
|
||||
containerLaunchContext.setUser(containerLaunchContext.getUser());
|
||||
List<String> commands = new ArrayList<String>();
|
||||
commands.add("/bin/bash");
|
||||
commands.add(scriptFile.getAbsolutePath());
|
||||
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
|
||||
containerLaunchContext.setCommands(commands);
|
||||
Container mockContainer = mock(Container.class);
|
||||
when(mockContainer.getId()).thenReturn(cId);
|
||||
when(mockContainer.getResource()).thenReturn(
|
||||
BuilderUtils.newResource(100 * 1024 * 1024, 1));
|
||||
BuilderUtils.newResource(100, 1)); // MB
|
||||
|
||||
StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class);
|
||||
startRequest.setContainerLaunchContext(containerLaunchContext);
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.api.records.URL;
|
|||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
|
||||
import org.apache.hadoop.yarn.util.BuilderUtils;
|
||||
|
@ -88,13 +89,15 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
|
||||
File shellFile = null;
|
||||
File tempFile = null;
|
||||
String badSymlink = "foo@zz%_#*&!-+= bar()";
|
||||
String badSymlink = Shell.WINDOWS ? "foo@zz_#!-+bar.cmd" :
|
||||
"foo@zz%_#*&!-+= bar()";
|
||||
File symLinkFile = null;
|
||||
|
||||
try {
|
||||
shellFile = new File(tmpDir, "hello.sh");
|
||||
tempFile = new File(tmpDir, "temp.sh");
|
||||
String timeoutCommand = "echo \"hello\"";
|
||||
shellFile = Shell.appendScriptExtension(tmpDir, "hello");
|
||||
tempFile = Shell.appendScriptExtension(tmpDir, "temp");
|
||||
String timeoutCommand = Shell.WINDOWS ? "@echo \"hello\"" :
|
||||
"echo \"hello\"";
|
||||
PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile));
|
||||
shellFile.setExecutable(true);
|
||||
writer.println(timeoutCommand);
|
||||
|
@ -109,7 +112,13 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
|
||||
Map<String, String> env = new HashMap<String, String>();
|
||||
List<String> commands = new ArrayList<String>();
|
||||
if (Shell.WINDOWS) {
|
||||
commands.add("cmd");
|
||||
commands.add("/c");
|
||||
commands.add("\"" + badSymlink + "\"");
|
||||
} else {
|
||||
commands.add("/bin/sh ./\\\"" + badSymlink + "\\\"");
|
||||
}
|
||||
|
||||
ContainerLaunch.writeLaunchEnv(fos, env, resources, commands);
|
||||
fos.flush();
|
||||
|
@ -145,6 +154,19 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
// this is a dirty hack - but should be ok for a unittest.
|
||||
@SuppressWarnings({ "rawtypes", "unchecked" })
|
||||
public static void setNewEnvironmentHack(Map<String, String> newenv) throws Exception {
|
||||
try {
|
||||
Class<?> cl = Class.forName("java.lang.ProcessEnvironment");
|
||||
Field field = cl.getDeclaredField("theEnvironment");
|
||||
field.setAccessible(true);
|
||||
Map<String, String> env = (Map<String, String>)field.get(null);
|
||||
env.clear();
|
||||
env.putAll(newenv);
|
||||
Field ciField = cl.getDeclaredField("theCaseInsensitiveEnvironment");
|
||||
ciField.setAccessible(true);
|
||||
Map<String, String> cienv = (Map<String, String>)ciField.get(null);
|
||||
cienv.clear();
|
||||
cienv.putAll(newenv);
|
||||
} catch (NoSuchFieldException e) {
|
||||
Class[] classes = Collections.class.getDeclaredClasses();
|
||||
Map<String, String> env = System.getenv();
|
||||
for (Class cl : classes) {
|
||||
|
@ -158,6 +180,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* See if environment variable is forwarded using sanitizeEnv.
|
||||
|
@ -172,22 +195,6 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
envWithDummy.put(Environment.MALLOC_ARENA_MAX.name(), "99");
|
||||
setNewEnvironmentHack(envWithDummy);
|
||||
|
||||
String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name());
|
||||
File scriptFile = new File(tmpDir, "scriptFile.sh");
|
||||
PrintWriter fileWriter = new PrintWriter(scriptFile);
|
||||
File processStartFile =
|
||||
new File(tmpDir, "env_vars.txt").getAbsoluteFile();
|
||||
fileWriter.write("\numask 0"); // So that start file is readable by the test
|
||||
fileWriter.write("\necho $" + Environment.MALLOC_ARENA_MAX.name() + " > " + processStartFile);
|
||||
fileWriter.write("\necho $$ >> " + processStartFile);
|
||||
fileWriter.write("\nexec sleep 100");
|
||||
fileWriter.close();
|
||||
|
||||
assert(malloc != null && !"".equals(malloc));
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
|
||||
Container mockContainer = mock(Container.class);
|
||||
// ////// Construct the Container-id
|
||||
ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
|
||||
|
@ -200,6 +207,30 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
ContainerId cId =
|
||||
recordFactory.newRecordInstance(ContainerId.class);
|
||||
cId.setApplicationAttemptId(appAttemptId);
|
||||
String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name());
|
||||
File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
|
||||
PrintWriter fileWriter = new PrintWriter(scriptFile);
|
||||
File processStartFile =
|
||||
new File(tmpDir, "env_vars.txt").getAbsoluteFile();
|
||||
if (Shell.WINDOWS) {
|
||||
fileWriter.println("@echo " + Environment.MALLOC_ARENA_MAX.$() + "> " +
|
||||
processStartFile);
|
||||
fileWriter.println("@echo " + cId + ">> " + processStartFile);
|
||||
fileWriter.println("@ping -n 100 127.0.0.1 >nul");
|
||||
} else {
|
||||
fileWriter.write("\numask 0"); // So that start file is readable by the test
|
||||
fileWriter.write("\necho " + Environment.MALLOC_ARENA_MAX.$() + " > " +
|
||||
processStartFile);
|
||||
fileWriter.write("\necho $$ >> " + processStartFile);
|
||||
fileWriter.write("\nexec sleep 100");
|
||||
}
|
||||
fileWriter.close();
|
||||
|
||||
assert(malloc != null && !"".equals(malloc));
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
|
||||
when(mockContainer.getId()).thenReturn(cId);
|
||||
|
||||
containerLaunchContext.setUser(user);
|
||||
|
@ -223,9 +254,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
|
||||
// set up the rest of the container
|
||||
containerLaunchContext.setUser(containerLaunchContext.getUser());
|
||||
List<String> commands = new ArrayList<String>();
|
||||
commands.add("/bin/bash");
|
||||
commands.add(scriptFile.getAbsolutePath());
|
||||
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
|
||||
containerLaunchContext.setCommands(commands);
|
||||
when(mockContainer.getResource()).thenReturn(
|
||||
BuilderUtils.newResource(1024, 1));
|
||||
|
@ -255,12 +284,10 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
|
||||
// Assert that the process is alive
|
||||
Assert.assertTrue("Process is not alive!",
|
||||
exec.signalContainer(user,
|
||||
pid, Signal.NULL));
|
||||
DefaultContainerExecutor.containerIsAlive(pid));
|
||||
// Once more
|
||||
Assert.assertTrue("Process is not alive!",
|
||||
exec.signalContainer(user,
|
||||
pid, Signal.NULL));
|
||||
DefaultContainerExecutor.containerIsAlive(pid));
|
||||
|
||||
StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class);
|
||||
stopRequest.setContainerId(cId);
|
||||
|
@ -274,38 +301,19 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
gcsRequest.setContainerId(cId);
|
||||
ContainerStatus containerStatus =
|
||||
containerManager.getContainerStatus(gcsRequest).getStatus();
|
||||
Assert.assertEquals(ExitCode.TERMINATED.getExitCode(),
|
||||
containerStatus.getExitStatus());
|
||||
int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() :
|
||||
ExitCode.TERMINATED.getExitCode();
|
||||
Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus());
|
||||
|
||||
// Assert that the process is not alive anymore
|
||||
Assert.assertFalse("Process is still alive!",
|
||||
exec.signalContainer(user,
|
||||
pid, Signal.NULL));
|
||||
DefaultContainerExecutor.containerIsAlive(pid));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDelayedKill() throws Exception {
|
||||
containerManager.start();
|
||||
|
||||
File processStartFile =
|
||||
new File(tmpDir, "pid.txt").getAbsoluteFile();
|
||||
|
||||
// setup a script that can handle sigterm gracefully
|
||||
File scriptFile = new File(tmpDir, "testscript.sh");
|
||||
PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile));
|
||||
writer.println("#!/bin/bash\n\n");
|
||||
writer.println("echo \"Running testscript for delayed kill\"");
|
||||
writer.println("hello=\"Got SIGTERM\"");
|
||||
writer.println("umask 0");
|
||||
writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM");
|
||||
writer.println("echo \"Writing pid to start file\"");
|
||||
writer.println("echo $$ >> " + processStartFile);
|
||||
writer.println("while true; do\nsleep 1s;\ndone");
|
||||
writer.close();
|
||||
scriptFile.setExecutable(true);
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
Container mockContainer = mock(Container.class);
|
||||
// ////// Construct the Container-id
|
||||
ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
|
||||
|
@ -318,6 +326,33 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
ContainerId cId =
|
||||
recordFactory.newRecordInstance(ContainerId.class);
|
||||
cId.setApplicationAttemptId(appAttemptId);
|
||||
|
||||
File processStartFile =
|
||||
new File(tmpDir, "pid.txt").getAbsoluteFile();
|
||||
|
||||
// setup a script that can handle sigterm gracefully
|
||||
File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript");
|
||||
PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile));
|
||||
if (Shell.WINDOWS) {
|
||||
writer.println("@echo \"Running testscript for delayed kill\"");
|
||||
writer.println("@echo \"Writing pid to start file\"");
|
||||
writer.println("@echo " + cId + "> " + processStartFile);
|
||||
writer.println("@ping -n 100 127.0.0.1 >nul");
|
||||
} else {
|
||||
writer.println("#!/bin/bash\n\n");
|
||||
writer.println("echo \"Running testscript for delayed kill\"");
|
||||
writer.println("hello=\"Got SIGTERM\"");
|
||||
writer.println("umask 0");
|
||||
writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM");
|
||||
writer.println("echo \"Writing pid to start file\"");
|
||||
writer.println("echo $$ >> " + processStartFile);
|
||||
writer.println("while true; do\nsleep 1s;\ndone");
|
||||
}
|
||||
writer.close();
|
||||
scriptFile.setExecutable(true);
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
when(mockContainer.getId()).thenReturn(cId);
|
||||
|
||||
containerLaunchContext.setUser(user);
|
||||
|
@ -341,8 +376,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
|
||||
// set up the rest of the container
|
||||
containerLaunchContext.setUser(containerLaunchContext.getUser());
|
||||
List<String> commands = new ArrayList<String>();
|
||||
commands.add(scriptFile.getAbsolutePath());
|
||||
List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
|
||||
containerLaunchContext.setCommands(commands);
|
||||
when(mockContainer.getResource()).thenReturn(
|
||||
BuilderUtils.newResource(1024, 1));
|
||||
|
@ -376,9 +410,15 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(),
|
||||
containerStatus.getExitStatus());
|
||||
|
||||
// Now verify the contents of the file
|
||||
// Script generates a message when it receives a sigterm
|
||||
// so we look for that
|
||||
// Now verify the contents of the file. Script generates a message when it
|
||||
// receives a sigterm so we look for that. We cannot perform this check on
|
||||
// Windows, because the process is not notified when killed by winutils.
|
||||
// There is no way for the process to trap and respond. Instead, we can
|
||||
// verify that the job object with ID matching container ID no longer exists.
|
||||
if (Shell.WINDOWS) {
|
||||
Assert.assertFalse("Process is still alive!",
|
||||
DefaultContainerExecutor.containerIsAlive(cId.toString()));
|
||||
} else {
|
||||
BufferedReader reader =
|
||||
new BufferedReader(new FileReader(processStartFile));
|
||||
|
||||
|
@ -396,5 +436,6 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
|||
Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue