YARN-2934. Improve handling of container's stderr. (Naganarasimha G R via gera)
(cherry picked from commit 2c17b81569)
This commit is contained in:
parent
c6e0f64582
commit
8baf9369bb
|
@ -28,6 +28,9 @@ Release 2.9.0 - UNRELEASED
|
||||||
YARN-4156. TestAMRestart#testAMBlacklistPreventsRestartOnSameNode
|
YARN-4156. TestAMRestart#testAMBlacklistPreventsRestartOnSameNode
|
||||||
assumes CapacityScheduler. (Anubhav Dhoot via kasha)
|
assumes CapacityScheduler. (Anubhav Dhoot via kasha)
|
||||||
|
|
||||||
|
YARN-2934. Improve handling of container's stderr.
|
||||||
|
(Naganarasimha G R via gera)
|
||||||
|
|
||||||
Release 2.8.0 - UNRELEASED
|
Release 2.8.0 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -1333,6 +1333,17 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final String NM_USER_HOME_DIR =
|
public static final String NM_USER_HOME_DIR =
|
||||||
NM_PREFIX + "user-home-dir";
|
NM_PREFIX + "user-home-dir";
|
||||||
|
|
||||||
|
public static final String NM_CONTAINER_STDERR_PATTERN =
|
||||||
|
NM_PREFIX + "container.stderr.pattern";
|
||||||
|
|
||||||
|
public static final String DEFAULT_NM_CONTAINER_STDERR_PATTERN =
|
||||||
|
"{*stderr*,*STDERR*}";
|
||||||
|
|
||||||
|
public static final String NM_CONTAINER_STDERR_BYTES =
|
||||||
|
NM_PREFIX + "container.stderr.tail.bytes";
|
||||||
|
|
||||||
|
public static final long DEFAULT_NM_CONTAINER_STDERR_BYTES = 4 * 1024;
|
||||||
|
|
||||||
/**The kerberos principal to be used for spnego filter for NM.*/
|
/**The kerberos principal to be used for spnego filter for NM.*/
|
||||||
public static final String NM_WEBAPP_SPNEGO_USER_NAME_KEY =
|
public static final String NM_WEBAPP_SPNEGO_USER_NAME_KEY =
|
||||||
NM_PREFIX + "webapp.spnego-principal";
|
NM_PREFIX + "webapp.spnego-principal";
|
||||||
|
|
|
@ -2459,6 +2459,29 @@
|
||||||
<value>org.apache.hadoop.yarn.server.nodemanager.amrmproxy.DefaultRequestInterceptor</value>
|
<value>org.apache.hadoop.yarn.server.nodemanager.amrmproxy.DefaultRequestInterceptor</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>
|
||||||
|
Error filename pattern, to identify the file in the container's
|
||||||
|
log directory which contains the container's error log. As error file
|
||||||
|
redirection is done by the client/AM, YARN is not aware of the error
|
||||||
|
file name. YARN uses this pattern to identify the error file and tail
|
||||||
|
the error log as diagnostics when the container execution returns a non-zero
|
||||||
|
value. Filename patterns are case sensitive and should match the
|
||||||
|
specification of the FileSystem.globStatus(Path) API. If multiple filenames
|
||||||
|
match the pattern, the most recently modified file will be picked.
|
||||||
|
</description>
|
||||||
|
<name>yarn.nodemanager.container.stderr.pattern</name>
|
||||||
|
<value>{*stderr*,*STDERR*}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>
|
||||||
|
Size of the container error file which needs to be tailed, in bytes.
|
||||||
|
</description>
|
||||||
|
<name>yarn.nodemanager.container.stderr.tail.bytes</name>
|
||||||
|
<value>4096</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>
|
<description>
|
||||||
Enable/disable blacklisting of hosts for AM based on AM failures on those
|
Enable/disable blacklisting of hosts for AM based on AM failures on those
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -38,7 +39,10 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.fs.LocalDirAllocator;
|
import org.apache.hadoop.fs.LocalDirAllocator;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -61,6 +65,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
|
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.WindowsSecureContainerExecutor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
|
@ -71,7 +76,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.WindowsSecureContainerExecutor;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
|
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
|
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader;
|
import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader;
|
||||||
|
@ -171,6 +175,7 @@ public class ContainerLaunch implements Callable<Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Path containerLogDir;
|
||||||
try {
|
try {
|
||||||
localResources = container.getLocalizedResources();
|
localResources = container.getLocalizedResources();
|
||||||
if (localResources == null) {
|
if (localResources == null) {
|
||||||
|
@ -186,7 +191,7 @@ public class ContainerLaunch implements Callable<Integer> {
|
||||||
String appIdStr = app.getAppId().toString();
|
String appIdStr = app.getAppId().toString();
|
||||||
String relativeContainerLogDir = ContainerLaunch
|
String relativeContainerLogDir = ContainerLaunch
|
||||||
.getRelativeContainerLogDir(appIdStr, containerIdStr);
|
.getRelativeContainerLogDir(appIdStr, containerIdStr);
|
||||||
Path containerLogDir =
|
containerLogDir =
|
||||||
dirsHandler.getLogPathForWrite(relativeContainerLogDir, false);
|
dirsHandler.getLogPathForWrite(relativeContainerLogDir, false);
|
||||||
for (String str : command) {
|
for (String str : command) {
|
||||||
// TODO: Should we instead work via symlinks without this grammar?
|
// TODO: Should we instead work via symlinks without this grammar?
|
||||||
|
@ -334,6 +339,11 @@ public class ContainerLaunch implements Callable<Integer> {
|
||||||
LOG.debug("Container " + containerIdStr + " completed with exit code "
|
LOG.debug("Container " + containerIdStr + " completed with exit code "
|
||||||
+ ret);
|
+ ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StringBuilder diagnosticInfo =
|
||||||
|
new StringBuilder("Container exited with a non-zero exit code ");
|
||||||
|
diagnosticInfo.append(ret);
|
||||||
|
diagnosticInfo.append(". ");
|
||||||
if (ret == ExitCode.FORCE_KILLED.getExitCode()
|
if (ret == ExitCode.FORCE_KILLED.getExitCode()
|
||||||
|| ret == ExitCode.TERMINATED.getExitCode()) {
|
|| ret == ExitCode.TERMINATED.getExitCode()) {
|
||||||
// If the process was killed, Send container_cleanedup_after_kill and
|
// If the process was killed, Send container_cleanedup_after_kill and
|
||||||
|
@ -341,16 +351,13 @@ public class ContainerLaunch implements Callable<Integer> {
|
||||||
dispatcher.getEventHandler().handle(
|
dispatcher.getEventHandler().handle(
|
||||||
new ContainerExitEvent(containerID,
|
new ContainerExitEvent(containerID,
|
||||||
ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret,
|
ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret,
|
||||||
"Container exited with a non-zero exit code " + ret));
|
diagnosticInfo.toString()));
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
LOG.warn("Container exited with a non-zero exit code " + ret);
|
handleContainerExitWithFailure(containerID, ret, containerLogDir,
|
||||||
this.dispatcher.getEventHandler().handle(new ContainerExitEvent(
|
diagnosticInfo);
|
||||||
containerID,
|
|
||||||
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
|
|
||||||
"Container exited with a non-zero exit code " + ret));
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -361,6 +368,78 @@ public class ContainerLaunch implements Callable<Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tries to tail and fetch the configured number of bytes (NM_CONTAINER_STDERR_BYTES) of data from the error log.
|
||||||
|
* ErrorLog filename is not fixed and depends upon app, hence file name
|
||||||
|
* pattern is used.
|
||||||
|
* @param containerID ID of the container that exited with a failure
|
||||||
|
* @param ret non-zero exit code returned by the container
|
||||||
|
* @param containerLogDir directory searched for files matching the error file name pattern
|
||||||
|
* @param diagnosticInfo diagnostics message built so far; error file names and the log tail are appended to it
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
private void handleContainerExitWithFailure(ContainerId containerID, int ret,
|
||||||
|
Path containerLogDir, StringBuilder diagnosticInfo) {
|
||||||
|
LOG.warn(diagnosticInfo);
|
||||||
|
|
||||||
|
String errorFileNamePattern =
|
||||||
|
conf.get(YarnConfiguration.NM_CONTAINER_STDERR_PATTERN,
|
||||||
|
YarnConfiguration.DEFAULT_NM_CONTAINER_STDERR_PATTERN);
|
||||||
|
FSDataInputStream errorFileIS = null;
|
||||||
|
try {
|
||||||
|
FileSystem fileSystem = FileSystem.getLocal(conf).getRaw();
|
||||||
|
FileStatus[] errorFileStatuses = fileSystem
|
||||||
|
.globStatus(new Path(containerLogDir, errorFileNamePattern));
|
||||||
|
if (errorFileStatuses != null && errorFileStatuses.length != 0) {
|
||||||
|
long tailSizeInBytes =
|
||||||
|
conf.getLong(YarnConfiguration.NM_CONTAINER_STDERR_BYTES,
|
||||||
|
YarnConfiguration.DEFAULT_NM_CONTAINER_STDERR_BYTES);
|
||||||
|
Path errorFile = errorFileStatuses[0].getPath();
|
||||||
|
long fileSize = errorFileStatuses[0].getLen();
|
||||||
|
|
||||||
|
// if more than one file matches the stderr pattern, take the latest
|
||||||
|
// modified file, and also append the file names in the diagnosticInfo
|
||||||
|
if (errorFileStatuses.length > 1) {
|
||||||
|
String[] errorFileNames = new String[errorFileStatuses.length];
|
||||||
|
long latestModifiedTime = errorFileStatuses[0].getModificationTime();
|
||||||
|
errorFileNames[0] = errorFileStatuses[0].getPath().getName();
|
||||||
|
for (int i = 1; i < errorFileStatuses.length; i++) {
|
||||||
|
errorFileNames[i] = errorFileStatuses[i].getPath().getName();
|
||||||
|
if (errorFileStatuses[i]
|
||||||
|
.getModificationTime() > latestModifiedTime) {
|
||||||
|
latestModifiedTime = errorFileStatuses[i].getModificationTime();
|
||||||
|
errorFile = errorFileStatuses[i].getPath();
|
||||||
|
fileSize = errorFileStatuses[i].getLen();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diagnosticInfo.append("Error files: ")
|
||||||
|
.append(StringUtils.join(", ", errorFileNames)).append(".\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
long startPosition =
|
||||||
|
(fileSize < tailSizeInBytes) ? 0 : fileSize - tailSizeInBytes;
|
||||||
|
int bufferSize =
|
||||||
|
(int) ((fileSize < tailSizeInBytes) ? fileSize : tailSizeInBytes);
|
||||||
|
byte[] tailBuffer = new byte[bufferSize];
|
||||||
|
errorFileIS = fileSystem.open(errorFile);
|
||||||
|
errorFileIS.readFully(startPosition, tailBuffer);
|
||||||
|
|
||||||
|
diagnosticInfo.append("Last ").append(tailSizeInBytes)
|
||||||
|
.append(" bytes of ").append(errorFile.getName()).append(" :\n")
|
||||||
|
.append(new String(tailBuffer, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("Failed to get tail of the container's error log file", e);
|
||||||
|
} finally {
|
||||||
|
IOUtils.cleanup(LOG, errorFileIS);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.dispatcher.getEventHandler()
|
||||||
|
.handle(new ContainerExitEvent(containerID,
|
||||||
|
ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
|
||||||
|
diagnosticInfo.toString()));
|
||||||
|
}
|
||||||
|
|
||||||
protected String getPidFileSubpath(String appIdStr, String containerIdStr) {
|
protected String getPidFileSubpath(String appIdStr, String containerIdStr) {
|
||||||
return getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
|
return getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
|
||||||
+ String.format(ContainerLaunch.PID_FILE_NAME_FMT, containerIdStr);
|
+ String.format(ContainerLaunch.PID_FILE_NAME_FMT, containerIdStr);
|
||||||
|
|
|
@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertThat;
|
import static org.junit.Assert.assertThat;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
import static org.junit.matchers.JUnitMatchers.containsString;
|
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@ -48,6 +47,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
|
import org.apache.hadoop.security.Credentials;
|
||||||
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
|
||||||
import org.apache.hadoop.util.Shell;
|
import org.apache.hadoop.util.Shell;
|
||||||
import org.apache.hadoop.util.Shell.ExitCodeException;
|
import org.apache.hadoop.util.Shell.ExitCodeException;
|
||||||
|
@ -81,19 +81,21 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
|
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
|
import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.ShellScriptBuilder;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.ShellScriptBuilder;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
|
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
|
import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
|
||||||
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
|
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
|
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
import org.apache.hadoop.yarn.util.Apps;
|
import org.apache.hadoop.yarn.util.Apps;
|
||||||
import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
|
import org.apache.hadoop.yarn.util.AuxiliaryServiceHelper;
|
||||||
|
@ -108,6 +110,7 @@ import org.junit.Test;
|
||||||
|
|
||||||
public class TestContainerLaunch extends BaseContainerManagerTest {
|
public class TestContainerLaunch extends BaseContainerManagerTest {
|
||||||
|
|
||||||
|
private static final String INVALID_JAVA_HOME = "/no/jvm/here";
|
||||||
protected Context distContext = new NMContext(new NMContainerTokenSecretManager(
|
protected Context distContext = new NMContext(new NMContainerTokenSecretManager(
|
||||||
conf), new NMTokenSecretManagerInNM(), null,
|
conf), new NMTokenSecretManagerInNM(), null,
|
||||||
new ApplicationACLsManager(conf), new NMNullStateStoreService()) {
|
new ApplicationACLsManager(conf), new NMNullStateStoreService()) {
|
||||||
|
@ -492,6 +495,147 @@ public class TestContainerLaunch extends BaseContainerManagerTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testErrorLogOnContainerExit() throws Exception {
|
||||||
|
verifyTailErrorLogOnContainerExit(new Configuration(), "/stderr", false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testErrorLogOnContainerExitForCase() throws Exception {
|
||||||
|
verifyTailErrorLogOnContainerExit(new Configuration(), "/STDERR.log",
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testErrorLogOnContainerExitForExt() throws Exception {
|
||||||
|
verifyTailErrorLogOnContainerExit(new Configuration(), "/AppMaster.stderr",
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testErrorLogOnContainerExitWithCustomPattern() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setStrings(YarnConfiguration.NM_CONTAINER_STDERR_PATTERN,
|
||||||
|
"{*stderr*,*log*}");
|
||||||
|
verifyTailErrorLogOnContainerExit(conf, "/error.log", false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testErrorLogOnContainerExitWithMultipleFiles() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setStrings(YarnConfiguration.NM_CONTAINER_STDERR_PATTERN,
|
||||||
|
"{*stderr*,*stdout*}");
|
||||||
|
verifyTailErrorLogOnContainerExit(conf, "/stderr.log", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyTailErrorLogOnContainerExit(Configuration conf,
|
||||||
|
String errorFileName, boolean testForMultipleErrFiles) throws Exception {
|
||||||
|
Container container = mock(Container.class);
|
||||||
|
ApplicationId appId =
|
||||||
|
ApplicationId.newInstance(System.currentTimeMillis(), 1);
|
||||||
|
ContainerId containerId = ContainerId
|
||||||
|
.newContainerId(ApplicationAttemptId.newInstance(appId, 1), 1);
|
||||||
|
when(container.getContainerId()).thenReturn(containerId);
|
||||||
|
when(container.getUser()).thenReturn("test");
|
||||||
|
String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir(
|
||||||
|
appId.toString(), ConverterUtils.toString(containerId));
|
||||||
|
Path containerLogDir =
|
||||||
|
dirsHandler.getLogPathForWrite(relativeContainerLogDir, false);
|
||||||
|
|
||||||
|
ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
|
||||||
|
List<String> invalidCommand = new ArrayList<String>();
|
||||||
|
invalidCommand.add("$JAVA_HOME/bin/java");
|
||||||
|
invalidCommand.add("-Djava.io.tmpdir=$PWD/tmp");
|
||||||
|
invalidCommand.add("-Dlog4j.configuration=container-log4j.properties");
|
||||||
|
invalidCommand.add("-Dyarn.app.container.log.dir=" + containerLogDir);
|
||||||
|
invalidCommand.add("-Dyarn.app.container.log.filesize=0");
|
||||||
|
invalidCommand.add("-Dhadoop.root.logger=INFO,CLA");
|
||||||
|
invalidCommand.add("-Dhadoop.root.logfile=syslog");
|
||||||
|
invalidCommand.add("-Xmx1024m");
|
||||||
|
invalidCommand.add("org.apache.hadoop.mapreduce.v2.app.MRAppMaster");
|
||||||
|
invalidCommand.add("1>" + containerLogDir + "/stdout");
|
||||||
|
invalidCommand.add("2>" + containerLogDir + errorFileName);
|
||||||
|
when(clc.getCommands()).thenReturn(invalidCommand);
|
||||||
|
|
||||||
|
Map<String, String> userSetEnv = new HashMap<String, String>();
|
||||||
|
userSetEnv.put(Environment.CONTAINER_ID.name(), "user_set_container_id");
|
||||||
|
userSetEnv.put("JAVA_HOME", INVALID_JAVA_HOME);
|
||||||
|
userSetEnv.put(Environment.NM_HOST.name(), "user_set_NM_HOST");
|
||||||
|
userSetEnv.put(Environment.NM_PORT.name(), "user_set_NM_PORT");
|
||||||
|
userSetEnv.put(Environment.NM_HTTP_PORT.name(), "user_set_NM_HTTP_PORT");
|
||||||
|
userSetEnv.put(Environment.LOCAL_DIRS.name(), "user_set_LOCAL_DIR");
|
||||||
|
userSetEnv.put(Environment.USER.key(),
|
||||||
|
"user_set_" + Environment.USER.key());
|
||||||
|
userSetEnv.put(Environment.LOGNAME.name(), "user_set_LOGNAME");
|
||||||
|
userSetEnv.put(Environment.PWD.name(), "user_set_PWD");
|
||||||
|
userSetEnv.put(Environment.HOME.name(), "user_set_HOME");
|
||||||
|
userSetEnv.put(Environment.CLASSPATH.name(), "APATH");
|
||||||
|
when(clc.getEnvironment()).thenReturn(userSetEnv);
|
||||||
|
when(container.getLaunchContext()).thenReturn(clc);
|
||||||
|
|
||||||
|
when(container.getLocalizedResources())
|
||||||
|
.thenReturn(Collections.<Path, List<String>> emptyMap());
|
||||||
|
Dispatcher dispatcher = mock(Dispatcher.class);
|
||||||
|
|
||||||
|
@SuppressWarnings("rawtypes")
|
||||||
|
ContainerExitHandler eventHandler =
|
||||||
|
new ContainerExitHandler(testForMultipleErrFiles);
|
||||||
|
when(dispatcher.getEventHandler()).thenReturn(eventHandler);
|
||||||
|
|
||||||
|
Application app = mock(Application.class);
|
||||||
|
when(app.getAppId()).thenReturn(appId);
|
||||||
|
when(app.getUser()).thenReturn("test");
|
||||||
|
|
||||||
|
Credentials creds = mock(Credentials.class);
|
||||||
|
when(container.getCredentials()).thenReturn(creds);
|
||||||
|
|
||||||
|
((NMContext) context).setNodeId(NodeId.newInstance("127.0.0.1", HTTP_PORT));
|
||||||
|
|
||||||
|
ContainerLaunch launch = new ContainerLaunch(context, conf, dispatcher,
|
||||||
|
exec, app, container, dirsHandler, containerManager);
|
||||||
|
launch.call();
|
||||||
|
Assert.assertTrue("ContainerExitEvent should have occured",
|
||||||
|
eventHandler.isContainerExitEventOccured());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ContainerExitHandler
|
||||||
|
implements EventHandler<ContainerEvent> {
|
||||||
|
private boolean testForMultiFile;
|
||||||
|
|
||||||
|
ContainerExitHandler(boolean testForMultiFile) {
|
||||||
|
this.testForMultiFile = testForMultiFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean containerExitEventOccured = false;
|
||||||
|
|
||||||
|
public boolean isContainerExitEventOccured() {
|
||||||
|
return containerExitEventOccured;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void handle(ContainerEvent event) {
|
||||||
|
if (event instanceof ContainerExitEvent) {
|
||||||
|
containerExitEventOccured = true;
|
||||||
|
ContainerExitEvent exitEvent = (ContainerExitEvent) event;
|
||||||
|
Assert.assertEquals(ContainerEventType.CONTAINER_EXITED_WITH_FAILURE,
|
||||||
|
exitEvent.getType());
|
||||||
|
LOG.info("Diagnostic Info : " + exitEvent.getDiagnosticInfo());
|
||||||
|
if (testForMultiFile) {
|
||||||
|
Assert.assertTrue("Should contain the Multi file information",
|
||||||
|
exitEvent.getDiagnosticInfo().contains("Error files: "));
|
||||||
|
}
|
||||||
|
Assert.assertTrue(
|
||||||
|
"Should contain the error Log message with tail size info",
|
||||||
|
exitEvent.getDiagnosticInfo()
|
||||||
|
.contains("Last "
|
||||||
|
+ YarnConfiguration.DEFAULT_NM_CONTAINER_STDERR_BYTES
|
||||||
|
+ " bytes of"));
|
||||||
|
Assert.assertTrue("Should contain contents of error Log",
|
||||||
|
exitEvent.getDiagnosticInfo().contains(
|
||||||
|
INVALID_JAVA_HOME + "/bin/java: No such file or directory"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static List<String> getJarManifestClasspath(String path)
|
private static List<String> getJarManifestClasspath(String path)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
List<String> classpath = new ArrayList<String>();
|
List<String> classpath = new ArrayList<String>();
|
||||||
|
|
Loading…
Reference in New Issue