MAPREDUCE-4300. OOM in AM can turn it into a zombie. (Robert Evans via tgraves)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1359399 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
820be7cbef
commit
11782dd3a5
|
@ -664,6 +664,9 @@ Release 0.23.3 - UNRELEASED
|
||||||
MAPREDUCE-4402. TestFileInputFormat fails intermittently (Jason Lowe via
|
MAPREDUCE-4402. TestFileInputFormat fails intermittently (Jason Lowe via
|
||||||
bobby)
|
bobby)
|
||||||
|
|
||||||
|
MAPREDUCE-4300. OOM in AM can turn it into a zombie. (Robert Evans via
|
||||||
|
tgraves)
|
||||||
|
|
||||||
Release 0.23.2 - UNRELEASED
|
Release 0.23.2 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
import org.apache.hadoop.security.token.TokenIdentifier;
|
||||||
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.api.ApplicationConstants;
|
import org.apache.hadoop.yarn.api.ApplicationConstants;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
|
|
||||||
|
@ -71,6 +72,7 @@ class YarnChild {
|
||||||
static volatile TaskAttemptID taskid = null;
|
static volatile TaskAttemptID taskid = null;
|
||||||
|
|
||||||
public static void main(String[] args) throws Throwable {
|
public static void main(String[] args) throws Throwable {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
LOG.debug("Child starting");
|
LOG.debug("Child starting");
|
||||||
|
|
||||||
final JobConf defaultConf = new JobConf();
|
final JobConf defaultConf = new JobConf();
|
||||||
|
|
|
@ -95,6 +95,7 @@ import org.apache.hadoop.yarn.Clock;
|
||||||
import org.apache.hadoop.yarn.ClusterInfo;
|
import org.apache.hadoop.yarn.ClusterInfo;
|
||||||
import org.apache.hadoop.yarn.SystemClock;
|
import org.apache.hadoop.yarn.SystemClock;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.api.ApplicationConstants;
|
import org.apache.hadoop.yarn.api.ApplicationConstants;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -969,6 +970,7 @@ public class MRAppMaster extends CompositeService {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
try {
|
try {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
String containerIdStr =
|
String containerIdStr =
|
||||||
System.getenv(ApplicationConstants.AM_CONTAINER_ID_ENV);
|
System.getenv(ApplicationConstants.AM_CONTAINER_ID_ENV);
|
||||||
String nodeHostString = System.getenv(ApplicationConstants.NM_HOST_ENV);
|
String nodeHostString = System.getenv(ApplicationConstants.NM_HOST_ENV);
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.hadoop.security.SecurityUtil;
|
||||||
import org.apache.hadoop.util.ShutdownHookManager;
|
import org.apache.hadoop.util.ShutdownHookManager;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.service.CompositeService;
|
import org.apache.hadoop.yarn.service.CompositeService;
|
||||||
|
@ -122,6 +123,7 @@ public class JobHistoryServer extends CompositeService {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
StringUtils.startupShutdownMessage(JobHistoryServer.class, args, LOG);
|
StringUtils.startupShutdownMessage(JobHistoryServer.class, args, LOG);
|
||||||
try {
|
try {
|
||||||
JobHistoryServer jobHistoryServer = new JobHistoryServer();
|
JobHistoryServer jobHistoryServer = new JobHistoryServer();
|
||||||
|
|
|
@ -187,6 +187,11 @@
|
||||||
<Class name="org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer$DelegationTokenCancelThread" />
|
<Class name="org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer$DelegationTokenCancelThread" />
|
||||||
<Bug pattern="DM_EXIT" />
|
<Bug pattern="DM_EXIT" />
|
||||||
</Match>
|
</Match>
|
||||||
|
<Match>
|
||||||
|
<Class name="org.apache.hadoop.yarn.YarnUncaughtExceptionHandler"/>
|
||||||
|
<Bug pattern="DM_EXIT"/>
|
||||||
|
</Match>
|
||||||
|
|
||||||
<!-- AsyncDispatcher will kill the process if there is an error dispatching -->
|
<!-- AsyncDispatcher will kill the process if there is an error dispatching -->
|
||||||
<Match>
|
<Match>
|
||||||
<Class name="org.apache.hadoop.yarn.event.AsyncDispatcher" />
|
<Class name="org.apache.hadoop.yarn.event.AsyncDispatcher" />
|
||||||
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn;
|
||||||
|
|
||||||
|
import java.lang.Thread.UncaughtExceptionHandler;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.util.ShutdownHookManager;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is intended to be installed by calling
|
||||||
|
* {@link Thread#setDefaultUncaughtExceptionHandler(UncaughtExceptionHandler)}
|
||||||
|
* in the main entry point. It is intended to try to cleanly shut down
|
||||||
|
* programs using the Yarn Event framework.
|
||||||
|
*
|
||||||
|
* Note: Right now it will only shut down the program if an Error is caught, but
|
||||||
|
* not any other exception. Anything else is just logged.
|
||||||
|
*/
|
||||||
|
public class YarnUncaughtExceptionHandler implements UncaughtExceptionHandler {
|
||||||
|
private static final Log LOG = LogFactory.getLog(YarnUncaughtExceptionHandler.class);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void uncaughtException(Thread t, Throwable e) {
|
||||||
|
if(ShutdownHookManager.get().isShutdownInProgress()) {
|
||||||
|
LOG.error("Thread " + t + " threw an Throwable, but we are shutting " +
|
||||||
|
"down, so ignoring this", e);
|
||||||
|
} else if(e instanceof Error) {
|
||||||
|
try {
|
||||||
|
LOG.fatal("Thread " + t + " threw an Error. Shutting down now...", e);
|
||||||
|
} catch (Throwable err) {
|
||||||
|
//We don't want to not exit because of an issue with logging
|
||||||
|
}
|
||||||
|
if(e instanceof OutOfMemoryError) {
|
||||||
|
//After catching an OOM java says it is undefined behavior, so don't
|
||||||
|
//even try to clean up or we can get stuck on shutdown.
|
||||||
|
try {
|
||||||
|
System.err.println("Halting due to Out Of Memory Error...");
|
||||||
|
} catch (Throwable err) {
|
||||||
|
//Again we done want to exit because of logging issues.
|
||||||
|
}
|
||||||
|
Runtime.getRuntime().halt(-1);
|
||||||
|
} else {
|
||||||
|
System.exit(-1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG.error("Thread " + t + " threw an Exception.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.util.ReflectionUtils;
|
||||||
import org.apache.hadoop.util.ShutdownHookManager;
|
import org.apache.hadoop.util.ShutdownHookManager;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
||||||
|
@ -279,6 +280,7 @@ public class NodeManager extends CompositeService implements
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);
|
StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);
|
||||||
NodeManager nodeManager = new NodeManager();
|
NodeManager nodeManager = new NodeManager();
|
||||||
nodeManager.initAndStartNodeManager(false);
|
nodeManager.initAndStartNodeManager(false);
|
||||||
|
|
|
@ -50,6 +50,7 @@ import org.apache.hadoop.security.Credentials;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
import org.apache.hadoop.security.token.TokenIdentifier;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.api.records.LocalResource;
|
import org.apache.hadoop.yarn.api.records.LocalResource;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
|
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
|
@ -315,6 +316,7 @@ public class ContainerLocalizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] argv) throws Throwable {
|
public static void main(String[] argv) throws Throwable {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
// usage: $0 user appId locId host port app_log_dir user_dir [user_dir]*
|
// usage: $0 user appId locId host port app_log_dir user_dir [user_dir]*
|
||||||
// let $x = $x/usercache for $local.dir
|
// let $x = $x/usercache for $local.dir
|
||||||
// MKDIR $x/$user/appcache/$appid
|
// MKDIR $x/$user/appcache/$appid
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.hadoop.util.ReflectionUtils;
|
||||||
import org.apache.hadoop.util.ShutdownHookManager;
|
import org.apache.hadoop.util.ShutdownHookManager;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
@ -622,6 +623,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String argv[]) {
|
public static void main(String argv[]) {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
|
StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
|
||||||
try {
|
try {
|
||||||
Configuration conf = new YarnConfiguration();
|
Configuration conf = new YarnConfiguration();
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.security.SecurityUtil;
|
||||||
import org.apache.hadoop.util.ShutdownHookManager;
|
import org.apache.hadoop.util.ShutdownHookManager;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.service.CompositeService;
|
import org.apache.hadoop.yarn.service.CompositeService;
|
||||||
|
|
||||||
|
@ -73,6 +74,7 @@ public class WebAppProxyServer extends CompositeService {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
|
||||||
StringUtils.startupShutdownMessage(WebAppProxyServer.class, args, LOG);
|
StringUtils.startupShutdownMessage(WebAppProxyServer.class, args, LOG);
|
||||||
try {
|
try {
|
||||||
WebAppProxyServer proxy = new WebAppProxyServer();
|
WebAppProxyServer proxy = new WebAppProxyServer();
|
||||||
|
|
Loading…
Reference in New Issue