MAPREDUCE-6002. Made MR task avoid reporting error to AM when the task process is shutting down. Contributed by Wangda Tan.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1613743 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9bce3eca42
commit
549bcc2c02
|
@ -325,6 +325,9 @@ Release 2.5.0 - UNRELEASED
|
|||
MAPREDUCE-5952. LocalContainerLauncher#renameMapOutputForReduce incorrectly
|
||||
assumes a single dir for mapOutIndex. (Gera Shegalov via kasha)
|
||||
|
||||
MAPREDUCE-6002. Made MR task avoid reporting error to AM when the task process
|
||||
is shutting down. (Wangda Tan via zjshen)
|
||||
|
||||
Release 2.4.1 - 2014-06-23
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -31,6 +31,7 @@ import java.util.concurrent.Future;
|
|||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FSError;
|
||||
|
@ -57,6 +58,7 @@ import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
|
|||
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.util.ExitUtil;
|
||||
import org.apache.hadoop.util.ShutdownHookManager;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
|
@ -406,7 +408,9 @@ public class LocalContainerLauncher extends AbstractService implements
|
|||
} catch (FSError e) {
|
||||
LOG.fatal("FSError from child", e);
|
||||
// umbilical: MRAppMaster creates (taskAttemptListener), passes to us
|
||||
umbilical.fsError(classicAttemptID, e.getMessage());
|
||||
if (!ShutdownHookManager.get().isShutdownInProgress()) {
|
||||
umbilical.fsError(classicAttemptID, e.getMessage());
|
||||
}
|
||||
throw new RuntimeException();
|
||||
|
||||
} catch (Exception exception) {
|
||||
|
@ -429,11 +433,13 @@ public class LocalContainerLauncher extends AbstractService implements
|
|||
} catch (Throwable throwable) {
|
||||
LOG.fatal("Error running local (uberized) 'child' : "
|
||||
+ StringUtils.stringifyException(throwable));
|
||||
Throwable tCause = throwable.getCause();
|
||||
String cause = (tCause == null)
|
||||
? throwable.getMessage()
|
||||
: StringUtils.stringifyException(tCause);
|
||||
umbilical.fatalError(classicAttemptID, cause);
|
||||
if (!ShutdownHookManager.get().isShutdownInProgress()) {
|
||||
Throwable tCause = throwable.getCause();
|
||||
String cause =
|
||||
(tCause == null) ? throwable.getMessage() : StringUtils
|
||||
.stringifyException(tCause);
|
||||
umbilical.fatalError(classicAttemptID, cause);
|
||||
}
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.hadoop.security.SecurityUtil;
|
|||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.security.token.Token;
|
||||
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||
import org.apache.hadoop.util.ShutdownHookManager;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
|
||||
import org.apache.hadoop.yarn.api.ApplicationConstants;
|
||||
|
@ -176,7 +177,9 @@ class YarnChild {
|
|||
});
|
||||
} catch (FSError e) {
|
||||
LOG.fatal("FSError from child", e);
|
||||
umbilical.fsError(taskid, e.getMessage());
|
||||
if (!ShutdownHookManager.get().isShutdownInProgress()) {
|
||||
umbilical.fsError(taskid, e.getMessage());
|
||||
}
|
||||
} catch (Exception exception) {
|
||||
LOG.warn("Exception running child : "
|
||||
+ StringUtils.stringifyException(exception));
|
||||
|
@ -201,17 +204,22 @@ class YarnChild {
|
|||
}
|
||||
// Report back any failures, for diagnostic purposes
|
||||
if (taskid != null) {
|
||||
umbilical.fatalError(taskid, StringUtils.stringifyException(exception));
|
||||
if (!ShutdownHookManager.get().isShutdownInProgress()) {
|
||||
umbilical.fatalError(taskid,
|
||||
StringUtils.stringifyException(exception));
|
||||
}
|
||||
}
|
||||
} catch (Throwable throwable) {
|
||||
LOG.fatal("Error running child : "
|
||||
+ StringUtils.stringifyException(throwable));
|
||||
if (taskid != null) {
|
||||
Throwable tCause = throwable.getCause();
|
||||
String cause = tCause == null
|
||||
? throwable.getMessage()
|
||||
: StringUtils.stringifyException(tCause);
|
||||
umbilical.fatalError(taskid, cause);
|
||||
if (!ShutdownHookManager.get().isShutdownInProgress()) {
|
||||
Throwable tCause = throwable.getCause();
|
||||
String cause =
|
||||
tCause == null ? throwable.getMessage() : StringUtils
|
||||
.stringifyException(tCause);
|
||||
umbilical.fatalError(taskid, cause);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
RPC.stopProxy(umbilical);
|
||||
|
|
|
@ -66,6 +66,7 @@ import org.apache.hadoop.net.NetUtils;
|
|||
import org.apache.hadoop.util.Progress;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
import org.apache.hadoop.util.ShutdownHookManager;
|
||||
import org.apache.hadoop.util.StringInterner;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
|
@ -322,6 +323,11 @@ abstract public class Task implements Writable, Configurable {
|
|||
protected void reportFatalError(TaskAttemptID id, Throwable throwable,
|
||||
String logMsg) {
|
||||
LOG.fatal(logMsg);
|
||||
|
||||
if (ShutdownHookManager.get().isShutdownInProgress()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Throwable tCause = throwable.getCause();
|
||||
String cause = tCause == null
|
||||
? StringUtils.stringifyException(throwable)
|
||||
|
|
Loading…
Reference in New Issue