MAPREDUCE-6002. Made MR task avoid reporting error to AM when the task process is shutting down. Contributed by Wangda Tan.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1613743 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhijie Shen 2014-07-27 01:37:51 +00:00
parent 9bce3eca42
commit 549bcc2c02
4 changed files with 36 additions and 13 deletions

View File

@ -325,6 +325,9 @@ Release 2.5.0 - UNRELEASED
MAPREDUCE-5952. LocalContainerLauncher#renameMapOutputForReduce incorrectly
assumes a single dir for mapOutIndex. (Gera Shegalov via kasha)
MAPREDUCE-6002. Made MR task avoid reporting error to AM when the task process
is shutting down. (Wangda Tan via zjshen)
Release 2.4.1 - 2014-06-23
INCOMPATIBLE CHANGES

View File

@ -31,6 +31,7 @@ import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSError; import org.apache.hadoop.fs.FSError;
@ -57,6 +58,7 @@ import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
@ -406,7 +408,9 @@ public class LocalContainerLauncher extends AbstractService implements
} catch (FSError e) { } catch (FSError e) {
LOG.fatal("FSError from child", e); LOG.fatal("FSError from child", e);
// umbilical: MRAppMaster creates (taskAttemptListener), passes to us // umbilical: MRAppMaster creates (taskAttemptListener), passes to us
umbilical.fsError(classicAttemptID, e.getMessage()); if (!ShutdownHookManager.get().isShutdownInProgress()) {
umbilical.fsError(classicAttemptID, e.getMessage());
}
throw new RuntimeException(); throw new RuntimeException();
} catch (Exception exception) { } catch (Exception exception) {
@ -429,11 +433,13 @@ public class LocalContainerLauncher extends AbstractService implements
} catch (Throwable throwable) { } catch (Throwable throwable) {
LOG.fatal("Error running local (uberized) 'child' : " LOG.fatal("Error running local (uberized) 'child' : "
+ StringUtils.stringifyException(throwable)); + StringUtils.stringifyException(throwable));
Throwable tCause = throwable.getCause(); if (!ShutdownHookManager.get().isShutdownInProgress()) {
String cause = (tCause == null) Throwable tCause = throwable.getCause();
? throwable.getMessage() String cause =
: StringUtils.stringifyException(tCause); (tCause == null) ? throwable.getMessage() : StringUtils
umbilical.fatalError(classicAttemptID, cause); .stringifyException(tCause);
umbilical.fatalError(classicAttemptID, cause);
}
throw new RuntimeException(); throw new RuntimeException();
} }
} }

View File

@ -56,6 +56,7 @@ import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ApplicationConstants;
@ -176,7 +177,9 @@ class YarnChild {
}); });
} catch (FSError e) { } catch (FSError e) {
LOG.fatal("FSError from child", e); LOG.fatal("FSError from child", e);
umbilical.fsError(taskid, e.getMessage()); if (!ShutdownHookManager.get().isShutdownInProgress()) {
umbilical.fsError(taskid, e.getMessage());
}
} catch (Exception exception) { } catch (Exception exception) {
LOG.warn("Exception running child : " LOG.warn("Exception running child : "
+ StringUtils.stringifyException(exception)); + StringUtils.stringifyException(exception));
@ -201,17 +204,22 @@ class YarnChild {
} }
// Report back any failures, for diagnostic purposes // Report back any failures, for diagnostic purposes
if (taskid != null) { if (taskid != null) {
umbilical.fatalError(taskid, StringUtils.stringifyException(exception)); if (!ShutdownHookManager.get().isShutdownInProgress()) {
umbilical.fatalError(taskid,
StringUtils.stringifyException(exception));
}
} }
} catch (Throwable throwable) { } catch (Throwable throwable) {
LOG.fatal("Error running child : " LOG.fatal("Error running child : "
+ StringUtils.stringifyException(throwable)); + StringUtils.stringifyException(throwable));
if (taskid != null) { if (taskid != null) {
Throwable tCause = throwable.getCause(); if (!ShutdownHookManager.get().isShutdownInProgress()) {
String cause = tCause == null Throwable tCause = throwable.getCause();
? throwable.getMessage() String cause =
: StringUtils.stringifyException(tCause); tCause == null ? throwable.getMessage() : StringUtils
umbilical.fatalError(taskid, cause); .stringifyException(tCause);
umbilical.fatalError(taskid, cause);
}
} }
} finally { } finally {
RPC.stopProxy(umbilical); RPC.stopProxy(umbilical);

View File

@ -66,6 +66,7 @@ import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.Progress; import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringInterner;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
@ -322,6 +323,11 @@ abstract public class Task implements Writable, Configurable {
protected void reportFatalError(TaskAttemptID id, Throwable throwable, protected void reportFatalError(TaskAttemptID id, Throwable throwable,
String logMsg) { String logMsg) {
LOG.fatal(logMsg); LOG.fatal(logMsg);
if (ShutdownHookManager.get().isShutdownInProgress()) {
return;
}
Throwable tCause = throwable.getCause(); Throwable tCause = throwable.getCause();
String cause = tCause == null String cause = tCause == null
? StringUtils.stringifyException(throwable) ? StringUtils.stringifyException(throwable)