From 549bcc2c02983086ee6694982d5f3503f5f4517f Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Sun, 27 Jul 2014 01:37:51 +0000 Subject: [PATCH] MAPREDUCE-6002. Made MR task avoid reporting error to AM when the task process is shutting down. Contributed by Wangda Tan. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1613743 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../hadoop/mapred/LocalContainerLauncher.java | 18 ++++++++++----- .../org/apache/hadoop/mapred/YarnChild.java | 22 +++++++++++++------ .../java/org/apache/hadoop/mapred/Task.java | 6 +++++ 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c8a83bf64e5..5760cef3060 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -325,6 +325,9 @@ Release 2.5.0 - UNRELEASED MAPREDUCE-5952. LocalContainerLauncher#renameMapOutputForReduce incorrectly assumes a single dir for mapOutIndex. (Gera Shegalov via kasha) + MAPREDUCE-6002. Made MR task avoid reporting error to AM when the task process + is shutting down. (Wangda Tan via zjshen) + Release 2.4.1 - 2014-06-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java index c7898ed966f..218ac835d27 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java @@ -31,6 +31,7 @@ import java.util.concurrent.LinkedBlockingQueue; import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FSError; @@ -57,6 +58,7 @@ import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -406,7 +408,9 @@ private void runSubtask(org.apache.hadoop.mapred.Task task, } catch (FSError e) { LOG.fatal("FSError from child", e); // umbilical: MRAppMaster creates (taskAttemptListener), passes to us - umbilical.fsError(classicAttemptID, e.getMessage()); + if (!ShutdownHookManager.get().isShutdownInProgress()) { + umbilical.fsError(classicAttemptID, e.getMessage()); + } throw new RuntimeException(); } catch (Exception exception) { @@ -429,11 +433,13 @@ private void runSubtask(org.apache.hadoop.mapred.Task task, } catch (Throwable throwable) { LOG.fatal("Error running local (uberized) 'child' : " + StringUtils.stringifyException(throwable)); - Throwable tCause = throwable.getCause(); - String cause = (tCause == null) - ? throwable.getMessage() - : StringUtils.stringifyException(tCause); - umbilical.fatalError(classicAttemptID, cause); + if (!ShutdownHookManager.get().isShutdownInProgress()) { + Throwable tCause = throwable.getCause(); + String cause = + (tCause == null) ? throwable.getMessage() : StringUtils + .stringifyException(tCause); + umbilical.fatalError(classicAttemptID, cause); + } throw new RuntimeException(); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java index 9212bfd154a..4ba1991ed9b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java @@ -56,6 +56,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DiskChecker.DiskErrorException; +import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.ApplicationConstants; @@ -176,7 +177,9 @@ public Object run() throws Exception { }); } catch (FSError e) { LOG.fatal("FSError from child", e); - umbilical.fsError(taskid, e.getMessage()); + if (!ShutdownHookManager.get().isShutdownInProgress()) { + umbilical.fsError(taskid, e.getMessage()); + } } catch (Exception exception) { LOG.warn("Exception running child : " + StringUtils.stringifyException(exception)); @@ -201,17 +204,22 @@ public Object run() throws Exception { } // Report back any failures, for diagnostic purposes if (taskid != null) { - umbilical.fatalError(taskid, StringUtils.stringifyException(exception)); + if (!ShutdownHookManager.get().isShutdownInProgress()) { + umbilical.fatalError(taskid, + StringUtils.stringifyException(exception)); + } } } catch (Throwable throwable) { LOG.fatal("Error running child : " + StringUtils.stringifyException(throwable)); if (taskid != null) { - Throwable tCause = throwable.getCause(); - String cause = tCause == null - ? throwable.getMessage() - : StringUtils.stringifyException(tCause); - umbilical.fatalError(taskid, cause); + if (!ShutdownHookManager.get().isShutdownInProgress()) { + Throwable tCause = throwable.getCause(); + String cause = + tCause == null ? throwable.getMessage() : StringUtils + .stringifyException(tCause); + umbilical.fatalError(taskid, cause); + } } } finally { RPC.stopProxy(umbilical); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java index 4815f191f7e..3a4c513f3f1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java @@ -66,6 +66,7 @@ import org.apache.hadoop.util.Progress; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; @@ -322,6 +323,11 @@ protected void setWriteSkipRecs(boolean writeSkipRecs) { protected void reportFatalError(TaskAttemptID id, Throwable throwable, String logMsg) { LOG.fatal(logMsg); + + if (ShutdownHookManager.get().isShutdownInProgress()) { + return; + } + Throwable tCause = throwable.getCause(); String cause = tCause == null ? StringUtils.stringifyException(throwable)