From 193d8d3667c1a6bfe024e9d02fdabc0d7638e7f7 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 3 Jun 2015 19:44:07 +0000 Subject: [PATCH] YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM recovery is enabled. Contributed by Rohith Sharmaks (cherry picked from commit e13b671aa510f553f4a6a232b4694b6a4cce88ae) (cherry picked from commit 752caa95a40d899e1bf98bc907e91aec2bb57073) (cherry picked from commit 13c4db632b0e7f19dcfa883c2492431c2c7d0799) --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/nodemanager/NodeManager.java | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 4ce9f9fafc4..6b46619779d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -153,6 +153,9 @@ Release 2.6.1 - UNRELEASED YARN-3725. App submission via REST API is broken in secure mode due to Timeline DT service address is empty. (Zhijie Shen via wangda) + YARN-3585. 
NodeManager cannot exit on SHUTDOWN event triggered and NM + recovery is enabled (Rohith Sharmaks via jlowe) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 063ae87f0ea..0ec86d03ec4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -37,6 +37,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; @@ -88,6 +89,7 @@ public class NodeManager extends CompositeService private AtomicBoolean isStopping = new AtomicBoolean(false); private boolean rmWorkPreservingRestartEnabled; + private boolean shouldExitOnShutdownEvent = false; public NodeManager() { super(NodeManager.class.getName()); @@ -287,7 +289,16 @@ public class NodeManager extends CompositeService new Thread() { @Override public void run() { - NodeManager.this.stop(); + try { + NodeManager.this.stop(); + } catch (Throwable t) { + LOG.error("Error while shutting down NodeManager", t); + } finally { + if (shouldExitOnShutdownEvent + && !ShutdownHookManager.get().isShutdownInProgress()) { + ExitUtil.terminate(-1); + } + } } }.start(); } @@ -463,7 +474,9 @@ public class NodeManager 
extends CompositeService nodeManagerShutdownHook = new CompositeServiceShutdownHook(this); ShutdownHookManager.get().addShutdownHook(nodeManagerShutdownHook, SHUTDOWN_HOOK_PRIORITY); - + // System exit should be called only when NodeManager is instantiated from + // main() function + this.shouldExitOnShutdownEvent = true; this.init(conf); this.start(); } catch (Throwable t) {