From 752caa95a40d899e1bf98bc907e91aec2bb57073 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 3 Jun 2015 19:44:07 +0000 Subject: [PATCH] YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM recovery is enabled. Contributed by Rohith Sharmaks (cherry picked from commit e13b671aa510f553f4a6a232b4694b6a4cce88ae) --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/nodemanager/NodeManager.java | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ee3a197475c..d2fba364f37 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -571,6 +571,9 @@ Release 2.7.1 - UNRELEASED YARN-3725. App submission via REST API is broken in secure mode due to Timeline DT service address is empty. (Zhijie Shen via wangda) + YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM + recovery is enabled (Rohith Sharmaks via jlowe) + Release 2.7.0 - 2015-04-20 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 03e17c83a79..185ba126a4d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -38,6 +38,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.service.CompositeService; +import org.apache.hadoop.util.ExitUtil; import 
org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.NodeHealthScriptRunner; import org.apache.hadoop.util.ReflectionUtils; @@ -94,6 +95,7 @@ public class NodeManager extends CompositeService private AtomicBoolean isStopping = new AtomicBoolean(false); private boolean rmWorkPreservingRestartEnabled; + private boolean shouldExitOnShutdownEvent = false; public NodeManager() { super(NodeManager.class.getName()); @@ -344,7 +346,16 @@ public class NodeManager extends CompositeService new Thread() { @Override public void run() { - NodeManager.this.stop(); + try { + NodeManager.this.stop(); + } catch (Throwable t) { + LOG.error("Error while shutting down NodeManager", t); + } finally { + if (shouldExitOnShutdownEvent + && !ShutdownHookManager.get().isShutdownInProgress()) { + ExitUtil.terminate(-1); + } + } } }.start(); } @@ -530,7 +541,9 @@ public class NodeManager extends CompositeService nodeManagerShutdownHook = new CompositeServiceShutdownHook(this); ShutdownHookManager.get().addShutdownHook(nodeManagerShutdownHook, SHUTDOWN_HOOK_PRIORITY); - + // System exit should be called only when NodeManager is instantiated from + // main() function + this.shouldExitOnShutdownEvent = true; this.init(conf); this.start(); } catch (Throwable t) {