From a703952d3916d33b8524236e6c6372ae4ec959c5 Mon Sep 17 00:00:00 2001 From: Jian He Date: Mon, 9 Feb 2015 13:47:08 -0800 Subject: [PATCH] YARN-3094. Reset timer for liveness monitors after RM recovery. Contributed by Jun Gong (cherry picked from commit 0af6a99a3fcfa4b47d3bcba5e5cc5fe7b312a152) (cherry picked from commit 61466809552f96a83aa19446d4d59cecd0d2cad5) (cherry picked from commit ab654746fbad2da12b24b13425dc9bf17c46b50c) --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/util/AbstractLivelinessMonitor.java | 8 ++ .../resourcemanager/ResourceManager.java | 2 + .../rmapp/attempt/AMLivelinessMonitor.java | 6 ++ .../attempt/TestAMLivelinessMonitor.java | 81 +++++++++++++++++++ 5 files changed, 100 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestAMLivelinessMonitor.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d23fefae5f9..915f4763bfc 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -72,6 +72,9 @@ Release 2.6.1 - UNRELEASED YARN-3103. AMRMClientImpl does not update AMRM token properly. (Jason Lowe via jianhe) + YARN-3094. Reset timer for liveness monitors after RM recovery. (Jun Gong + via jianhe) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java index c1825319a73..4f587b348cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AbstractLivelinessMonitor.java @@ -59,6 +59,7 @@ public abstract class AbstractLivelinessMonitor extends AbstractService { @Override protected void serviceStart() throws Exception { assert !stopped : "starting when already stopped"; + resetTimer(); checkerThread = new Thread(new PingChecker()); checkerThread.setName("Ping Checker"); checkerThread.start(); @@ -99,6 +100,13 @@ public abstract class AbstractLivelinessMonitor extends AbstractService { running.remove(ob); } + public synchronized void resetTimer() { + long time = clock.getTime(); + for (O ob : running.keySet()) { + running.put(ob, time); + } + } + private class PingChecker implements Runnable { @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 3ce42a36b17..ea762c0d0ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -571,12 +571,14 @@ public class ResourceManager extends CompositeService implements Recoverable { if(recoveryEnabled) { try { + LOG.info("Recovery started"); rmStore.checkVersion(); if (rmContext.isWorkPreservingRecoveryEnabled()) { rmContext.setEpoch(rmStore.getAndIncrementEpoch()); } RMState state = rmStore.loadState(); recover(state); + LOG.info("Recovery ended"); } catch (Exception e) { // the Exception from loadState() needs to be handled for // HA and we need to give up master status if we got fenced diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java index 2c1f7f1f03e..76331bf7fec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AMLivelinessMonitor.java @@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.util.AbstractLivelinessMonitor; +import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.SystemClock; public class AMLivelinessMonitor extends AbstractLivelinessMonitor { @@ -35,6 +36,11 @@ public class AMLivelinessMonitor extends AbstractLivelinessMonitor