From f218527fff9faa45e9399f716cc41dcad19b9029 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Thu, 10 Oct 2013 22:49:56 +0000 Subject: [PATCH] YARN-1265. Fair Scheduler chokes on unhealthy node reconnect (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1531146 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../server/resourcemanager/scheduler/fair/FairScheduler.java | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 34309c65d30..9d279f3a276 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -98,6 +98,8 @@ Release 2.2.1 - UNRELEASED YARN-879. Fixed tests w.r.t o.a.h.y.server.resourcemanager.Application. (Junping Du via devaraj) + YARN-1265. Fair Scheduler chokes on unhealthy node reconnect (Sandy Ryza) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index f2ac6a699c8..a3d57369dce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -788,6 +788,10 @@ public class FairScheduler implements ResourceScheduler { private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = nodes.get(rmNode.getNodeID()); + // This can occur when an UNHEALTHY node reconnects + if (node == null) { + return; + } Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability()); updateRootQueueMetrics();