From eff5d9b17e0853e82968a695b498b4be37148a05 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Thu, 4 Jul 2013 23:31:26 +0000 Subject: [PATCH] YARN-845. RM crash with NPE on NODE_UPDATE (Mayank Bansal via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1499886 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../resourcemanager/scheduler/capacity/LeafQueue.java | 7 ++++--- .../scheduler/common/fica/FiCaSchedulerApp.java | 11 +++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1221cdeeb61..a0e3a9b75e4 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -663,6 +663,8 @@ Release 2.1.0-beta - 2013-07-02 mechanisms are enabled and thus fix YARN/MR test failures after HADOOP-9421. (Daryn Sharp and Vinod Kumar Vavilapalli via vinodkv) + YARN-845. RM crash with NPE on NODE_UPDATE (Mayank Bansal via bikas) + BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS YARN-158. Yarn creating package-info.java must not depend on sh. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 0e7469561d7..dbfa7444183 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -801,9 +801,10 @@ private synchronized FiCaSchedulerApp getApplication( if (reservedContainer != null) { FiCaSchedulerApp application = getApplication(reservedContainer.getApplicationAttemptId()); - return - assignReservedContainer(application, node, reservedContainer, - clusterResource); + synchronized (application) { + return assignReservedContainer(application, node, reservedContainer, + clusterResource); + } } // Try to assign containers to applications in order diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 4705f97dabf..8e2020abc79 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; @@ -426,6 +427,16 @@ public synchronized void unreserve(FiCaSchedulerNode node, Priority priority) { this.reservedContainers.remove(priority); } + // reservedContainer should not be null here + if (reservedContainer == null) { + String errorMesssage = + "Application " + getApplicationId() + " is trying to unreserve " + + " on node " + node + ", currently has " + + reservedContainers.size() + " at priority " + priority + + "; currentReservation " + currentReservation; + LOG.warn(errorMesssage); + throw new YarnRuntimeException(errorMesssage); + } // Reset the re-reservation count resetReReservations(priority);