From 8b398a66ca3728f47363fc8b2fcf7e556e6bbf5a Mon Sep 17 00:00:00 2001 From: Jian He Date: Mon, 22 Dec 2014 21:53:22 -0800 Subject: [PATCH] YARN-2340. Fixed NPE when queue is stopped during RM restart. Contributed by Rohith Sharmaks (cherry picked from commit 0d89859b51157078cc504ac81dc8aa75ce6b1782) --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../scheduler/capacity/CapacityScheduler.java | 13 ++++++++----- .../TestWorkPreservingRMRestart.java | 2 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 4bcc1c55c71..3ed97af3cd4 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -234,6 +234,9 @@ Release 2.7.0 - UNRELEASED YARN-2920. Changed CapacityScheduler to kill containers on nodes where node labels are changed. (Wangda Tan via jianhe) + YARN-2340. Fixed NPE when queue is stopped during RM restart. + (Rohith Sharmaks via jianhe) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index e865a650850..3648c5436d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -706,11 +706,14 @@ public class CapacityScheduler extends try { queue.submitApplication(applicationId, user, queueName); } catch (AccessControlException ace) { - LOG.info("Failed to submit application " + applicationId + " to queue " - + queueName + " from user " + user, ace); - this.rmContext.getDispatcher().getEventHandler() - .handle(new RMAppRejectedEvent(applicationId, ace.toString())); - return; + // Ignore the exception for recovered app as the app was previously accepted + if (!isAppRecovering) { + LOG.info("Failed to submit application " + applicationId + " to queue " + + queueName + " from user " + user, ace); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMAppRejectedEvent(applicationId, ace.toString())); + return; + } } // update the metrics queue.getMetrics().submitApp(user); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index 853e0a57e0b..842eaecad32 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -500,6 +500,8 @@ public class TestWorkPreservingRMRestart { rm1.clearQueueMetrics(app1_2); rm1.clearQueueMetrics(app2); + csConf.set("yarn.scheduler.capacity.root.Default.QueueB.state", "STOPPED"); + // Re-start RM rm2 = new MockRM(csConf, memStore); rm2.start();