From f24d1fac35447124e01f1a457bf9dac02b32d54c Mon Sep 17 00:00:00 2001 From: Sunil G Date: Tue, 19 Nov 2019 14:03:02 +0530 Subject: [PATCH] YARN-9984. FSPreemptionThread can cause NullPointerException while app is unregistered with containers running on a node. Contributed by Wilfred Spiegelenburg. (cherry picked from commit 215f2052fc3b7e366e8bd1bd332663966fa9206c) --- .../scheduler/fair/FSPreemptionThread.java | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java index 6ed90f816a5..b3e066e9f28 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSPreemptionThread.java @@ -140,9 +140,13 @@ class FSPreemptionThread extends Thread { for (RMContainer container : containers) { FSAppAttempt app = scheduler.getSchedulerApp( container.getApplicationAttemptId()); - LOG.info("Preempting container " + container + - " from queue " + app.getQueueName()); - app.trackContainerForPreemption(container); + LOG.info("Preempting container " + container + " from queue: " + + (app != null ? 
app.getQueueName() : "unknown")); + // If the app has unregistered while building the container list + // the app might be null, skip notifying the app + if (app != null) { + app.trackContainerForPreemption(container); + } } } } @@ -204,6 +208,13 @@ class FSPreemptionThread extends Thread { for (RMContainer container : containersToCheck) { FSAppAttempt app = scheduler.getSchedulerApp(container.getApplicationAttemptId()); + // If the app has unregistered while building the container list the app + // might be null, just skip this container: it should be cleaned up soon + if (app == null) { + LOG.info("Found container " + container + " on node " + + node.getNodeName() + " without app, skipping preemption"); + continue; + } ApplicationId appId = app.getApplicationId(); if (app.canContainerBePreempted(container,