From 258ef9f9c34b54e7419e2763c0b8fae1b9bdeb5c Mon Sep 17 00:00:00 2001
From: Junping Du
Date: Wed, 6 Jan 2016 05:49:24 -0800
Subject: [PATCH] YARN-4546. ResourceManager crash due to scheduling
 opportunity overflow. Contributed by Jason Lowe.

(cherry picked from commit c1462a67ff7bb632df50e1c52de971cced56c6a3)
(cherry picked from commit 1cc001db4c3767072b5d065d161bc5c6d1c480d4)

Conflicts:
	hadoop-yarn-project/CHANGES.txt
	hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java
---
 hadoop-yarn-project/CHANGES.txt          |  3 +++
 .../SchedulerApplicationAttempt.java     | 12 ++++++++--
 .../TestSchedulerApplicationAttempt.java | 22 +++++++++++++++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 4e46d2ba291..cc054ff669b 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -50,6 +50,9 @@ Release 2.7.3 - UNRELEASED
 
     YARN-4452. NPE when submit Unmanaged application. (Naganarasimha G R
     via junping_du)
+
+    YARN-4546. ResourceManager crash due to scheduling opportunity overflow.
+    (Jason Lowe via junping_du)
 
 Release 2.7.2 - UNRELEASED
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
index ce7f9d1d0c4..038c82324dc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java
@@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
@@ -488,8 +489,10 @@ public class SchedulerApplicationAttempt {
   }
 
   public synchronized void addSchedulingOpportunity(Priority priority) {
-    schedulingOpportunities.setCount(priority,
-        schedulingOpportunities.count(priority) + 1);
+    int count = schedulingOpportunities.count(priority);
+    if (count < Integer.MAX_VALUE) {
+      schedulingOpportunities.setCount(priority, count + 1);
+    }
   }
 
   public synchronized void subtractSchedulingOpportunity(Priority priority) {
@@ -523,6 +526,11 @@ public class SchedulerApplicationAttempt {
     schedulingOpportunities.setCount(priority, 0);
   }
 
+  @VisibleForTesting
+  void setSchedulingOpportunities(Priority priority, int count) {
+    schedulingOpportunities.setCount(priority, count);
+  }
+
   synchronized AggregateAppResourceUsage getRunningAggregateAppResourceUsage() {
     long currentTimeMillis = System.currentTimeMillis();
     // Don't walk the whole container list if the resources were computed
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java
index c648b83ad4b..973c0b61c34 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerApplicationAttempt.java
@@ -163,4 +163,26 @@ public class TestSchedulerApplicationAttempt {
         ApplicationAttemptId.newInstance(appIdImpl, attemptId);
     return attId;
   }
+
+  @Test
+  public void testSchedulingOpportunityOverflow() throws Exception {
+    ApplicationAttemptId attemptId = createAppAttemptId(0, 0);
+    Queue queue = createQueue("test", null);
+    RMContext rmContext = mock(RMContext.class);
+    when(rmContext.getEpoch()).thenReturn(3L);
+    SchedulerApplicationAttempt app = new SchedulerApplicationAttempt(
+        attemptId, "user", queue, queue.getActiveUsersManager(), rmContext);
+    Priority priority = Priority.newInstance(1);
+    assertEquals(0, app.getSchedulingOpportunities(priority));
+    app.addSchedulingOpportunity(priority);
+    assertEquals(1, app.getSchedulingOpportunities(priority));
+    // verify the count is capped at MAX_VALUE and does not overflow
+    app.setSchedulingOpportunities(priority, Integer.MAX_VALUE - 1);
+    assertEquals(Integer.MAX_VALUE - 1,
+        app.getSchedulingOpportunities(priority));
+    app.addSchedulingOpportunity(priority);
+    assertEquals(Integer.MAX_VALUE, app.getSchedulingOpportunities(priority));
+    app.addSchedulingOpportunity(priority);
+    assertEquals(Integer.MAX_VALUE, app.getSchedulingOpportunities(priority));
+  }
 }
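For context on why the clamp in addSchedulingOpportunity() matters, the following is a minimal, self-contained Java sketch, not part of the patch; the class name and the String key standing in for a Priority are invented for illustration. It reproduces the arithmetic the fix guards against: once the Guava multiset count reaches Integer.MAX_VALUE, an unchecked "count + 1" wraps to a negative value, and Multiset.setCount() rejects negative counts with an IllegalArgumentException, the kind of unchecked failure that can take down the scheduling thread.

// SchedulingOpportunityOverflowDemo.java -- illustrative sketch only, not part
// of the patch. Requires Guava on the classpath.
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

public class SchedulingOpportunityOverflowDemo {
  public static void main(String[] args) {
    Multiset<String> schedulingOpportunities = HashMultiset.create();
    // Simulate a priority whose scheduling-opportunity count has maxed out.
    schedulingOpportunities.setCount("priority-1", Integer.MAX_VALUE);

    // Pre-patch style: unchecked increment wraps to Integer.MIN_VALUE ...
    int overflowed = schedulingOpportunities.count("priority-1") + 1;
    System.out.println("unchecked increment yields " + overflowed);

    // ... and passing the negative value to setCount() throws.
    try {
      schedulingOpportunities.setCount("priority-1", overflowed);
    } catch (IllegalArgumentException e) {
      System.out.println("setCount rejected negative count: " + e.getMessage());
    }

    // Patched style: increment only while below Integer.MAX_VALUE, so the
    // count saturates at the maximum instead of overflowing.
    int count = schedulingOpportunities.count("priority-1");
    if (count < Integer.MAX_VALUE) {
      schedulingOpportunities.setCount("priority-1", count + 1);
    }
    System.out.println(
        "capped count: " + schedulingOpportunities.count("priority-1"));
  }
}

Saturating at Integer.MAX_VALUE is safe here because the count only needs to be "large enough" to trigger relaxed locality scheduling; exact values beyond the cap carry no additional meaning.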