From b196130c29936a6e5077eaabaa73cf515b36b5f1 Mon Sep 17 00:00:00 2001 From: Szilard Nemeth Date: Fri, 5 Feb 2021 17:08:16 +0100 Subject: [PATCH] YARN-10428. Zombie applications in the YARN queue using FAIR + sizebasedweight. Contributed by Guang Yang, Andras Gyori (cherry picked from commit 79a46599f76e470527ad94b0894dacb28db01465) Conflicts: hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java (cherry picked from commit 7aea2e1b5c24cd6e2dffbe6942f0dadb6a45c48f) --- .../scheduler/policy/FairOrderingPolicy.java | 6 ++- .../policy/TestFairOrderingPolicy.java | 41 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/FairOrderingPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/FairOrderingPolicy.java index 3371df803f0..4f0ca06fdaf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/FairOrderingPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/FairOrderingPolicy.java @@ -68,10 +68,12 @@ public class FairOrderingPolicy extends AbstractCom private double getMagnitude(SchedulableEntity r) { double mag = r.getSchedulingResourceUsage().getCachedUsed( CommonNodeLabelsManager.ANY).getMemorySize(); - if (sizeBasedWeight) { + if (sizeBasedWeight && mag != 0) { double weight = Math.log1p(r.getSchedulingResourceUsage().getCachedDemand( CommonNodeLabelsManager.ANY).getMemorySize()) / Math.log(2); - mag = mag / weight; + if (weight != 0) { + mag = mag / weight; + } } return mag; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java index e023e011e5b..099102f7675 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/policy/TestFairOrderingPolicy.java @@ -21,7 +21,9 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy; import java.util.*; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; @@ -201,4 +203,43 @@ public class TestFairOrderingPolicy { } } + @Test + public void testRemoveEntitiesWithSizeBasedWeightAsCompletedJobs() { + FairOrderingPolicy policy = + new FairOrderingPolicy(); + policy.setSizeBasedWeight(true); + + // Add 10 different schedulable entities + List entities = new ArrayList<>(10); + for (int i = 1; i <= 10; i++) { + MockSchedulableEntity r = new MockSchedulableEntity(); + r.setApplicationPriority(Priority.newInstance(i)); + r.setUsed(Resources.createResource(4 * i)); + r.setPending(Resources.createResource(4 * i)); + AbstractComparatorOrderingPolicy.updateSchedulingResourceUsage( + r.getSchedulingResourceUsage()); + policy.addSchedulableEntity(r); + entities.add(r); + } + + // Mark the first 5 entities as completed by setting + // the resources to 0 + for (int i = 0; i < 5; i++) { + MockSchedulableEntity r = entities.get(i); + r.getSchedulingResourceUsage().setCachedUsed( + CommonNodeLabelsManager.ANY, Resources.createResource(0)); + r.getSchedulingResourceUsage().setCachedPending( + CommonNodeLabelsManager.ANY, Resources.createResource(0)); + policy.entityRequiresReordering(r); + } + + policy.reorderScheduleEntities(); + + // Remove the first 5 elements + for (int i = 0; i < 5; i++) { + policy.removeSchedulableEntity(entities.get(i)); + } + + Assert.assertEquals(5, policy.getNumSchedulableEntities()); + } }