From 35b8441fd945766ce7cad87be7209534f9acdad2 Mon Sep 17 00:00:00 2001 From: Andras Gyori Date: Wed, 20 Oct 2021 12:56:41 +0200 Subject: [PATCH] YARN-10949. Simplify AbstractCSQueue#updateMaxAppRelatedField and find a more meaningful name for this method. Contributed by Andras Gyori --- .../scheduler/capacity/AbstractCSQueue.java | 47 --------------- .../CapacitySchedulerConfiguration.java | 4 ++ .../scheduler/capacity/LeafQueue.java | 58 ++++++++++++++++++- .../capacity/TestApplicationLimits.java | 49 ++++++++-------- .../scheduler/capacity/TestLeafQueue.java | 7 +++ .../scheduler/capacity/TestParentQueue.java | 2 +- 6 files changed, 91 insertions(+), 76 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index e25c6941a37..ca861fbaa87 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -1462,53 +1462,6 @@ public abstract class AbstractCSQueue implements CSQueue { configuredMaxResource, parentMaxResource)); } - void updateMaxAppRelatedField(CapacitySchedulerConfiguration conf, - LeafQueue leafQueue) { - int maxApplications = conf.getMaximumApplicationsPerQueue(queuePath); - int maxGlobalPerQueueApps = conf.getGlobalMaximumApplicationsPerQueue(); - String maxLabel = RMNodeLabelsManager.NO_LABEL; - - if (maxApplications < 0) { - for (String label : configuredNodeLabels) { - int maxApplicationsByLabel = 0; - if (maxGlobalPerQueueApps > 0) { - // In absolute mode, should - // shrink when change to corresponding label capacity. - maxApplicationsByLabel = this.capacityConfigType - != CapacityConfigType.ABSOLUTE_RESOURCE ? - maxGlobalPerQueueApps : - (int) (maxGlobalPerQueueApps * queueCapacities - .getAbsoluteCapacity(label)); - } else { - maxApplicationsByLabel = (int) (conf.getMaximumSystemApplications() - * queueCapacities.getAbsoluteCapacity(label)); - } - if (maxApplicationsByLabel > maxApplications) { - maxApplications = maxApplicationsByLabel; - maxLabel = label; - } - } - } - leafQueue.setMaxApplications(maxApplications); - - int maxApplicationsPerUser = Math.min(maxApplications, - (int) (maxApplications - * (leafQueue.getUsersManager().getUserLimit() / 100.0f) - * leafQueue.getUsersManager().getUserLimitFactor())); - if (leafQueue.getUsersManager().getUserLimitFactor() == -1) { - maxApplicationsPerUser = maxApplications; - } - - leafQueue.setMaxApplicationsPerUser(maxApplicationsPerUser); - LOG.info("LeafQueue:" + leafQueue.getQueuePath() + - "update max app related, maxApplications=" - + maxApplications + ", maxApplicationsPerUser=" - + maxApplicationsPerUser + ", Abs Cap:" + queueCapacities - .getAbsoluteCapacity(maxLabel) + ", Cap: " + queueCapacities - .getCapacity(maxLabel) + ", MaxCap : " + queueCapacities - .getMaximumCapacity(maxLabel)); - } - void deriveCapacityFromAbsoluteConfigurations(String label, Resource clusterResource) { // Update capacity with a float calculated from the parent's minResources diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index 5e614f149ed..e7b1cbd26c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -461,6 +461,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur return getQueuePrefix(queue) + ACCESSIBLE_NODE_LABELS + DOT + label + DOT; } + public void setMaximumSystemApplications(int numMaxApps) { + setInt(MAXIMUM_SYSTEM_APPLICATIONS, numMaxApps); + } + public int getMaximumSystemApplications() { int maxApplications = getInt(MAXIMUM_SYSTEM_APPLICATIONS, DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 5a3784dae63..c91fa1b7516 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -20,7 +20,6 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import java.io.IOException; import java.util.*; -import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -1939,8 +1938,9 @@ public class LeafQueue extends AbstractCSQueue { updateAbsoluteCapacities(); super.updateEffectiveResources(clusterResource); - super.updateMaxAppRelatedField(csContext.getConfiguration(), - this); + + // Update maximum applications for the queue and for users + updateMaximumApplications(csContext.getConfiguration()); updateCurrentResourceLimits(currentResourceLimits, clusterResource); @@ -2326,6 +2326,58 @@ public class LeafQueue extends AbstractCSQueue { } } + void updateMaximumApplications(CapacitySchedulerConfiguration conf) { + int maxAppsForQueue = conf.getMaximumApplicationsPerQueue(getQueuePath()); + + int maxDefaultPerQueueApps = conf.getGlobalMaximumApplicationsPerQueue(); + int maxSystemApps = conf.getMaximumSystemApplications(); + int baseMaxApplications = maxDefaultPerQueueApps > 0 ? + Math.min(maxDefaultPerQueueApps, maxSystemApps) + : maxSystemApps; + + String maxLabel = RMNodeLabelsManager.NO_LABEL; + if (maxAppsForQueue < 0) { + if (maxDefaultPerQueueApps > 0 && this.capacityConfigType + != CapacityConfigType.ABSOLUTE_RESOURCE) { + maxAppsForQueue = baseMaxApplications; + } else { + for (String label : configuredNodeLabels) { + int maxApplicationsByLabel = (int) (baseMaxApplications + * queueCapacities.getAbsoluteCapacity(label)); + if (maxApplicationsByLabel > maxAppsForQueue) { + maxAppsForQueue = maxApplicationsByLabel; + maxLabel = label; + } + } + } + } + + setMaxApplications(maxAppsForQueue); + + updateMaxAppsPerUser(); + + LOG.info("LeafQueue:" + getQueuePath() + + "update max app related, maxApplications=" + + maxAppsForQueue + ", maxApplicationsPerUser=" + + maxApplicationsPerUser + ", Abs Cap:" + queueCapacities + .getAbsoluteCapacity(maxLabel) + ", Cap: " + queueCapacities + .getCapacity(maxLabel) + ", MaxCap : " + queueCapacities + .getMaximumCapacity(maxLabel)); + } + + private void updateMaxAppsPerUser() { + int maxAppsPerUser = maxApplications; + if (getUsersManager().getUserLimitFactor() != -1) { + int maxApplicationsWithUserLimits = (int) (maxApplications + * (getUsersManager().getUserLimit() / 100.0f) + * getUsersManager().getUserLimitFactor()); + maxAppsPerUser = Math.min(maxApplications, + maxApplicationsWithUserLimits); + } + + setMaxApplicationsPerUser(maxAppsPerUser); + } + /** * Get all valid users in this queue. * @return user list diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index a42e4af5dbf..a1252cfade7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -870,29 +870,7 @@ public class TestApplicationLimits { + "submission of application: " + app3.getApplicationId(), app3.getDiagnostics().toString()); - // based on Global limit of queue usert application is rejected - RMApp app11 = MockRMAppSubmitter.submit(rm, - MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) - .withAppName("app") - .withUser("user") - .withAcls(null) - .withQueue("d") - .withWaitForAppAcceptedState(false) - .build()); - rm.drainEvents(); - rm.waitForState(app11.getApplicationId(), RMAppState.ACCEPTED); - assertEquals(RMAppState.ACCEPTED, app11.getState()); - RMApp app12 = MockRMAppSubmitter.submit(rm, - MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) - .withAppName("app") - .withUser("user") - .withAcls(null) - .withQueue("d") - .withWaitForAppAcceptedState(false) - .build()); - rm.drainEvents(); - rm.waitForState(app12.getApplicationId(), RMAppState.ACCEPTED); - assertEquals(RMAppState.ACCEPTED, app12.getState()); + // based on per user max app settings, app should be rejected instantly RMApp app13 = MockRMAppSubmitter.submit(rm, MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) .withAppName("app") @@ -906,10 +884,32 @@ public class TestApplicationLimits { assertEquals(RMAppState.FAILED, app13.getState()); assertEquals( "org.apache.hadoop.security.AccessControlException: Queue" - + " root.d already has 2 applications from user user cannot" + + " root.d already has 0 applications from user user cannot" + " accept submission of application: " + app13.getApplicationId(), app13.getDiagnostics().toString()); + RMApp app11 = MockRMAppSubmitter.submit(rm, + MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) + .withAppName("app") + .withUser("user2") + .withAcls(null) + .withQueue("a2") + .withWaitForAppAcceptedState(false) + .build()); + rm.drainEvents(); + rm.waitForState(app11.getApplicationId(), RMAppState.ACCEPTED); + assertEquals(RMAppState.ACCEPTED, app11.getState()); + RMApp app12 = MockRMAppSubmitter.submit(rm, + MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) + .withAppName("app") + .withUser("user2") + .withAcls(null) + .withQueue("a2") + .withWaitForAppAcceptedState(false) + .build()); + rm.drainEvents(); + rm.waitForState(app12.getApplicationId(), RMAppState.ACCEPTED); + assertEquals(RMAppState.ACCEPTED, app12.getState()); // based on system max limit application is rejected RMApp app14 = MockRMAppSubmitter.submit(rm, MockRMAppSubmissionData.Builder.createWithMemory(GB, rm) @@ -938,7 +938,6 @@ public class TestApplicationLimits { app15.getDiagnostics().toString()); rm.killApp(app2.getApplicationId()); - rm.killApp(app11.getApplicationId()); rm.killApp(app13.getApplicationId()); rm.killApp(app14.getApplicationId()); rm.stop(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 351fdbe3a53..7132582ae34 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -5137,6 +5137,13 @@ public class TestLeafQueue { assertEquals(1.0, leafQueue.getAbsoluteMaximumCapacity(), EPSILON); + // limit maximum apps by max system apps + csConf.setMaximumSystemApplications(15); + leafQueue.updateClusterResource(Resource.newInstance(0, 0), + new ResourceLimits(Resource.newInstance(0, 0))); + + assertEquals(15, leafQueue.getMaxApplications()); + } finally { //revert config changes csConf.setNodeLocalityDelay( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java index 8b75a9c126f..fe90ca84435 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java @@ -1137,7 +1137,7 @@ public class TestParentQueue { assertEquals(b.getMaxApplications(), b.getMaxApplicationsPerUser()); // Set GlobalMaximumApplicationsPerQueue in csConf - csConf.setGlobalMaximumApplicationsPerQueue(20000); + csConf.setGlobalMaximumApplicationsPerQueue(8000); root.updateClusterResource(clusterResource, new ResourceLimits(clusterResource));