From ad1d40970ac250f4f3dc895033fdbe29f47c20b0 Mon Sep 17 00:00:00 2001
From: Weihao Zheng <11959289+FrankinRUC@users.noreply.github.com>
Date: Mon, 13 Sep 2021 11:06:53 +0800
Subject: [PATCH] YARN-10928. Support default queue config for
 minimum-user-limit-percent/user-limit-factor (#3389)

Contributed by Weihao Zheng
---
 .../CapacitySchedulerConfiguration.java       | 18 ++++-
 .../scheduler/capacity/TestQueueParsing.java  | 78 ++++++++++++++++++-
 .../src/site/markdown/CapacityScheduler.md    |  4 +-
 3 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
index 510559d940c..6e1b72feac0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
@@ -645,8 +645,18 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
   }
 
+  /**
+   * Get the user limit for a queue.
+   * The queue's own setting takes precedence; otherwise the value set
+   * directly under the scheduler prefix is used, and only when neither is
+   * present does this fall back to {@code DEFAULT_USER_LIMIT}.
+   *
+   * @param queue queue path
+   * @return the effective user limit for the queue
+   */
   public float getUserLimit(String queue) {
+    float defaultUserLimit = getFloat(PREFIX + USER_LIMIT, DEFAULT_USER_LIMIT);
     float userLimit = getFloat(getQueuePrefix(queue) + USER_LIMIT,
-        DEFAULT_USER_LIMIT);
+        defaultUserLimit);
     return userLimit;
   }
 
@@ -694,16 +695,31 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
     return orderingPolicy;
   }
 
-    public void setUserLimit(String queue, float userLimit) {
+  public void setUserLimit(String queue, float userLimit) {
     setFloat(getQueuePrefix(queue) + USER_LIMIT, userLimit);
     LOG.debug("here setUserLimit: queuePrefix={}, userLimit={}",
         getQueuePrefix(queue), getUserLimit(queue));
   }
 
+  /**
+   * Set the scheduler-wide user limit that {@link #getUserLimit(String)}
+   * falls back to for queues without their own setting.
+   *
+   * @param defaultUserLimit user limit applied to queues that do not set one
+   */
+  @VisibleForTesting
+  public void setDefaultUserLimit(float defaultUserLimit) {
+    setFloat(PREFIX + USER_LIMIT, defaultUserLimit);
+  }
+
   public float getUserLimitFactor(String queue) {
+    // Queue-level setting wins; otherwise fall back to the scheduler-wide
+    // value, and only then to the built-in default.
+    float defaultUserLimitFactor =
+        getFloat(PREFIX + USER_LIMIT_FACTOR, DEFAULT_USER_LIMIT_FACTOR);
     float userLimitFactor =
         getFloat(getQueuePrefix(queue) + USER_LIMIT_FACTOR,
-            DEFAULT_USER_LIMIT_FACTOR);
+            defaultUserLimitFactor);
     return userLimitFactor;
   }
 
@@ -711,6 +718,18 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
     setFloat(getQueuePrefix(queue) + USER_LIMIT_FACTOR, userLimitFactor);
   }
 
+  /**
+   * Set the scheduler-wide user limit factor that
+   * {@link #getUserLimitFactor(String)} falls back to for queues without
+   * their own setting.
+   *
+   * @param defaultUserLimitFactor factor applied to queues that do not set one
+   */
+  @VisibleForTesting
+  public void setDefaultUserLimitFactor(float defaultUserLimitFactor) {
+    setFloat(PREFIX + USER_LIMIT_FACTOR, defaultUserLimitFactor);
+  }
+
   public QueueState getConfiguredState(String queue) {
     String state = get(getQueuePrefix(queue) + STATE);
     if (state == null) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
index 236d271104e..cf5ae842fd0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
@@ -484,7 +484,7 @@ public class TestQueueParsing {
     conf.setAccessibleNodeLabels(A, ImmutableSet.of("red", "blue"));
     conf.setCapacityByLabel(A, "red", 90);
     conf.setCapacityByLabel(A, "blue", 90);
-    
+
     // Set B configuraiton
     final String B = CapacitySchedulerConfiguration.ROOT + ".b";
     conf.setCapacity(B, 90);
@@ -1188,6 +1188,81 @@ public class TestQueueParsing {
     verifyQueueAbsCapacity(rm, A, "z", 1f);
   }
 
+  @Test
+  public void testQueueParsingWithDefaultUserLimitValues()
+      throws IOException {
+    final String queueA = CapacitySchedulerConfiguration.ROOT + ".a";
+    final String queueB = CapacitySchedulerConfiguration.ROOT + ".b";
+
+    // Scheduler-wide defaults configured: A overrides them, B inherits them.
+    CapacitySchedulerConfiguration csConf =
+        createUserLimitTestConf(queueA, queueB);
+    csConf.setDefaultUserLimit(20);
+    csConf.setDefaultUserLimitFactor(2.0f);
+    CapacityScheduler capacityScheduler = startSchedulerWithConf(csConf);
+    try {
+      assertUserLimits(capacityScheduler, queueA, 15, 1.5f);
+      assertUserLimits(capacityScheduler, queueB, 20, 2.0f);
+    } finally {
+      // Stop even when an assertion fails so the scheduler is not leaked.
+      ServiceOperations.stopQuietly(capacityScheduler);
+    }
+
+    // No scheduler-wide defaults: B falls back to the built-in defaults.
+    csConf = createUserLimitTestConf(queueA, queueB);
+    capacityScheduler = startSchedulerWithConf(csConf);
+    try {
+      assertUserLimits(capacityScheduler, queueA, 15, 1.5f);
+      assertUserLimits(capacityScheduler, queueB, 100, 1);
+    } finally {
+      ServiceOperations.stopQuietly(capacityScheduler);
+    }
+  }
+
+  /**
+   * Build a two-queue configuration (a and b, capacity 50 each) in which
+   * only queue A sets its own user limit (15) and user limit factor (1.5).
+   */
+  private CapacitySchedulerConfiguration createUserLimitTestConf(
+      String queueA, String queueB) {
+    CapacitySchedulerConfiguration csConf =
+        new CapacitySchedulerConfiguration(new YarnConfiguration());
+    csConf.setQueues(CapacitySchedulerConfiguration.ROOT,
+        new String[] {"a", "b"});
+    csConf.setCapacity(queueA, 50);
+    csConf.setUserLimit(queueA, 15);
+    csConf.setUserLimitFactor(queueA, 1.5f);
+    csConf.setCapacity(queueB, 50);
+    return csConf;
+  }
+
+  /** Initialize and start a CapacityScheduler using the given config. */
+  private CapacityScheduler startSchedulerWithConf(
+      CapacitySchedulerConfiguration csConf) throws IOException {
+    CapacityScheduler capacityScheduler = new CapacityScheduler();
+    RMContextImpl rmContext =
+        new RMContextImpl(null, null, null, null, null, null,
+            new RMContainerTokenSecretManager(csConf),
+            new NMTokenSecretManagerInRM(csConf),
+            new ClientToAMTokenSecretManagerInRM(), null);
+    rmContext.setNodeLabelManager(nodeLabelManager);
+    capacityScheduler.setConf(csConf);
+    capacityScheduler.setRMContext(rmContext);
+    capacityScheduler.init(csConf);
+    capacityScheduler.start();
+    return capacityScheduler;
+  }
+
+  /** Assert the user limit and user limit factor of a leaf queue. */
+  private void assertUserLimits(CapacityScheduler capacityScheduler,
+      String queuePath, float expectedUserLimit,
+      float expectedUserLimitFactor) {
+    LeafQueue queue = (LeafQueue) capacityScheduler.getQueue(queuePath);
+    Assert.assertEquals(expectedUserLimit, queue.getUserLimit(), DELTA);
+    Assert.assertEquals(expectedUserLimitFactor,
+        queue.getUserLimitFactor(), DELTA);
+  }
+
   private void verifyQueueAbsCapacity(MockRM rm, String queuePath, String label,
       float expectedAbsCapacity) {
     CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md
index d35869c759d..6d87d4febb6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/CapacityScheduler.md
@@ -125,8 +125,8 @@ Configuration
 |:---- |:---- |
 | `yarn.scheduler.capacity.<queue-path>.capacity` | Queue *capacity* in percentage (%) as a float (e.g. 12.5), weight as a float with the postfix *w* (e.g. 2.0w) or as absolute resource queue minimum capacity. When using percentage values the sum of capacities for all queues, at each level, must be equal to 100. If absolute resource is configured, sum of absolute resources of child queues could be less than its parent absolute resource capacity. Applications in the queue may consume more resources than the queue's capacity if there are free resources, providing elasticity. |
 | `yarn.scheduler.capacity.<queue-path>.maximum-capacity` | Maximum queue capacity in percentage (%) as a float (when the *capacity* property is defined with either percentages or weights) or as absolute resource queue maximum capacity. This limits the *elasticity* for applications in the queue. 1) Value is between 0 and 100. 2) Admin needs to make sure absolute maximum capacity >= absolute capacity for each queue. Also, setting this value to -1 sets maximum capacity to 100%. |
-| `yarn.scheduler.capacity.<queue-path>.minimum-user-limit-percent` | Each queue enforces a limit on the percentage of resources allocated to a user at any given time, if there is demand for resources. The user limit can vary between a minimum and maximum value. The former (the minimum value) is set to this property value and the latter (the maximum value) depends on the number of users who have submitted applications. For e.g., suppose the value of this property is 25. If two users have submitted applications to a queue, no single user can use more than 50% of the queue resources. If a third user submits an application, no single user can use more than 33% of the queue resources. With 4 or more users, no user can use more than 25% of the queues resources. A value of 100 implies no user limits are imposed. The default is 100. Value is specified as a integer. |
-| `yarn.scheduler.capacity.<queue-path>.user-limit-factor` | User limit factor provides a way to control the max amount of resources that a single user can consume. It is the multiple of the queue's capacity. By default this is set to 1 which ensures that a single user can never take more than the queue's configured capacity irrespective of how idle the cluster is. Increasing it means a single user can use more than the minimum capacity of the cluster, while decreasing it results in lower maximum resources. Setting this to -1 will disable the feature. Value is specified as a float. Note: using the flexible auto queue creation (yarn.scheduler.capacity.\<queue-path\>.auto-queue-creation-v2) with weights will automatically set this property to -1, as the dynamic queues will be created with the hardcoded weight of 1 and in idle cluster scenarios they should be able to use more resources than calculated. |
+| `yarn.scheduler.capacity.minimum-user-limit-percent` / `yarn.scheduler.capacity.<queue-path>.minimum-user-limit-percent` | Each queue enforces a limit on the percentage of resources allocated to a user at any given time, if there is demand for resources. The user limit can vary between a minimum and maximum value. The former (the minimum value) is set to this property value and the latter (the maximum value) depends on the number of users who have submitted applications. For example, suppose the value of this property is 25. If two users have submitted applications to a queue, no single user can use more than 50% of the queue resources. If a third user submits an application, no single user can use more than 33% of the queue resources. With 4 or more users, no user can use more than 25% of the queue's resources. A value of 100 implies no user limits are imposed. The default is 100. Value is specified as an integer. This can be set for all queues with `yarn.scheduler.capacity.minimum-user-limit-percent` and can also be overridden on a per queue basis by setting `yarn.scheduler.capacity.<queue-path>.minimum-user-limit-percent`. |
+| `yarn.scheduler.capacity.user-limit-factor` / `yarn.scheduler.capacity.<queue-path>.user-limit-factor` | User limit factor provides a way to control the max amount of resources that a single user can consume. It is the multiple of the queue's capacity. By default this is set to 1 which ensures that a single user can never take more than the queue's configured capacity irrespective of how idle the cluster is. Increasing it means a single user can use more than the minimum capacity of the cluster, while decreasing it results in lower maximum resources. Setting this to -1 will disable the feature. Value is specified as a float. Note: using the flexible auto queue creation (yarn.scheduler.capacity.\<queue-path\>.auto-queue-creation-v2) with weights will automatically set this property to -1, as the dynamic queues will be created with the hardcoded weight of 1 and in idle cluster scenarios they should be able to use more resources than calculated. This can be set for all queues with `yarn.scheduler.capacity.user-limit-factor` and can also be overridden on a per queue basis by setting `yarn.scheduler.capacity.<queue-path>.user-limit-factor`. |
 | `yarn.scheduler.capacity.<queue-path>.maximum-allocation-mb` | The per queue maximum limit of memory to allocate to each container request at the Resource Manager. This setting overrides the cluster configuration `yarn.scheduler.maximum-allocation-mb`. This value must be smaller than or equal to the cluster maximum. |
 | `yarn.scheduler.capacity.<queue-path>.maximum-allocation-vcores` | The per queue maximum limit of virtual cores to allocate to each container request at the Resource Manager. This setting overrides the cluster configuration `yarn.scheduler.maximum-allocation-vcores`. This value must be smaller than or equal to the cluster maximum. |
 | `yarn.scheduler.capacity.<queue-path>.user-settings.<user-name>.weight` | This floating point value is used when calculating the user limit resource values for users in a queue. This value will weight each user more or less than the other users in the queue. For example, if user A should receive 50% more resources in a queue than users B and C, this property will be set to 1.5 for user A.  Users B and C will default to 1.0. |