From 80ef3e7581c25163a43ad4ae4d1b181379455a7a Mon Sep 17 00:00:00 2001 From: Jonathan Hung Date: Fri, 7 Dec 2018 10:32:53 -0800 Subject: [PATCH] YARN-9085. Add Guaranteed and MaxCapacity to CSQueueMetrics (cherry picked from commit 978ab3e958227220cb6f1a08ae6e7cdb8a46628b) (cherry picked from commit dca69d178dba21c41fd1293187f29143f7e81e19) --- .../scheduler/capacity/CSQueueMetrics.java | 39 ++++++++++++++++ .../scheduler/capacity/CSQueueUtils.java | 17 +++++++ .../scheduler/capacity/LeafQueue.java | 4 ++ .../scheduler/capacity/ParentQueue.java | 4 ++ .../capacity/TestCapacityScheduler.java | 44 +++++++++++++++++++ 5 files changed, 108 insertions(+) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java index 87fc23458a2..83826650414 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueMetrics.java @@ -24,6 +24,7 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableGaugeFloat; +import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; @@ -46,6 +47,14 @@ public class 
CSQueueMetrics extends QueueMetrics { MutableGaugeFloat usedCapacity; @Metric("Percent of Absolute Capacity Used") MutableGaugeFloat absoluteUsedCapacity; + @Metric("Guaranteed memory in MB") + MutableGaugeLong guaranteedMB; + @Metric("Guaranteed CPU in virtual cores") + MutableGaugeInt guaranteedVCores; + @Metric("Maximum memory in MB") + MutableGaugeLong maxCapacityMB; + @Metric("Maximum CPU in virtual cores") + MutableGaugeInt maxCapacityVCores; CSQueueMetrics(MetricsSystem ms, String queueName, Queue parent, boolean enableUserMetrics, Configuration conf) { @@ -126,6 +135,36 @@ public class CSQueueMetrics extends QueueMetrics { } } + public long getGuaranteedMB() { + return guaranteedMB.value(); + } + + public int getGuaranteedVCores() { + return guaranteedVCores.value(); + } + + public void setGuaranteedResources(String partition, Resource res) { + if (partition == null || partition.equals(RMNodeLabelsManager.NO_LABEL)) { + guaranteedMB.set(res.getMemorySize()); + guaranteedVCores.set(res.getVirtualCores()); + } + } + + public long getMaxCapacityMB() { + return maxCapacityMB.value(); + } + + public int getMaxCapacityVCores() { + return maxCapacityVCores.value(); + } + + public void setMaxCapacityResources(String partition, Resource res) { + if (partition == null || partition.equals(RMNodeLabelsManager.NO_LABEL)) { + maxCapacityMB.set(res.getMemorySize()); + maxCapacityVCores.set(res.getVirtualCores()); + } + } + public synchronized static CSQueueMetrics forQueue(String queueName, Queue parent, boolean enableUserMetrics, Configuration conf) { MetricsSystem ms = DefaultMetricsSystem.instance(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java index 9554086d8ff..da1d3046f39 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java @@ -295,4 +295,21 @@ class CSQueueUtils { childQueue.getMetrics().setAvailableResourcesToQueue(nodePartition, getMaxAvailableResourceToQueue(rc, nlm, childQueue, cluster)); } + + /** + * Update the guaranteed and max-capacity metrics of a queue. + * @param rc resource calculator + * @param partitionResource total cluster resources for this partition + * @param partition partition being updated + * @param queue queue whose metrics are updated + */ + public static void updateConfiguredCapacityMetrics(ResourceCalculator rc, + Resource partitionResource, String partition, AbstractCSQueue queue) { + queue.getMetrics().setGuaranteedResources(partition, rc.multiplyAndNormalizeDown( + partitionResource, queue.getQueueCapacities().getAbsoluteCapacity(partition), + queue.getMinimumAllocation())); + queue.getMetrics().setMaxCapacityResources(partition, rc.multiplyAndNormalizeDown( + partitionResource, queue.getQueueCapacities().getAbsoluteMaximumCapacity(partition), + queue.getMinimumAllocation())); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 350e5ea6a03..4ef26e639ac 100644 ---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1784,6 +1784,10 @@ public class LeafQueue extends AbstractCSQueue { // Update metrics CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, this, labelManager, null); + // Update configured capacity/max-capacity for default partition only + CSQueueUtils.updateConfiguredCapacityMetrics(resourceCalculator, + labelManager.getResourceByLabel(null, clusterResource), + RMNodeLabelsManager.NO_LABEL, this); // queue metrics are updated, more resource may be available // activate the pending applications if possible diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index 7e5f7c27754..be966de261e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -842,6 +842,10 @@ public class ParentQueue extends AbstractCSQueue { CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, this, labelManager, null); + // Update configured capacity/max-capacity for default partition only + 
CSQueueUtils.updateConfiguredCapacityMetrics(resourceCalculator, + labelManager.getResourceByLabel(null, clusterResource), + RMNodeLabelsManager.NO_LABEL, this); } finally { writeLock.unlock(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 3a691505169..b7f69fc5462 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -5022,4 +5022,48 @@ public class TestCapacityScheduler { assertEquals(4, appsInB1.size()); rm.close(); } + + @Test + public void testCSQueueMetrics() throws Exception { + CapacityScheduler cs = new CapacityScheduler(); + cs.setConf(new YarnConfiguration()); + cs.setRMContext(resourceManager.getRMContext()); + CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(); + setupQueueConfiguration(conf); + cs.init(conf); + cs.start(); + + RMNode n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(50 * GB), 1, "n1"); + RMNode n2 = MockNodes.newNodeInfo(0, MockNodes.newResource(50 * GB), 2, "n2"); + cs.handle(new NodeAddedSchedulerEvent(n1)); + cs.handle(new NodeAddedSchedulerEvent(n2)); + + assertEquals(10240, ((CSQueueMetrics)cs.getQueue("a").getMetrics()).getGuaranteedMB()); + assertEquals(71680, ((CSQueueMetrics)cs.getQueue("b1").getMetrics()).getGuaranteedMB()); + assertEquals(102400, 
((CSQueueMetrics)cs.getQueue("a").getMetrics()).getMaxCapacityMB()); + assertEquals(102400, ((CSQueueMetrics)cs.getQueue("b1").getMetrics()).getMaxCapacityMB()); + + // Remove a node, metrics should be updated + cs.handle(new NodeRemovedSchedulerEvent(n2)); + assertEquals(5120, ((CSQueueMetrics)cs.getQueue("a").getMetrics()).getGuaranteedMB()); + assertEquals(35840, ((CSQueueMetrics)cs.getQueue("b1").getMetrics()).getGuaranteedMB()); + assertEquals(51200, ((CSQueueMetrics)cs.getQueue("a").getMetrics()).getMaxCapacityMB()); + assertEquals(51200, ((CSQueueMetrics)cs.getQueue("b1").getMetrics()).getMaxCapacityMB()); + + // Add child queue to a, and reinitialize. Metrics should be updated + conf.setQueues(CapacitySchedulerConfiguration.ROOT + ".a", new String[] {"a1", "a2", "a3"} ); + conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".a.a2", 30.0f); + conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".a.a3", 40.0f); + conf.setMaximumCapacity(CapacitySchedulerConfiguration.ROOT + ".a.a3", 50.0f); + + cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, + null, new RMContainerTokenSecretManager(conf), + new NMTokenSecretManagerInRM(conf), + new ClientToAMTokenSecretManagerInRM(), null)); + + assertEquals(1024, ((CSQueueMetrics)cs.getQueue("a2").getMetrics()).getGuaranteedMB()); + assertEquals(2048, ((CSQueueMetrics)cs.getQueue("a3").getMetrics()).getGuaranteedMB()); + assertEquals(51200, ((CSQueueMetrics)cs.getQueue("a2").getMetrics()).getMaxCapacityMB()); + assertEquals(25600, ((CSQueueMetrics)cs.getQueue("a3").getMetrics()).getMaxCapacityMB()); + } }