From 59172ada9014f8c056f2bd37b25a26572ca643af Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Mon, 24 May 2021 23:12:07 +0800 Subject: [PATCH] YARN-10771. Add cluster metric for size of SchedulerEventQueue and RMEventQueue. Contributed by chaosju. --- .../hadoop/yarn/event/AsyncDispatcher.java | 4 ++++ .../hadoop/yarn/event/EventDispatcher.java | 4 ++++ .../resourcemanager/ClusterMetrics.java | 20 ++++++++++++++++ .../resourcemanager/ResourceManager.java | 24 +++++++++++++++++++ .../webapp/MetricsOverviewTable.java | 8 +++++++ .../webapp/dao/ClusterMetricsInfo.java | 13 ++++++++++ .../resourcemanager/webapp/TestNodesPage.java | 2 +- .../webapp/TestRMWebServices.java | 2 +- 8 files changed, 75 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/AsyncDispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/AsyncDispatcher.java index ba6bb435ec2..0915eb4a981 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/AsyncDispatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/AsyncDispatcher.java @@ -409,4 +409,8 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher { Class eventClass) { eventTypeMetricsMap.put(eventClass, metrics); } + + public int getEventQueueSize() { + return eventQueue.size(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/EventDispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/EventDispatcher.java index 11cdf150ddb..6731bdacac8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/EventDispatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/event/EventDispatcher.java @@ -161,4 +161,8 @@ public class EventDispatcher extends protected boolean isStopped() { return this.stopped; } + + public int getEventQueueSize() { + return eventQueue.size(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java index 67a3a620875..fa3c5c90949 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java @@ -72,6 +72,10 @@ public class ClusterMetrics { rmEventProcCPUMax; @Metric("# of Containers assigned in the last second") MutableGaugeInt containerAssignedPerSecond; + @Metric("# of rm dispatcher event queue size") + MutableGaugeInt rmDispatcherEventQueueSize; + @Metric("# of scheduler dispatcher event queue size") + MutableGaugeInt schedulerDispatcherEventQueueSize; private boolean rmEventProcMonitorEnable = false; @@ -356,4 +360,20 @@ public class ClusterMetrics { private ScheduledThreadPoolExecutor getAssignCounterExecutor(){ return assignCounterExecutor; } + + public int getRmEventQueueSize() { + return rmDispatcherEventQueueSize.value(); + } + + public void setRmEventQueueSize(int rmEventQueueSize) { + this.rmDispatcherEventQueueSize.set(rmEventQueueSize); + } + + public int getSchedulerEventQueueSize() { + return schedulerDispatcherEventQueueSize.value(); + } + + public void setSchedulerEventQueueSize(int schedulerEventQueueSize) { + this.schedulerDispatcherEventQueueSize.set(schedulerEventQueueSize); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index b0dc218f3dd..a813a8524e6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import com.sun.jersey.spi.container.servlet.ServletContainer; @@ -152,6 +153,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; /** @@ -715,6 +718,7 @@ public class ResourceManager extends CompositeService private boolean fromActive = false; private StandByTransitionRunnable standByTransitionRunnable; private RMNMInfo rmnmInfo; + private ScheduledThreadPoolExecutor eventQueueMetricExecutor; RMActiveServices(ResourceManager rm) { super("RMActiveServices"); @@ -937,6 +941,23 @@ public class ResourceManager extends CompositeService addIfService(volumeManager); } + eventQueueMetricExecutor = new ScheduledThreadPoolExecutor(1, + new ThreadFactoryBuilder(). + setDaemon(true).setNameFormat("EventQueueSizeMetricThread"). + build()); + eventQueueMetricExecutor.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + int rmEventQueueSize = ((AsyncDispatcher)getRMContext(). + getDispatcher()).getEventQueueSize(); + ClusterMetrics.getMetrics().setRmEventQueueSize(rmEventQueueSize); + int schedulerEventQueueSize = ((EventDispatcher)schedulerDispatcher). + getEventQueueSize(); + ClusterMetrics.getMetrics(). + setSchedulerEventQueueSize(schedulerEventQueueSize); + } + }, 1, 1, TimeUnit.SECONDS); + super.serviceInit(conf); } @@ -1012,6 +1033,9 @@ public class ResourceManager extends CompositeService LOG.error("Error closing store.", e); } } + if (eventQueueMetricExecutor != null) { + eventQueueMetricExecutor.shutdownNow(); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index 3ce4f2b5185..c9922964ff9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -205,6 +205,8 @@ public class MetricsOverviewTable extends HtmlBlock { SchedulerInfo schedulerInfo = new SchedulerInfo(this.rm); int schedBusy = clusterMetrics.getRmSchedulerBusyPercent(); + int rmEventQueueSize = clusterMetrics.getRmEventQueueSize(); + int schedulerEventQueueSize = clusterMetrics.getSchedulerEventQueueSize(); div.h3("Scheduler Metrics"). table("#schedulermetricsoverview"). @@ -217,6 +219,10 @@ public class MetricsOverviewTable extends HtmlBlock { th().$class("ui-state-default") .__("Maximum Cluster Application Priority").__(). th().$class("ui-state-default").__("Scheduler Busy %").__(). + th().$class("ui-state-default") + .__("RM Dispatcher EventQueue Size").__(). + th().$class("ui-state-default") + .__("Scheduler Dispatcher EventQueue Size").__(). __(). __(). tbody().$class("ui-widget-content"). @@ -228,6 +234,8 @@ public class MetricsOverviewTable extends HtmlBlock { td(schedulerInfo.getMaxAllocation().toString()). td(String.valueOf(schedulerInfo.getMaxClusterLevelAppPriority())). td(schedBusy == -1 ? UNAVAILABLE : String.valueOf(schedBusy)). + td(String.valueOf(rmEventQueueSize)). + td(String.valueOf(schedulerEventQueueSize)). __(). __().__(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java index 7dc2d8ac1eb..e188fa05268 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java @@ -83,6 +83,9 @@ public class ClusterMetricsInfo { private boolean crossPartitionMetricsAvailable = false; + private int rmEventQueueSize; + private int schedulerEventQueueSize; + public ClusterMetricsInfo() { } // JAXB needs this @@ -162,6 +165,8 @@ public class ClusterMetricsInfo { + rebootedNodes + unhealthyNodes + decommissioningNodes + shutdownNodes; this.containerAssignedPerSecond = clusterMetrics .getContainerAssignedPerSecond(); + this.rmEventQueueSize = clusterMetrics.getRmEventQueueSize(); + this.schedulerEventQueueSize = clusterMetrics.getSchedulerEventQueueSize(); } public int getAppsSubmitted() { @@ -419,4 +424,12 @@ public class ClusterMetricsInfo { public int getContainerAssignedPerSecond() { return this.containerAssignedPerSecond; } + + public int getRmEventQueueSize() { + return rmEventQueueSize; + } + + public int getSchedulerEventQueueSize() { + return schedulerEventQueueSize; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index dd271fd34d7..891c8d6c325 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -52,7 +52,7 @@ public class TestNodesPage { // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. - private final int numberOfThInMetricsTable = 23; + private final int numberOfThInMetricsTable = 25; private final int numberOfActualTableHeaders = 16; private final int numberOfThForOpportunisticContainers = 4; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java index b651c7959a4..673fbbe2ec0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java @@ -474,7 +474,7 @@ public class TestRMWebServices extends JerseyTestBase { Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject clusterinfo = json.getJSONObject("clusterMetrics"); - assertEquals("incorrect number of elements", 33, clusterinfo.length()); + assertEquals("incorrect number of elements", 35, clusterinfo.length()); verifyClusterMetrics( clusterinfo.getInt("appsSubmitted"), clusterinfo.getInt("appsCompleted"), clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"),