From cfce39023d6e77fb81393beba46ce2573361c011 Mon Sep 17 00:00:00 2001 From: Tao Yang Date: Sat, 7 Sep 2019 07:52:39 +0800 Subject: [PATCH] YARN-9795. ClusterMetrics to include AM allocation delay. Contributed by Fengnan Li. --- .../yarn/server/resourcemanager/ClusterMetrics.java | 9 +++++++++ .../resourcemanager/rmapp/attempt/RMAppAttemptImpl.java | 8 ++++++++ .../yarn/server/resourcemanager/TestClusterMetrics.java | 8 +++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java index 5917b99dc8c..ba5fc4084fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java @@ -48,6 +48,8 @@ public class ClusterMetrics { @Metric("# of Shutdown NMs") MutableGaugeInt numShutdownNMs; @Metric("AM container launch delay") MutableRate aMLaunchDelay; @Metric("AM register delay") MutableRate aMRegisterDelay; + @Metric("AM container allocation delay") + private MutableRate aMContainerAllocationDelay; private static final MetricsInfo RECORD_INFO = info("ClusterMetrics", "Metrics for the Yarn Cluster"); @@ -190,4 +192,11 @@ public class ClusterMetrics { aMRegisterDelay.add(delay); } + public void addAMContainerAllocationDelay(long delay) { + aMContainerAllocationDelay.add(delay); + } + + public MutableRate getAMContainerAllocationDelay() { + return aMContainerAllocationDelay; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index ef854088025..862c43ad43c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -176,6 +176,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private long finishTime = 0; private long launchAMStartTime = 0; private long launchAMEndTime = 0; + private long scheduledTime = 0; + private long containerAllocatedTime = 0; // Set to null initially. Will eventually get set // if an RMAppAttemptUnregistrationEvent occurs @@ -1170,6 +1172,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { && amContainerAllocation.getContainers() != null) { assert (amContainerAllocation.getContainers().size() == 0); } + appAttempt.scheduledTime = System.currentTimeMillis(); return RMAppAttemptState.SCHEDULED; } else { // save state and then go to LAUNCHED state @@ -1226,6 +1229,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { .clearNodeSetForAttempt(appAttempt.applicationAttemptId); appAttempt.getSubmissionContext().setResource( appAttempt.getMasterContainer().getResource()); + appAttempt.containerAllocatedTime = System.currentTimeMillis(); + long allocationDelay = + appAttempt.containerAllocatedTime - appAttempt.scheduledTime; + ClusterMetrics.getMetrics().addAMContainerAllocationDelay( + allocationDelay); appAttempt.storeAttempt(); return RMAppAttemptState.ALLOCATED_SAVING; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java index 5b9105f84c7..d81e27860ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java @@ -29,17 +29,23 @@ public class TestClusterMetrics { private ClusterMetrics metrics; /** - * Test aMLaunchDelay and aMRegisterDelay Metrics + * Test below metrics + * - aMLaunchDelay + * - aMRegisterDelay + * - aMContainerAllocationDelay */ @Test public void testAmMetrics() throws Exception { assert(metrics != null); Assert.assertTrue(!metrics.aMLaunchDelay.changed()); Assert.assertTrue(!metrics.aMRegisterDelay.changed()); + Assert.assertTrue(!metrics.getAMContainerAllocationDelay().changed()); metrics.addAMLaunchDelay(1); metrics.addAMRegisterDelay(1); + metrics.addAMContainerAllocationDelay(1); Assert.assertTrue(metrics.aMLaunchDelay.changed()); Assert.assertTrue(metrics.aMRegisterDelay.changed()); + Assert.assertTrue(metrics.getAMContainerAllocationDelay().changed()); } @Before