From 564deb75a864dfc5ef7b137eec100f35d0eccd7f Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 19 Nov 2014 19:49:36 -0800 Subject: [PATCH] YARN-2802. ClusterMetrics to include AM launch and register delays. (Zhihai Xu via kasha) (cherry picked from commit f9f8e750edbe6db453f0a845e2ed49ede66e0e8a) --- hadoop-yarn-project/CHANGES.txt | 3 + .../resourcemanager/ClusterMetrics.java | 13 +++- .../rmapp/attempt/RMAppAttemptImpl.java | 15 ++++- .../resourcemanager/TestClusterMetrics.java | 60 +++++++++++++++++++ 4 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3dd08b193de..c246b8ee753 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -51,6 +51,9 @@ Release 2.7.0 - UNRELEASED YARN-2157. Added YARN metrics in the documentaion. (Akira AJISAKA via jianhe) + YARN-2802. ClusterMetrics to include AM launch and register delays. 
+ (Zhihai Xu via kasha) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java index 942ec811286..5fa36bc3d29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java @@ -30,6 +30,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableRate; import com.google.common.annotations.VisibleForTesting; @InterfaceAudience.Private @@ -43,7 +44,9 @@ public class ClusterMetrics { @Metric("# of lost NMs") MutableGaugeInt numLostNMs; @Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs; @Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs; - + @Metric("AM container launch delay") MutableRate aMLaunchDelay; + @Metric("AM register delay") MutableRate aMRegisterDelay; + private static final MetricsInfo RECORD_INFO = info("ClusterMetrics", "Metrics for the Yarn Cluster"); @@ -147,4 +150,12 @@ public class ClusterMetrics { numActiveNMs.decr(); } + public void addAMLaunchDelay(long delay) { + aMLaunchDelay.add(delay); + } + + public void addAMRegisterDelay(long delay) { + aMRegisterDelay.add(delay); + } + } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 0d7e33451fd..a80167f9b2a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; +import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; @@ -152,8 +153,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private String proxiedTrackingUrl = "N/A"; private long startTime = 0; private long finishTime = 0; + private long launchAMStartTime = 0; + private long launchAMEndTime = 0; - // Set to null initially. Will eventually get set + // Set to null initially. 
Will eventually get set // if an RMAppAttemptUnregistrationEvent occurs private FinalApplicationStatus finalStatus = null; private final StringBuilder diagnostics = new StringBuilder(); @@ -1261,6 +1264,12 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { @Override public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { + if (event.getType() == RMAppAttemptEventType.LAUNCHED) { + appAttempt.launchAMEndTime = System.currentTimeMillis(); + long delay = appAttempt.launchAMEndTime - + appAttempt.launchAMStartTime; + ClusterMetrics.getMetrics().addAMLaunchDelay(delay); + } // Register with AMLivelinessMonitor appAttempt.attemptLaunched(); @@ -1345,7 +1354,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { @Override public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { - + long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime; + ClusterMetrics.getMetrics().addAMRegisterDelay(delay); RMAppAttemptRegistrationEvent registrationEvent = (RMAppAttemptRegistrationEvent) event; appAttempt.host = registrationEvent.getHost(); @@ -1822,6 +1832,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { } private void launchAttempt(){ + launchAMStartTime = System.currentTimeMillis(); // Send event to launch the AM Container eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java new file mode 100644 index 00000000000..5b9105f84c7 --- /dev/null +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClusterMetrics.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager;
+
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Unit test for the AM launch delay and AM register delay metrics held by
+ * {@link ClusterMetrics}.
+ */
+public class TestClusterMetrics {
+
+  private ClusterMetrics metrics;
+
+  /**
+   * Checks that aMLaunchDelay and aMRegisterDelay report no change before
+   * any sample is added, and report a change once a delay has been added.
+   */
+  @Test
+  public void testAmMetrics() throws Exception {
+    // Use a JUnit assertion rather than the Java assert keyword, which is
+    // skipped when the JVM runs with assertions disabled.
+    Assert.assertNotNull("ClusterMetrics was not initialized", metrics);
+    Assert.assertFalse(metrics.aMLaunchDelay.changed());
+    Assert.assertFalse(metrics.aMRegisterDelay.changed());
+    metrics.addAMLaunchDelay(1);
+    metrics.addAMRegisterDelay(1);
+    Assert.assertTrue(metrics.aMLaunchDelay.changed());
+    Assert.assertTrue(metrics.aMRegisterDelay.changed());
+  }
+
+  /**
+   * Initializes the default metrics system with "ResourceManager" and
+   * obtains the ClusterMetrics instance exercised by the test.
+   */
+  @Before
+  public void setup() {
+    DefaultMetricsSystem.initialize("ResourceManager");
+    metrics = ClusterMetrics.getMetrics();
+  }
+
+  /**
+   * Destroys ClusterMetrics, then shuts the default metrics system down if
+   * a source named "ClusterMetrics" is still registered with it.
+   */
+  @After
+  public void tearDown() {
+    ClusterMetrics.destroy();
+
+    MetricsSystem ms = DefaultMetricsSystem.instance();
+    if (ms.getSource("ClusterMetrics") != null) {
+      DefaultMetricsSystem.shutdown();
+    }
+  }
+}