YARN-2802. ClusterMetrics to include AM launch and register delays. (Zhihai Xu via kasha)

(cherry picked from commit f9f8e750edbe6db453f0a845e2ed49ede66e0e8a)
This commit is contained in:
Karthik Kambatla 2014-11-19 19:49:36 -08:00
parent db31ef7e7f
commit 564deb75a8
4 changed files with 88 additions and 3 deletions

View File

@ -51,6 +51,9 @@ Release 2.7.0 - UNRELEASED
YARN-2157. Added YARN metrics in the documentation. (Akira AJISAKA via YARN-2157. Added YARN metrics in the documentation. (Akira AJISAKA via
jianhe) jianhe)
YARN-2802. ClusterMetrics to include AM launch and register delays.
(Zhihai Xu via kasha)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.metrics2.lib.MutableRate;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@InterfaceAudience.Private @InterfaceAudience.Private
@ -43,7 +44,9 @@ public class ClusterMetrics {
@Metric("# of lost NMs") MutableGaugeInt numLostNMs; @Metric("# of lost NMs") MutableGaugeInt numLostNMs;
@Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs; @Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
@Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs; @Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs;
@Metric("AM container launch delay") MutableRate aMLaunchDelay;
@Metric("AM register delay") MutableRate aMRegisterDelay;
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics", private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
"Metrics for the Yarn Cluster"); "Metrics for the Yarn Cluster");
@ -147,4 +150,12 @@ public class ClusterMetrics {
numActiveNMs.decr(); numActiveNMs.decr();
} }
/**
 * Adds one sample to the "AM container launch delay" rate metric.
 *
 * @param delay the observed launch delay to record; the caller visible in
 *              this change passes a difference of two
 *              {@code System.currentTimeMillis()} readings
 */
public void addAMLaunchDelay(long delay) {
  this.aMLaunchDelay.add(delay);
}
/**
 * Adds one sample to the "AM register delay" rate metric.
 *
 * @param delay the observed registration delay to record; the caller
 *              visible in this change passes a difference of two
 *              {@code System.currentTimeMillis()} readings
 */
public void addAMRegisterDelay(long delay) {
  this.aMRegisterDelay.add(delay);
}
} }

View File

@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier; import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
@ -152,8 +153,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
private String proxiedTrackingUrl = "N/A"; private String proxiedTrackingUrl = "N/A";
private long startTime = 0; private long startTime = 0;
private long finishTime = 0; private long finishTime = 0;
private long launchAMStartTime = 0;
private long launchAMEndTime = 0;
// Set to null initially. Will eventually get set // Set to null initially. Will eventually get set
// if an RMAppAttemptUnregistrationEvent occurs // if an RMAppAttemptUnregistrationEvent occurs
private FinalApplicationStatus finalStatus = null; private FinalApplicationStatus finalStatus = null;
private final StringBuilder diagnostics = new StringBuilder(); private final StringBuilder diagnostics = new StringBuilder();
@ -1261,6 +1264,12 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
@Override @Override
public void transition(RMAppAttemptImpl appAttempt, public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) { RMAppAttemptEvent event) {
if (event.getType() == RMAppAttemptEventType.LAUNCHED) {
appAttempt.launchAMEndTime = System.currentTimeMillis();
long delay = appAttempt.launchAMEndTime -
appAttempt.launchAMStartTime;
ClusterMetrics.getMetrics().addAMLaunchDelay(delay);
}
// Register with AMLivelinessMonitor // Register with AMLivelinessMonitor
appAttempt.attemptLaunched(); appAttempt.attemptLaunched();
@ -1345,7 +1354,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
@Override @Override
public void transition(RMAppAttemptImpl appAttempt, public void transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) { RMAppAttemptEvent event) {
long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime;
ClusterMetrics.getMetrics().addAMRegisterDelay(delay);
RMAppAttemptRegistrationEvent registrationEvent RMAppAttemptRegistrationEvent registrationEvent
= (RMAppAttemptRegistrationEvent) event; = (RMAppAttemptRegistrationEvent) event;
appAttempt.host = registrationEvent.getHost(); appAttempt.host = registrationEvent.getHost();
@ -1822,6 +1832,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
} }
private void launchAttempt(){ private void launchAttempt(){
launchAMStartTime = System.currentTimeMillis();
// Send event to launch the AM Container // Send event to launch the AM Container
eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this)); eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this));
} }

View File

@ -0,0 +1,60 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.junit.After;
import org.junit.Before;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit test for the AM launch/register delay metrics exposed by
 * {@link ClusterMetrics}.
 */
public class TestClusterMetrics {

  /** Metrics instance under test; obtained in {@link #setup()}. */
  private ClusterMetrics metrics;

  /**
   * Test aMLaunchDelay and aMRegisterDelay Metrics: neither metric reports a
   * change before a sample is added, and both report a change afterwards.
   */
  @Test
  public void testAmMetrics() throws Exception {
    // Use a JUnit assertion rather than the Java 'assert' keyword, which is
    // silently skipped when the JVM runs without -ea.
    Assert.assertNotNull("ClusterMetrics was not initialized", metrics);

    Assert.assertFalse("aMLaunchDelay changed before any sample was added",
        metrics.aMLaunchDelay.changed());
    Assert.assertFalse("aMRegisterDelay changed before any sample was added",
        metrics.aMRegisterDelay.changed());

    metrics.addAMLaunchDelay(1);
    metrics.addAMRegisterDelay(1);

    Assert.assertTrue("aMLaunchDelay did not change after adding a sample",
        metrics.aMLaunchDelay.changed());
    Assert.assertTrue("aMRegisterDelay did not change after adding a sample",
        metrics.aMRegisterDelay.changed());
  }

  /**
   * Initializes the default metrics system under the "ResourceManager"
   * prefix and fetches the {@link ClusterMetrics} instance for the test.
   */
  @Before
  public void setup() {
    DefaultMetricsSystem.initialize("ResourceManager");
    metrics = ClusterMetrics.getMetrics();
  }

  /**
   * Destroys the {@link ClusterMetrics} instance and, if a "ClusterMetrics"
   * source is still registered, shuts the default metrics system down.
   */
  @After
  public void tearDown() {
    ClusterMetrics.destroy();
    MetricsSystem ms = DefaultMetricsSystem.instance();
    if (ms.getSource("ClusterMetrics") != null) {
      DefaultMetricsSystem.shutdown();
    }
  }
}