YARN-2802. ClusterMetrics to include AM launch and register delays. (Zhihai Xu via kasha)
(cherry picked from commit f9f8e750edbe6db453f0a845e2ed49ede66e0e8a)
This commit is contained in:
parent
db31ef7e7f
commit
564deb75a8
|
@ -51,6 +51,9 @@ Release 2.7.0 - UNRELEASED
|
||||||
YARN-2157. Added YARN metrics in the documentation. (Akira AJISAKA via
|
YARN-2157. Added YARN metrics in the documentation. (Akira AJISAKA via
|
||||||
jianhe)
|
jianhe)
|
||||||
|
|
||||||
|
YARN-2802. ClusterMetrics to include AM launch and register delays.
|
||||||
|
(Zhihai Xu via kasha)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MutableRate;
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
|
@ -43,7 +44,9 @@ public class ClusterMetrics {
|
||||||
@Metric("# of lost NMs") MutableGaugeInt numLostNMs;
|
@Metric("# of lost NMs") MutableGaugeInt numLostNMs;
|
||||||
@Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
|
@Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
|
||||||
@Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs;
|
@Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs;
|
||||||
|
@Metric("AM container launch delay") MutableRate aMLaunchDelay;
|
||||||
|
@Metric("AM register delay") MutableRate aMRegisterDelay;
|
||||||
|
|
||||||
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
|
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
|
||||||
"Metrics for the Yarn Cluster");
|
"Metrics for the Yarn Cluster");
|
||||||
|
|
||||||
|
@ -147,4 +150,12 @@ public class ClusterMetrics {
|
||||||
numActiveNMs.decr();
|
numActiveNMs.decr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addAMLaunchDelay(long delay) {
|
||||||
|
aMLaunchDelay.add(delay);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addAMRegisterDelay(long delay) {
|
||||||
|
aMRegisterDelay.add(delay);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
|
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
|
||||||
|
@ -152,8 +153,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
private String proxiedTrackingUrl = "N/A";
|
private String proxiedTrackingUrl = "N/A";
|
||||||
private long startTime = 0;
|
private long startTime = 0;
|
||||||
private long finishTime = 0;
|
private long finishTime = 0;
|
||||||
|
private long launchAMStartTime = 0;
|
||||||
|
private long launchAMEndTime = 0;
|
||||||
|
|
||||||
// Set to null initially. Will eventually get set
|
// Set to null initially. Will eventually get set
|
||||||
// if an RMAppAttemptUnregistrationEvent occurs
|
// if an RMAppAttemptUnregistrationEvent occurs
|
||||||
private FinalApplicationStatus finalStatus = null;
|
private FinalApplicationStatus finalStatus = null;
|
||||||
private final StringBuilder diagnostics = new StringBuilder();
|
private final StringBuilder diagnostics = new StringBuilder();
|
||||||
|
@ -1261,6 +1264,12 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppAttemptImpl appAttempt,
|
public void transition(RMAppAttemptImpl appAttempt,
|
||||||
RMAppAttemptEvent event) {
|
RMAppAttemptEvent event) {
|
||||||
|
if (event.getType() == RMAppAttemptEventType.LAUNCHED) {
|
||||||
|
appAttempt.launchAMEndTime = System.currentTimeMillis();
|
||||||
|
long delay = appAttempt.launchAMEndTime -
|
||||||
|
appAttempt.launchAMStartTime;
|
||||||
|
ClusterMetrics.getMetrics().addAMLaunchDelay(delay);
|
||||||
|
}
|
||||||
// Register with AMLivelinessMonitor
|
// Register with AMLivelinessMonitor
|
||||||
appAttempt.attemptLaunched();
|
appAttempt.attemptLaunched();
|
||||||
|
|
||||||
|
@ -1345,7 +1354,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppAttemptImpl appAttempt,
|
public void transition(RMAppAttemptImpl appAttempt,
|
||||||
RMAppAttemptEvent event) {
|
RMAppAttemptEvent event) {
|
||||||
|
long delay = System.currentTimeMillis() - appAttempt.launchAMEndTime;
|
||||||
|
ClusterMetrics.getMetrics().addAMRegisterDelay(delay);
|
||||||
RMAppAttemptRegistrationEvent registrationEvent
|
RMAppAttemptRegistrationEvent registrationEvent
|
||||||
= (RMAppAttemptRegistrationEvent) event;
|
= (RMAppAttemptRegistrationEvent) event;
|
||||||
appAttempt.host = registrationEvent.getHost();
|
appAttempt.host = registrationEvent.getHost();
|
||||||
|
@ -1822,6 +1832,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void launchAttempt(){
|
private void launchAttempt(){
|
||||||
|
launchAMStartTime = System.currentTimeMillis();
|
||||||
// Send event to launch the AM Container
|
// Send event to launch the AM Container
|
||||||
eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this));
|
eventHandler.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, this));
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager;
|
||||||
|
|
||||||
|
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||||
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestClusterMetrics {
|
||||||
|
|
||||||
|
private ClusterMetrics metrics;
|
||||||
|
/**
|
||||||
|
* Test aMLaunchDelay and aMRegisterDelay Metrics
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testAmMetrics() throws Exception {
|
||||||
|
assert(metrics != null);
|
||||||
|
Assert.assertTrue(!metrics.aMLaunchDelay.changed());
|
||||||
|
Assert.assertTrue(!metrics.aMRegisterDelay.changed());
|
||||||
|
metrics.addAMLaunchDelay(1);
|
||||||
|
metrics.addAMRegisterDelay(1);
|
||||||
|
Assert.assertTrue(metrics.aMLaunchDelay.changed());
|
||||||
|
Assert.assertTrue(metrics.aMRegisterDelay.changed());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() {
|
||||||
|
DefaultMetricsSystem.initialize("ResourceManager");
|
||||||
|
metrics = ClusterMetrics.getMetrics();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() {
|
||||||
|
ClusterMetrics.destroy();
|
||||||
|
|
||||||
|
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||||
|
if (ms.getSource("ClusterMetrics") != null) {
|
||||||
|
DefaultMetricsSystem.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue