YARN-4983. JVM and UGI metrics disappear after RM transitioned to standby mode

This commit is contained in:
Jian He 2016-04-26 21:00:17 -07:00
parent ea5475d1c1
commit 4beff01354
4 changed files with 111 additions and 8 deletions

View File

@ -86,6 +86,10 @@ public class JvmMetrics implements MetricsSource {
new JvmMetrics(processName, sessionId));
}
public static void reattach(MetricsSystem ms, JvmMetrics jvmMetrics) {
ms.register(JvmMetrics.name(), JvmMetrics.description(), jvmMetrics);
}
public static JvmMetrics initSingleton(String processName, String sessionId) {
return Singleton.INSTANCE.init(processName, sessionId);
}

View File

@ -126,6 +126,10 @@ public class UserGroupInformation {
return DefaultMetricsSystem.instance().register(new UgiMetrics());
}
static void reattach() {
metrics = UgiMetrics.create();
}
void addGetGroups(long latency) {
getGroups.add(latency);
if (getGroupsQuantiles != null) {
@ -238,6 +242,13 @@ public class UserGroupInformation {
}
}
/**
* Reattach the class's metrics to a new metric system.
*/
public static void reattachMetrics() {
UgiMetrics.reattach();
}
/** Metrics to track UGI activity */
static UgiMetrics metrics = UgiMetrics.create();
/** The auth method to use */

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.http.lib.StaticUserWebFilter;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.security.AuthenticationFilterInitializer;
@ -173,7 +174,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
private WebApp webApp;
private AppReportFetcher fetcher = null;
protected ResourceTrackerService resourceTracker;
private JvmPauseMonitor pauseMonitor;
private JvmMetrics jvmMetrics;
private boolean curatorEnabled = false;
private CuratorFramework curator;
private final String zkRootNodePassword =
@ -286,7 +287,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
rmContext.setYarnConfiguration(conf);
createAndInitActiveServices();
createAndInitActiveServices(false);
webAppAddress = WebAppUtils.getWebAppBindURL(this.conf,
YarnConfiguration.RM_BIND_HOST,
@ -491,6 +492,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
private ContainerAllocationExpirer containerAllocationExpirer;
private ResourceManager rm;
private RMActiveServiceContext activeServiceContext;
private boolean fromActive = false;
RMActiveServices(ResourceManager rm) {
super("RMActiveServices");
@ -598,11 +600,17 @@ public class ResourceManager extends CompositeService implements Recoverable {
addService(resourceTracker);
rmContext.setResourceTrackerService(resourceTracker);
DefaultMetricsSystem.initialize("ResourceManager");
JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null);
pauseMonitor = new JvmPauseMonitor();
MetricsSystem ms = DefaultMetricsSystem.initialize("ResourceManager");
if (fromActive) {
JvmMetrics.reattach(ms, jvmMetrics);
UserGroupInformation.reattachMetrics();
} else {
jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
}
JvmPauseMonitor pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor);
jvmMetrics.setPauseMonitor(pauseMonitor);
// Initialize the Reservation system
if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
@ -982,9 +990,13 @@ public class ResourceManager extends CompositeService implements Recoverable {
/**
* Helper method to create and init {@link #activeServices}. This creates an
* instance of {@link RMActiveServices} and initializes it.
*
* @param fromActive Indicates if the call is from the active state transition
* or the RM initialization.
*/
protected void createAndInitActiveServices() {
protected void createAndInitActiveServices(boolean fromActive) {
activeServices = new RMActiveServices(this);
activeServices.fromActive = fromActive;
activeServices.init(conf);
}
@ -1015,7 +1027,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
QueueMetrics.clearQueueMetrics();
if (initialize) {
resetDispatcher();
createAndInitActiveServices();
createAndInitActiveServices(true);
}
}

View File

@ -0,0 +1,76 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.junit.Before;
import org.junit.Test;
import static junit.framework.TestCase.assertNotNull;
/**
* Metrics related RM HA testing. Metrics are mostly static singletons. To
* avoid interference with other RM HA tests, separating metric tests for RM HA
* into a separate file temporarily.
*/
public class TestRMHAMetrics {
private Configuration configuration;
private static final String RM1_ADDRESS = "1.1.1.1:1";
private static final String RM1_NODE_ID = "rm1";
private static final String RM2_ADDRESS = "0.0.0.0:0";
private static final String RM2_NODE_ID = "rm2";
@Before
public void setUp() throws Exception {
configuration = new Configuration();
UserGroupInformation.setConfiguration(configuration);
configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
configuration.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + ","
+ RM2_NODE_ID);
for (String confKey : YarnConfiguration
.getServiceAddressConfKeys(configuration)) {
configuration.set(HAUtil.addSuffix(confKey, RM1_NODE_ID), RM1_ADDRESS);
configuration.set(HAUtil.addSuffix(confKey, RM2_NODE_ID), RM2_ADDRESS);
}
ClusterMetrics.destroy();
QueueMetrics.clearQueueMetrics();
DefaultMetricsSystem.shutdown();
}
@Test(timeout = 300000)
public void testMetricsAfterTransitionToStandby() throws Exception {
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
Configuration conf = new YarnConfiguration(configuration);
MockRM rm = new MockRM(conf);
rm.init(conf);
rm.start();
rm.transitionToActive();
rm.transitionToStandby(true);
assertNotNull(DefaultMetricsSystem.instance().getSource("JvmMetrics"));
assertNotNull(DefaultMetricsSystem.instance().getSource("UgiMetrics"));
rm.stop();
}
}