YARN-4983. JVM and UGI metrics disappear after RM transitioned to standby mode

(cherry picked from commit 4beff01354)
This commit is contained in:
Jian He 2016-04-26 21:00:17 -07:00
parent dba737f1e5
commit 9d3ddb0b4d
4 changed files with 111 additions and 8 deletions

View File

@ -86,6 +86,10 @@ public class JvmMetrics implements MetricsSource {
new JvmMetrics(processName, sessionId)); new JvmMetrics(processName, sessionId));
} }
/**
 * Registers an already-created {@code JvmMetrics} instance with the given
 * metrics system, passing {@code JvmMetrics.name()} and
 * {@code JvmMetrics.description()} along with the instance itself.
 * The value returned by {@code register} is discarded.
 *
 * NOTE(review): presumably intended for the case where the metrics system
 * was shut down and re-initialized while the same source object must keep
 * being reported - confirm against the callers.
 *
 * @param ms the metrics system to register the source with
 * @param jvmMetrics the existing source instance to register
 */
public static void reattach(MetricsSystem ms, JvmMetrics jvmMetrics) {
ms.register(JvmMetrics.name(), JvmMetrics.description(), jvmMetrics);
}
public static JvmMetrics initSingleton(String processName, String sessionId) { public static JvmMetrics initSingleton(String processName, String sessionId) {
return Singleton.INSTANCE.init(processName, sessionId); return Singleton.INSTANCE.init(processName, sessionId);
} }

View File

@ -124,6 +124,10 @@ public class UserGroupInformation {
return DefaultMetricsSystem.instance().register(new UgiMetrics()); return DefaultMetricsSystem.instance().register(new UgiMetrics());
} }
/**
 * Replaces the shared {@code metrics} reference with the result of a fresh
 * {@code UgiMetrics.create()} call.
 *
 * NOTE(review): create() appears to register a brand-new UgiMetrics object
 * with the default metrics system, so values accumulated in the previous
 * instance would not carry over - confirm this is acceptable.
 * NOTE(review): the assignment is not synchronized here; verify whether
 * concurrent readers of {@code metrics} need a visibility guarantee.
 */
static void reattach() {
metrics = UgiMetrics.create();
}
void addGetGroups(long latency) { void addGetGroups(long latency) {
getGroups.add(latency); getGroups.add(latency);
if (getGroupsQuantiles != null) { if (getGroupsQuantiles != null) {
@ -236,6 +240,13 @@ public class UserGroupInformation {
} }
} }
/**
 * Reattaches this class's metrics to a new metrics system.
 * <p>
 * Public entry point that simply delegates to {@code UgiMetrics.reattach()}.
 */
public static void reattachMetrics() {
UgiMetrics.reattach();
}
/** Metrics to track UGI activity */ /** Metrics to track UGI activity */
static UgiMetrics metrics = UgiMetrics.create(); static UgiMetrics metrics = UgiMetrics.create();
/** The auth method to use */ /** The auth method to use */

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.http.lib.StaticUserWebFilter; import org.apache.hadoop.http.lib.StaticUserWebFilter;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.security.AuthenticationFilterInitializer; import org.apache.hadoop.security.AuthenticationFilterInitializer;
@ -170,7 +171,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
private WebApp webApp; private WebApp webApp;
private AppReportFetcher fetcher = null; private AppReportFetcher fetcher = null;
protected ResourceTrackerService resourceTracker; protected ResourceTrackerService resourceTracker;
private JvmPauseMonitor pauseMonitor; private JvmMetrics jvmMetrics;
private boolean curatorEnabled = false; private boolean curatorEnabled = false;
private CuratorFramework curator; private CuratorFramework curator;
private final String zkRootNodePassword = private final String zkRootNodePassword =
@ -283,7 +284,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
rmContext.setYarnConfiguration(conf); rmContext.setYarnConfiguration(conf);
createAndInitActiveServices(); createAndInitActiveServices(false);
webAppAddress = WebAppUtils.getWebAppBindURL(this.conf, webAppAddress = WebAppUtils.getWebAppBindURL(this.conf,
YarnConfiguration.RM_BIND_HOST, YarnConfiguration.RM_BIND_HOST,
@ -488,6 +489,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
private ContainerAllocationExpirer containerAllocationExpirer; private ContainerAllocationExpirer containerAllocationExpirer;
private ResourceManager rm; private ResourceManager rm;
private RMActiveServiceContext activeServiceContext; private RMActiveServiceContext activeServiceContext;
private boolean fromActive = false;
RMActiveServices(ResourceManager rm) { RMActiveServices(ResourceManager rm) {
super("RMActiveServices"); super("RMActiveServices");
@ -595,11 +597,17 @@ public class ResourceManager extends CompositeService implements Recoverable {
addService(resourceTracker); addService(resourceTracker);
rmContext.setResourceTrackerService(resourceTracker); rmContext.setResourceTrackerService(resourceTracker);
DefaultMetricsSystem.initialize("ResourceManager"); MetricsSystem ms = DefaultMetricsSystem.initialize("ResourceManager");
JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null); if (fromActive) {
pauseMonitor = new JvmPauseMonitor(); JvmMetrics.reattach(ms, jvmMetrics);
UserGroupInformation.reattachMetrics();
} else {
jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
}
JvmPauseMonitor pauseMonitor = new JvmPauseMonitor();
addService(pauseMonitor); addService(pauseMonitor);
jm.setPauseMonitor(pauseMonitor); jvmMetrics.setPauseMonitor(pauseMonitor);
// Initialize the Reservation system // Initialize the Reservation system
if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE, if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
@ -1081,9 +1089,13 @@ public class ResourceManager extends CompositeService implements Recoverable {
/** /**
* Helper method to create and init {@link #activeServices}. This creates an * Helper method to create and init {@link #activeServices}. This creates an
* instance of {@link RMActiveServices} and initializes it. * instance of {@link RMActiveServices} and initializes it.
*
* @param fromActive Indicates if the call is from the active state transition
* or the RM initialization.
*/ */
protected void createAndInitActiveServices() { protected void createAndInitActiveServices(boolean fromActive) {
activeServices = new RMActiveServices(this); activeServices = new RMActiveServices(this);
activeServices.fromActive = fromActive;
activeServices.init(conf); activeServices.init(conf);
} }
@ -1114,7 +1126,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
QueueMetrics.clearQueueMetrics(); QueueMetrics.clearQueueMetrics();
if (initialize) { if (initialize) {
resetDispatcher(); resetDispatcher();
createAndInitActiveServices(); createAndInitActiveServices(true);
} }
} }

View File

@ -0,0 +1,76 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.junit.Before;
import org.junit.Test;
import static junit.framework.TestCase.assertNotNull;
/**
 * Metrics-related RM HA tests. Metrics are mostly static singletons, so to
 * avoid interference with other RM HA tests, the metric tests for RM HA are
 * kept in this separate file for the time being.
 */
public class TestRMHAMetrics {

  /** Base configuration, rebuilt before every test by {@link #setUp()}. */
  private Configuration configuration;

  private static final String RM1_ADDRESS = "1.1.1.1:1";
  private static final String RM1_NODE_ID = "rm1";
  private static final String RM2_ADDRESS = "0.0.0.0:0";
  private static final String RM2_NODE_ID = "rm2";

  /**
   * Builds an HA-enabled configuration with two RM ids and resets the static
   * metrics state so that each test starts from a clean slate.
   *
   * @throws Exception if the configuration or metrics reset fails
   */
  @Before
  public void setUp() throws Exception {
    configuration = new Configuration();
    UserGroupInformation.setConfiguration(configuration);
    configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
    configuration.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + ","
        + RM2_NODE_ID);
    // Give every RM service address key a per-RM value for both node ids.
    for (String confKey : YarnConfiguration
        .getServiceAddressConfKeys(configuration)) {
      configuration.set(HAUtil.addSuffix(confKey, RM1_NODE_ID), RM1_ADDRESS);
      configuration.set(HAUtil.addSuffix(confKey, RM2_NODE_ID), RM2_ADDRESS);
    }

    // Metrics are static singletons; clear whatever earlier tests left behind.
    ClusterMetrics.destroy();
    QueueMetrics.clearQueueMetrics();
    DefaultMetricsSystem.shutdown();
  }

  /**
   * Verifies that the "JvmMetrics" and "UgiMetrics" sources are still
   * available from the default metrics system after the RM has gone from
   * active to standby.
   *
   * @throws Exception if the RM fails to start or to change HA state
   */
  @Test(timeout = 300000)
  public void testMetricsAfterTransitionToStandby() throws Exception {
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    Configuration conf = new YarnConfiguration(configuration);
    MockRM rm = new MockRM(conf);
    try {
      rm.init(conf);
      rm.start();
      rm.transitionToActive();
      rm.transitionToStandby(true);
      assertNotNull("JvmMetrics source missing after transition to standby",
          DefaultMetricsSystem.instance().getSource("JvmMetrics"));
      assertNotNull("UgiMetrics source missing after transition to standby",
          DefaultMetricsSystem.instance().getSource("UgiMetrics"));
    } finally {
      // Always stop the RM, even when a transition or assertion fails, so
      // that its services do not leak into subsequent tests.
      rm.stop();
    }
  }
}