YARN-4983. JVM and UGI metrics disappear after RM transitioned to standby mode
(cherry picked from commit 4beff01354)
This commit is contained in:
parent
dba737f1e5
commit
9d3ddb0b4d
|
@ -86,6 +86,10 @@ public class JvmMetrics implements MetricsSource {
|
||||||
new JvmMetrics(processName, sessionId));
|
new JvmMetrics(processName, sessionId));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Registers an existing {@code JvmMetrics} instance with the given metrics
 * system, using the name and description returned by
 * {@code JvmMetrics.name()} and {@code JvmMetrics.description()}.
 *
 * @param ms the metrics system to register the source with
 * @param jvmMetrics the already-created metrics source to register
 */
public static void reattach(MetricsSystem ms, JvmMetrics jvmMetrics) {
|
||||||
|
ms.register(JvmMetrics.name(), JvmMetrics.description(), jvmMetrics);
|
||||||
|
}
|
||||||
|
|
||||||
public static JvmMetrics initSingleton(String processName, String sessionId) {
|
public static JvmMetrics initSingleton(String processName, String sessionId) {
|
||||||
return Singleton.INSTANCE.init(processName, sessionId);
|
return Singleton.INSTANCE.init(processName, sessionId);
|
||||||
}
|
}
|
||||||
|
|
|
@ -124,6 +124,10 @@ public class UserGroupInformation {
|
||||||
return DefaultMetricsSystem.instance().register(new UgiMetrics());
|
return DefaultMetricsSystem.instance().register(new UgiMetrics());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Replaces {@code metrics} with the instance returned by
 * {@code UgiMetrics.create()}.
 * NOTE(review): create() appears to register a new UgiMetrics with
 * DefaultMetricsSystem.instance() -- confirm against its full body.
 */
static void reattach() {
|
||||||
|
metrics = UgiMetrics.create();
|
||||||
|
}
|
||||||
|
|
||||||
void addGetGroups(long latency) {
|
void addGetGroups(long latency) {
|
||||||
getGroups.add(latency);
|
getGroups.add(latency);
|
||||||
if (getGroupsQuantiles != null) {
|
if (getGroupsQuantiles != null) {
|
||||||
|
@ -236,6 +240,13 @@ public class UserGroupInformation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reattaches this class's UGI metrics to a new metrics system by delegating
* to {@code UgiMetrics.reattach()}.
* NOTE(review): presumably meant to be called after the default metrics
* system has been re-initialized -- confirm with callers.
|
||||||
|
*/
|
||||||
|
public static void reattachMetrics() {
|
||||||
|
UgiMetrics.reattach();
|
||||||
|
}
|
||||||
|
|
||||||
/** Metrics to track UGI activity */
|
/** Metrics to track UGI activity */
|
||||||
static UgiMetrics metrics = UgiMetrics.create();
|
static UgiMetrics metrics = UgiMetrics.create();
|
||||||
/** The auth method to use */
|
/** The auth method to use */
|
||||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
import org.apache.hadoop.http.lib.StaticUserWebFilter;
|
import org.apache.hadoop.http.lib.StaticUserWebFilter;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.source.JvmMetrics;
|
import org.apache.hadoop.metrics2.source.JvmMetrics;
|
||||||
import org.apache.hadoop.security.AuthenticationFilterInitializer;
|
import org.apache.hadoop.security.AuthenticationFilterInitializer;
|
||||||
|
@ -170,7 +171,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
private WebApp webApp;
|
private WebApp webApp;
|
||||||
private AppReportFetcher fetcher = null;
|
private AppReportFetcher fetcher = null;
|
||||||
protected ResourceTrackerService resourceTracker;
|
protected ResourceTrackerService resourceTracker;
|
||||||
private JvmPauseMonitor pauseMonitor;
|
private JvmMetrics jvmMetrics;
|
||||||
private boolean curatorEnabled = false;
|
private boolean curatorEnabled = false;
|
||||||
private CuratorFramework curator;
|
private CuratorFramework curator;
|
||||||
private final String zkRootNodePassword =
|
private final String zkRootNodePassword =
|
||||||
|
@ -283,7 +284,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
|
|
||||||
rmContext.setYarnConfiguration(conf);
|
rmContext.setYarnConfiguration(conf);
|
||||||
|
|
||||||
createAndInitActiveServices();
|
createAndInitActiveServices(false);
|
||||||
|
|
||||||
webAppAddress = WebAppUtils.getWebAppBindURL(this.conf,
|
webAppAddress = WebAppUtils.getWebAppBindURL(this.conf,
|
||||||
YarnConfiguration.RM_BIND_HOST,
|
YarnConfiguration.RM_BIND_HOST,
|
||||||
|
@ -488,6 +489,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
private ContainerAllocationExpirer containerAllocationExpirer;
|
private ContainerAllocationExpirer containerAllocationExpirer;
|
||||||
private ResourceManager rm;
|
private ResourceManager rm;
|
||||||
private RMActiveServiceContext activeServiceContext;
|
private RMActiveServiceContext activeServiceContext;
|
||||||
|
private boolean fromActive = false;
|
||||||
|
|
||||||
RMActiveServices(ResourceManager rm) {
|
RMActiveServices(ResourceManager rm) {
|
||||||
super("RMActiveServices");
|
super("RMActiveServices");
|
||||||
|
@ -595,11 +597,17 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
addService(resourceTracker);
|
addService(resourceTracker);
|
||||||
rmContext.setResourceTrackerService(resourceTracker);
|
rmContext.setResourceTrackerService(resourceTracker);
|
||||||
|
|
||||||
DefaultMetricsSystem.initialize("ResourceManager");
|
MetricsSystem ms = DefaultMetricsSystem.initialize("ResourceManager");
|
||||||
JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null);
|
if (fromActive) {
|
||||||
pauseMonitor = new JvmPauseMonitor();
|
JvmMetrics.reattach(ms, jvmMetrics);
|
||||||
|
UserGroupInformation.reattachMetrics();
|
||||||
|
} else {
|
||||||
|
jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null);
|
||||||
|
}
|
||||||
|
|
||||||
|
JvmPauseMonitor pauseMonitor = new JvmPauseMonitor();
|
||||||
addService(pauseMonitor);
|
addService(pauseMonitor);
|
||||||
jm.setPauseMonitor(pauseMonitor);
|
jvmMetrics.setPauseMonitor(pauseMonitor);
|
||||||
|
|
||||||
// Initialize the Reservation system
|
// Initialize the Reservation system
|
||||||
if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
|
if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE,
|
||||||
|
@ -1081,9 +1089,13 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
/**
|
/**
|
||||||
* Helper method to create and init {@link #activeServices}. This creates an
|
* Helper method to create and init {@link #activeServices}. This creates an
|
||||||
* instance of {@link RMActiveServices} and initializes it.
|
* instance of {@link RMActiveServices} and initializes it.
|
||||||
|
*
|
||||||
|
* @param fromActive Indicates if the call is from the active state transition
|
||||||
|
* or the RM initialization.
|
||||||
*/
|
*/
|
||||||
protected void createAndInitActiveServices() {
|
protected void createAndInitActiveServices(boolean fromActive) {
|
||||||
activeServices = new RMActiveServices(this);
|
activeServices = new RMActiveServices(this);
|
||||||
|
activeServices.fromActive = fromActive;
|
||||||
activeServices.init(conf);
|
activeServices.init(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1114,7 +1126,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
QueueMetrics.clearQueueMetrics();
|
QueueMetrics.clearQueueMetrics();
|
||||||
if (initialize) {
|
if (initialize) {
|
||||||
resetDispatcher();
|
resetDispatcher();
|
||||||
createAndInitActiveServices();
|
createAndInitActiveServices(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.yarn.conf.HAUtil;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static junit.framework.TestCase.assertNotNull;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Metrics-related RM HA tests. Metrics are mostly static singletons, so to
|
||||||
|
* avoid interfering with other RM HA tests, the metric tests for RM HA are
|
||||||
|
* kept in a separate file for now.
|
||||||
|
*/
|
||||||
|
public class TestRMHAMetrics {
|
||||||
|
private Configuration configuration;
|
||||||
|
|
||||||
|
private static final String RM1_ADDRESS = "1.1.1.1:1";
|
||||||
|
private static final String RM1_NODE_ID = "rm1";
|
||||||
|
|
||||||
|
private static final String RM2_ADDRESS = "0.0.0.0:0";
|
||||||
|
private static final String RM2_NODE_ID = "rm2";
|
||||||
|
|
||||||
|
/**
 * Builds an HA-enabled configuration with two RM ids ("rm1" and "rm2"),
 * assigns each service-address key a per-RM address, and then clears the
 * cluster metrics, queue metrics and default metrics system.
 */
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
configuration = new Configuration();
|
||||||
|
UserGroupInformation.setConfiguration(configuration);
|
||||||
|
configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
|
||||||
|
configuration.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + ","
|
||||||
|
+ RM2_NODE_ID);
|
||||||
|
// Give every service-address key a per-RM variant for both RM ids.
for (String confKey : YarnConfiguration
|
||||||
|
.getServiceAddressConfKeys(configuration)) {
|
||||||
|
configuration.set(HAUtil.addSuffix(confKey, RM1_NODE_ID), RM1_ADDRESS);
|
||||||
|
configuration.set(HAUtil.addSuffix(confKey, RM2_NODE_ID), RM2_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset static metrics state before each test; the class comment notes
// that metrics are mostly static singletons.
ClusterMetrics.destroy();
|
||||||
|
QueueMetrics.clearQueueMetrics();
|
||||||
|
DefaultMetricsSystem.shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Starts an RM, transitions it to active and then to standby, and checks
 * that the "JvmMetrics" and "UgiMetrics" sources can still be looked up in
 * the default metrics system afterwards.
 */
@Test(timeout = 300000)
|
||||||
|
public void testMetricsAfterTransitionToStandby() throws Exception {
|
||||||
|
// Auto-failover is turned off; the transitions below are invoked directly.
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
|
||||||
|
Configuration conf = new YarnConfiguration(configuration);
|
||||||
|
MockRM rm = new MockRM(conf);
|
||||||
|
rm.init(conf);
|
||||||
|
rm.start();
|
||||||
|
rm.transitionToActive();
|
||||||
|
rm.transitionToStandby(true);
|
||||||
|
// Both sources must still be present after the standby transition.
assertNotNull(DefaultMetricsSystem.instance().getSource("JvmMetrics"));
|
||||||
|
assertNotNull(DefaultMetricsSystem.instance().getSource("UgiMetrics"));
|
||||||
|
rm.stop();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue