From 9d3ddb0b4d7dfe92bec5c8b016448e2f64079bba Mon Sep 17 00:00:00 2001 From: Jian He Date: Tue, 26 Apr 2016 21:00:17 -0700 Subject: [PATCH] YARN-4983. JVM and UGI metrics disappear after RM transitioned to standby mode (cherry picked from commit 4beff013546dbc29c004a1bad1b019dc9d2b751f) --- .../hadoop/metrics2/source/JvmMetrics.java | 4 + .../hadoop/security/UserGroupInformation.java | 11 +++ .../resourcemanager/ResourceManager.java | 28 +++++-- .../resourcemanager/TestRMHAMetrics.java | 76 +++++++++++++++++++ 4 files changed, 111 insertions(+), 8 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHAMetrics.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java index a6de50ba38d..caba170507a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/source/JvmMetrics.java @@ -86,6 +86,10 @@ public class JvmMetrics implements MetricsSource { new JvmMetrics(processName, sessionId)); } + public static void reattach(MetricsSystem ms, JvmMetrics jvmMetrics) { + ms.register(JvmMetrics.name(), JvmMetrics.description(), jvmMetrics); + } + public static JvmMetrics initSingleton(String processName, String sessionId) { return Singleton.INSTANCE.init(processName, sessionId); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index 2ea80dd8902..f37ffb8bf28 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -124,6 +124,10 @@ public class UserGroupInformation { return DefaultMetricsSystem.instance().register(new UgiMetrics()); } + static void reattach() { + metrics = UgiMetrics.create(); + } + void addGetGroups(long latency) { getGroups.add(latency); if (getGroupsQuantiles != null) { @@ -236,6 +240,13 @@ public class UserGroupInformation { } } + /** + * Reattach the class's metrics to a new metric system. + */ + public static void reattachMetrics() { + UgiMetrics.reattach(); + } + /** Metrics to track UGI activity */ static UgiMetrics metrics = UgiMetrics.create(); /** The auth method to use */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 2744bb455ff..fec6d8ef8e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.http.lib.StaticUserWebFilter; +import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.AuthenticationFilterInitializer; @@ -170,7 +171,7 @@ public class ResourceManager extends CompositeService implements Recoverable { private WebApp webApp; private AppReportFetcher fetcher = null; protected ResourceTrackerService resourceTracker; - private JvmPauseMonitor pauseMonitor; + private JvmMetrics jvmMetrics; private boolean curatorEnabled = false; private CuratorFramework curator; private final String zkRootNodePassword = @@ -283,7 +284,7 @@ public class ResourceManager extends CompositeService implements Recoverable { rmContext.setYarnConfiguration(conf); - createAndInitActiveServices(); + createAndInitActiveServices(false); webAppAddress = WebAppUtils.getWebAppBindURL(this.conf, YarnConfiguration.RM_BIND_HOST, @@ -488,6 +489,7 @@ public class ResourceManager extends CompositeService implements Recoverable { private ContainerAllocationExpirer containerAllocationExpirer; private ResourceManager rm; private RMActiveServiceContext activeServiceContext; + private boolean fromActive = false; RMActiveServices(ResourceManager rm) { super("RMActiveServices"); @@ -595,11 +597,17 @@ public class ResourceManager extends CompositeService implements Recoverable { addService(resourceTracker); rmContext.setResourceTrackerService(resourceTracker); - DefaultMetricsSystem.initialize("ResourceManager"); - JvmMetrics jm = JvmMetrics.initSingleton("ResourceManager", null); - pauseMonitor = new JvmPauseMonitor(); + MetricsSystem ms = DefaultMetricsSystem.initialize("ResourceManager"); + if (fromActive) { + JvmMetrics.reattach(ms, jvmMetrics); + UserGroupInformation.reattachMetrics(); + } else { + jvmMetrics = JvmMetrics.initSingleton("ResourceManager", null); + } + + JvmPauseMonitor pauseMonitor = new JvmPauseMonitor(); addService(pauseMonitor); - jm.setPauseMonitor(pauseMonitor); + jvmMetrics.setPauseMonitor(pauseMonitor); // Initialize the Reservation system if (conf.getBoolean(YarnConfiguration.RM_RESERVATION_SYSTEM_ENABLE, @@ -1081,9 +1089,13 @@ public class ResourceManager extends CompositeService implements Recoverable { /** * Helper method to create and init {@link #activeServices}. This creates an * instance of {@link RMActiveServices} and initializes it. + * + * @param fromActive Indicates if the call is from the active state transition + * or the RM initialization. */ - protected void createAndInitActiveServices() { + protected void createAndInitActiveServices(boolean fromActive) { activeServices = new RMActiveServices(this); + activeServices.fromActive = fromActive; activeServices.init(conf); } @@ -1114,7 +1126,7 @@ public class ResourceManager extends CompositeService implements Recoverable { QueueMetrics.clearQueueMetrics(); if (initialize) { resetDispatcher(); - createAndInitActiveServices(); + createAndInitActiveServices(true); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHAMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHAMetrics.java new file mode 100644 index 00000000000..9f57cb104fb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHAMetrics.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.conf.HAUtil; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.junit.Before; +import org.junit.Test; + +import static junit.framework.TestCase.assertNotNull; + +/** + * Metrics related RM HA testing. Metrics are mostly static singletons. To + * avoid interference with other RM HA tests, separating metric tests for RM HA + * into a separate file temporarily. + */ +public class TestRMHAMetrics { + private Configuration configuration; + + private static final String RM1_ADDRESS = "1.1.1.1:1"; + private static final String RM1_NODE_ID = "rm1"; + + private static final String RM2_ADDRESS = "0.0.0.0:0"; + private static final String RM2_NODE_ID = "rm2"; + + @Before + public void setUp() throws Exception { + configuration = new Configuration(); + UserGroupInformation.setConfiguration(configuration); + configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + configuration.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + + RM2_NODE_ID); + for (String confKey : YarnConfiguration + .getServiceAddressConfKeys(configuration)) { + configuration.set(HAUtil.addSuffix(confKey, RM1_NODE_ID), RM1_ADDRESS); + configuration.set(HAUtil.addSuffix(confKey, RM2_NODE_ID), RM2_ADDRESS); + } + + ClusterMetrics.destroy(); + QueueMetrics.clearQueueMetrics(); + DefaultMetricsSystem.shutdown(); + } + + @Test(timeout = 300000) + public void testMetricsAfterTransitionToStandby() throws Exception { + configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false); + Configuration conf = new YarnConfiguration(configuration); + MockRM rm = new MockRM(conf); + rm.init(conf); + rm.start(); + rm.transitionToActive(); + rm.transitionToStandby(true); + assertNotNull(DefaultMetricsSystem.instance().getSource("JvmMetrics")); + assertNotNull(DefaultMetricsSystem.instance().getSource("UgiMetrics")); + rm.stop(); + } +}