diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index 5e7fe93a7c7..3c3da625739 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -21,7 +21,6 @@ package org.apache.hadoop.fs; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.http.lib.StaticUserWebFilter; -import org.apache.hadoop.security.authorize.Service; /** * This class contains constants for configuration keys used @@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */ public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT = 10000; + + public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS = + "hadoop.user.group.metrics.percentiles.intervals"; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java index 33659c6fada..097bc30dfe3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java @@ -138,6 +138,7 @@ public class Groups { List groupList = impl.getGroups(user); long endMs = Time.monotonicNow(); long deltaMs = endMs - startMs ; + UserGroupInformation.metrics.addGetGroups(deltaMs); if (deltaMs > warningDeltaMs) { LOG.warn("Potential performance problem: getGroups(user=" + user +") " + "took " + deltaMs + " milliseconds."); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index cb248464c82..729a014575d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.security; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS; import java.io.File; import java.io.IOException; @@ -56,6 +57,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableQuantiles; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.authentication.util.KerberosUtil; @@ -90,14 +93,27 @@ public class UserGroupInformation { */ @Metrics(about="User and group related metrics", context="ugi") static class UgiMetrics { + final MetricsRegistry registry = new MetricsRegistry("UgiMetrics"); + @Metric("Rate of successful kerberos logins and latency (milliseconds)") MutableRate loginSuccess; @Metric("Rate of failed kerberos logins and latency (milliseconds)") MutableRate loginFailure; + @Metric("GetGroups") MutableRate getGroups; + MutableQuantiles[] getGroupsQuantiles; static UgiMetrics create() { return DefaultMetricsSystem.instance().register(new UgiMetrics()); } + + void addGetGroups(long latency) { + getGroups.add(latency); + if (getGroupsQuantiles != null) { + for (MutableQuantiles q : getGroupsQuantiles) { + q.add(latency); + } + } + } } /** @@ -239,6 +255,20 @@ public class UserGroupInformation { groups = Groups.getUserToGroupsMappingService(conf); } UserGroupInformation.conf = conf; + + if (metrics.getGroupsQuantiles == null) { + int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS); + if (intervals != null && intervals.length > 0) { + final int length = intervals.length; + MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length]; + for (int i = 0; i < length; i++) { + getGroupsQuantiles[i] = metrics.registry.newQuantiles( + "getGroups" + intervals[i] + "s", + "Get groups", "ops", "latency", intervals[i]); + } + metrics.getGroupsQuantiles = getGroupsQuantiles; + } + } } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java index cdad4c2fce7..b6b8cc78901 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java @@ -19,7 +19,6 @@ package org.apache.hadoop.security; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.Text; -import org.apache.hadoop.ipc.TestSaslRPC; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.authentication.util.KerberosName; @@ -39,9 +38,9 @@ import java.util.Collection; import java.util.LinkedHashSet; import java.util.Set; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL; import static org.apache.hadoop.ipc.TestSaslRPC.*; -import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier; import static org.apache.hadoop.test.MetricsAsserts.*; import static org.junit.Assert.*; import static org.mockito.Mockito.mock; @@ -53,7 +52,9 @@ public class TestUserGroupInformation { final private static String GROUP2_NAME = "group2"; final private static String GROUP3_NAME = "group3"; final private static String[] GROUP_NAMES = - new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME}; + new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME}; + // Rollover interval of percentile metrics (in seconds) + private static final int PERCENTILES_INTERVAL = 1; private static Configuration conf; /** @@ -79,7 +80,8 @@ public class TestUserGroupInformation { // doesn't matter what it is, but getGroups needs it set... // use HADOOP_HOME environment variable to prevent interfering with logic // that finds winutils.exe - System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME")); + String home = System.getenv("HADOOP_HOME"); + System.setProperty("hadoop.home.dir", (home != null ? home : ".")); // fake the realm is kerberos is enabled System.setProperty("java.security.krb5.kdc", ""); System.setProperty("java.security.krb5.realm", "DEFAULT.REALM"); @@ -149,11 +151,15 @@ public class TestUserGroupInformation { /** Test login method */ @Test (timeout = 30000) public void testLogin() throws Exception { + conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, + String.valueOf(PERCENTILES_INTERVAL)); + UserGroupInformation.setConfiguration(conf); // login from unix UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); assertEquals(UserGroupInformation.getCurrentUser(), UserGroupInformation.getLoginUser()); assertTrue(ugi.getGroupNames().length >= 1); + verifyGroupMetrics(1); // ensure that doAs works correctly UserGroupInformation userGroupInfo = @@ -727,6 +733,21 @@ public class TestUserGroupInformation { } } + private static void verifyGroupMetrics( + long groups) throws InterruptedException { + MetricsRecordBuilder rb = getMetrics("UgiMetrics"); + if (groups > 0) { + assertCounter("GetGroupsNumOps", groups, rb); + double avg = getDoubleGauge("GetGroupsAvgTime", rb); + assertTrue(avg >= 0.0); + + // Sleep for an interval+slop to let the percentiles rollover + Thread.sleep((PERCENTILES_INTERVAL+1)*1000); + // Check that the percentiles were updated + assertQuantileGauges("GetGroups1s", rb); + } + } + /** * Test for the case that UserGroupInformation.getCurrentUser() * is called when the AccessControlContext has a Subject associated diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0d7613087ef..cd830faf6bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -187,6 +187,8 @@ Release 2.4.0 - UNRELEASED HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper. (Haohui Mai via jing9) + HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 0b8d70bb9bf..2346a358d7e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -482,6 +482,14 @@ public class NameNode implements NameNodeStatusMXBean { * @param conf the configuration */ protected void initialize(Configuration conf) throws IOException { + if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) { + String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY); + if (intervals != null) { + conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS, + intervals); + } + } + UserGroupInformation.setConfiguration(conf); loginAsNameNodeUser(conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 1ef67f9ed4b..c00f369f9b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.test.MetricsAsserts; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; @@ -108,6 +110,12 @@ public class TestNameNodeMetrics { @After public void tearDown() throws Exception { + MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics"); + if (source != null) { + // Run only once since the UGI metrics is cleaned up during teardown + MetricsRecordBuilder rb = getMetrics(source); + assertQuantileGauges("GetGroups1s", rb); + } cluster.shutdown(); }