HDFS-5220 Expose group resolution time as metric (jxiang via cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1555986 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Colin McCabe 2014-01-06 19:30:13 +00:00
parent 0d0ccfd7c1
commit 5745523e98
7 changed files with 77 additions and 5 deletions

View File

@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.http.lib.StaticUserWebFilter; import org.apache.hadoop.http.lib.StaticUserWebFilter;
import org.apache.hadoop.security.authorize.Service;
/** /**
* This class contains constants for configuration keys used * This class contains constants for configuration keys used
@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
/** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */ /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT = public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
10000; 10000;
public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
"hadoop.user.group.metrics.percentiles.intervals";
} }

View File

@ -138,6 +138,7 @@ public class Groups {
List<String> groupList = impl.getGroups(user); List<String> groupList = impl.getGroups(user);
long endMs = Time.monotonicNow(); long endMs = Time.monotonicNow();
long deltaMs = endMs - startMs ; long deltaMs = endMs - startMs ;
UserGroupInformation.metrics.addGetGroups(deltaMs);
if (deltaMs > warningDeltaMs) { if (deltaMs > warningDeltaMs) {
LOG.warn("Potential performance problem: getGroups(user=" + user +") " + LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
"took " + deltaMs + " milliseconds."); "took " + deltaMs + " milliseconds.");

View File

@ -17,6 +17,7 @@
*/ */
package org.apache.hadoop.security; package org.apache.hadoop.security;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -56,6 +57,8 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.authentication.util.KerberosUtil; import org.apache.hadoop.security.authentication.util.KerberosUtil;
@ -90,14 +93,27 @@ public class UserGroupInformation {
*/ */
@Metrics(about="User and group related metrics", context="ugi") @Metrics(about="User and group related metrics", context="ugi")
static class UgiMetrics { static class UgiMetrics {
final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
@Metric("Rate of successful kerberos logins and latency (milliseconds)") @Metric("Rate of successful kerberos logins and latency (milliseconds)")
MutableRate loginSuccess; MutableRate loginSuccess;
@Metric("Rate of failed kerberos logins and latency (milliseconds)") @Metric("Rate of failed kerberos logins and latency (milliseconds)")
MutableRate loginFailure; MutableRate loginFailure;
@Metric("GetGroups") MutableRate getGroups;
MutableQuantiles[] getGroupsQuantiles;
static UgiMetrics create() { static UgiMetrics create() {
return DefaultMetricsSystem.instance().register(new UgiMetrics()); return DefaultMetricsSystem.instance().register(new UgiMetrics());
} }
/**
 * Records one group-lookup latency sample.
 *
 * The sample is always added to the {@code getGroups} rate metric. When
 * quantile metrics have been set up ({@code getGroupsQuantiles} is
 * non-null), the same sample is also added to every one of them.
 *
 * @param latency the measured latency to record
 */
void addGetGroups(long latency) {
  getGroups.add(latency);
  // Quantiles are optional; nothing more to record when none are configured.
  if (getGroupsQuantiles == null) {
    return;
  }
  for (MutableQuantiles quantile : getGroupsQuantiles) {
    quantile.add(latency);
  }
}
} }
/** /**
@ -239,6 +255,20 @@ public class UserGroupInformation {
groups = Groups.getUserToGroupsMappingService(conf); groups = Groups.getUserToGroupsMappingService(conf);
} }
UserGroupInformation.conf = conf; UserGroupInformation.conf = conf;
if (metrics.getGroupsQuantiles == null) {
int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
if (intervals != null && intervals.length > 0) {
final int length = intervals.length;
MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
for (int i = 0; i < length; i++) {
getGroupsQuantiles[i] = metrics.registry.newQuantiles(
"getGroups" + intervals[i] + "s",
"Get groups", "ops", "latency", intervals[i]);
}
metrics.getGroupsQuantiles = getGroupsQuantiles;
}
}
} }
/** /**

View File

@ -19,7 +19,6 @@ package org.apache.hadoop.security;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.TestSaslRPC;
import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.authentication.util.KerberosName; import org.apache.hadoop.security.authentication.util.KerberosName;
@ -39,9 +38,9 @@ import java.util.Collection;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.Set; import java.util.Set;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
import static org.apache.hadoop.ipc.TestSaslRPC.*; import static org.apache.hadoop.ipc.TestSaslRPC.*;
import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
import static org.apache.hadoop.test.MetricsAsserts.*; import static org.apache.hadoop.test.MetricsAsserts.*;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
@ -54,6 +53,8 @@ public class TestUserGroupInformation {
final private static String GROUP3_NAME = "group3"; final private static String GROUP3_NAME = "group3";
final private static String[] GROUP_NAMES = final private static String[] GROUP_NAMES =
new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME}; new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
// Rollover interval of percentile metrics (in seconds)
private static final int PERCENTILES_INTERVAL = 1;
private static Configuration conf; private static Configuration conf;
/** /**
@ -79,7 +80,8 @@ public class TestUserGroupInformation {
// doesn't matter what it is, but getGroups needs it set... // doesn't matter what it is, but getGroups needs it set...
// use HADOOP_HOME environment variable to prevent interfering with logic // use HADOOP_HOME environment variable to prevent interfering with logic
// that finds winutils.exe // that finds winutils.exe
System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME")); String home = System.getenv("HADOOP_HOME");
System.setProperty("hadoop.home.dir", (home != null ? home : "."));
// fake the realm if kerberos is enabled // fake the realm if kerberos is enabled
System.setProperty("java.security.krb5.kdc", ""); System.setProperty("java.security.krb5.kdc", "");
System.setProperty("java.security.krb5.realm", "DEFAULT.REALM"); System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@ -149,11 +151,15 @@ public class TestUserGroupInformation {
/** Test login method */ /** Test login method */
@Test (timeout = 30000) @Test (timeout = 30000)
public void testLogin() throws Exception { public void testLogin() throws Exception {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
String.valueOf(PERCENTILES_INTERVAL));
UserGroupInformation.setConfiguration(conf);
// login from unix // login from unix
UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
assertEquals(UserGroupInformation.getCurrentUser(), assertEquals(UserGroupInformation.getCurrentUser(),
UserGroupInformation.getLoginUser()); UserGroupInformation.getLoginUser());
assertTrue(ugi.getGroupNames().length >= 1); assertTrue(ugi.getGroupNames().length >= 1);
verifyGroupMetrics(1);
// ensure that doAs works correctly // ensure that doAs works correctly
UserGroupInformation userGroupInfo = UserGroupInformation userGroupInfo =
@ -727,6 +733,21 @@ public class TestUserGroupInformation {
} }
} }
/**
 * Verifies the group-resolution metrics published by the "UgiMetrics" source.
 *
 * When {@code groups} is positive, this checks the GetGroups operation
 * counter and average-time gauge, waits for one percentile rollover
 * interval, and then checks the quantile gauges for the configured interval.
 * When {@code groups} is zero or negative, the metrics are collected but
 * nothing is asserted.
 *
 * @param groups the expected number of getGroups operations recorded so far
 * @throws InterruptedException if the rollover wait is interrupted
 */
private static void verifyGroupMetrics(
    long groups) throws InterruptedException {
  MetricsRecordBuilder rb = getMetrics("UgiMetrics");
  if (groups > 0) {
    assertCounter("GetGroupsNumOps", groups, rb);
    double avg = getDoubleGauge("GetGroupsAvgTime", rb);
    assertTrue(avg >= 0.0);

    // Sleep for an interval+slop to let the percentiles rollover
    Thread.sleep((PERCENTILES_INTERVAL + 1) * 1000L);
    // Collect again after the wait: the builder above was obtained before
    // the sleep, so asserting on it would not observe the rollover.
    // NOTE(review): assumes getMetrics() returns a point-in-time capture.
    rb = getMetrics("UgiMetrics");
    // Check that the percentiles were updated. The gauge name is derived
    // from the configured interval rather than hard-coding "1s".
    assertQuantileGauges("GetGroups" + PERCENTILES_INTERVAL + "s", rb);
  }
}
/** /**
* Test for the case that UserGroupInformation.getCurrentUser() * Test for the case that UserGroupInformation.getCurrentUser()
* is called when the AccessControlContext has a Subject associated * is called when the AccessControlContext has a Subject associated

View File

@ -187,6 +187,8 @@ Release 2.4.0 - UNRELEASED
HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper. HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
(Haohui Mai via jing9) (Haohui Mai via jing9)
HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)

View File

@ -482,6 +482,14 @@ public class NameNode implements NameNodeStatusMXBean {
* @param conf the configuration * @param conf the configuration
*/ */
protected void initialize(Configuration conf) throws IOException { protected void initialize(Configuration conf) throws IOException {
if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
if (intervals != null) {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
intervals);
}
}
UserGroupInformation.setConfiguration(conf); UserGroupInformation.setConfiguration(conf);
loginAsNameNodeUser(conf); loginAsNameNodeUser(conf);

View File

@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.test.MetricsAsserts; import org.apache.hadoop.test.MetricsAsserts;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.apache.log4j.Level; import org.apache.log4j.Level;
@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
@After @After
public void tearDown() throws Exception { public void tearDown() throws Exception {
MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
if (source != null) {
// Run only once, since the UGI metrics are cleaned up during teardown
MetricsRecordBuilder rb = getMetrics(source);
assertQuantileGauges("GetGroups1s", rb);
}
cluster.shutdown(); cluster.shutdown();
} }