HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1555976 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Colin McCabe 2014-01-06 18:59:10 +00:00
parent f342dbcfc7
commit 2a1ecd00da
7 changed files with 76 additions and 4 deletions

View File

@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.http.lib.StaticUserWebFilter;
import org.apache.hadoop.security.authorize.Service;
/**
* This class contains constants for configuration keys used
@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
/** Default value for IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_KEY */
public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
10000;
public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
"hadoop.user.group.metrics.percentiles.intervals";
}

View File

@ -138,6 +138,7 @@ public class Groups {
List<String> groupList = impl.getGroups(user);
long endMs = Time.monotonicNow();
long deltaMs = endMs - startMs ;
UserGroupInformation.metrics.addGetGroups(deltaMs);
if (deltaMs > warningDeltaMs) {
LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
"took " + deltaMs + " milliseconds.");

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.security;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import java.io.File;
import java.io.IOException;
@ -58,6 +59,8 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.authentication.util.KerberosUtil;
@ -92,14 +95,27 @@ public class UserGroupInformation {
*/
@Metrics(about="User and group related metrics", context="ugi")
static class UgiMetrics {
  // Registry used to create the percentile metrics at configuration time.
  final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");

  @Metric("Rate of successful kerberos logins and latency (milliseconds)")
  MutableRate loginSuccess;

  @Metric("Rate of failed kerberos logins and latency (milliseconds)")
  MutableRate loginFailure;

  @Metric("GetGroups")
  MutableRate getGroups;

  // Optional percentile trackers for getGroups latency; remains null until
  // code outside this class assigns it.
  MutableQuantiles[] getGroupsQuantiles;

  /** Builds a new instance and registers it with the default metrics system. */
  static UgiMetrics create() {
    UgiMetrics source = new UgiMetrics();
    return DefaultMetricsSystem.instance().register(source);
  }

  /**
   * Records one group-lookup latency sample in the rate metric and in every
   * configured percentile tracker.
   *
   * @param latency the observed latency of the lookup
   */
  void addGetGroups(long latency) {
    getGroups.add(latency);
    final MutableQuantiles[] trackers = getGroupsQuantiles;
    if (trackers == null) {
      // Percentile tracking has not been set up; the rate metric is enough.
      return;
    }
    for (int i = 0; i < trackers.length; i++) {
      trackers[i].add(latency);
    }
  }
}
/**
@ -250,6 +266,20 @@ public class UserGroupInformation {
groups = Groups.getUserToGroupsMappingService(conf);
}
UserGroupInformation.conf = conf;
if (metrics.getGroupsQuantiles == null) {
int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
if (intervals != null && intervals.length > 0) {
final int length = intervals.length;
MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
for (int i = 0; i < length; i++) {
getGroupsQuantiles[i] = metrics.registry.newQuantiles(
"getGroups" + intervals[i] + "s",
"Get groups", "ops", "latency", intervals[i]);
}
metrics.getGroupsQuantiles = getGroupsQuantiles;
}
}
}
/**

View File

@ -19,7 +19,6 @@ package org.apache.hadoop.security;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.TestSaslRPC;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.authentication.util.KerberosName;
@ -40,9 +39,9 @@ import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Set;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
import static org.apache.hadoop.ipc.TestSaslRPC.*;
import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
import static org.apache.hadoop.test.MetricsAsserts.*;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
@ -55,6 +54,8 @@ public class TestUserGroupInformation {
final private static String GROUP3_NAME = "group3";
final private static String[] GROUP_NAMES =
new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
// Rollover interval of percentile metrics (in seconds)
private static final int PERCENTILES_INTERVAL = 1;
private static Configuration conf;
/**
@ -80,7 +81,8 @@ public class TestUserGroupInformation {
// doesn't matter what it is, but getGroups needs it set...
// use HADOOP_HOME environment variable to prevent interfering with logic
// that finds winutils.exe
System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
String home = System.getenv("HADOOP_HOME");
System.setProperty("hadoop.home.dir", (home != null ? home : "."));
// fake the realm if kerberos is enabled
System.setProperty("java.security.krb5.kdc", "");
System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@ -150,11 +152,15 @@ public class TestUserGroupInformation {
/** Test login method */
@Test (timeout = 30000)
public void testLogin() throws Exception {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
String.valueOf(PERCENTILES_INTERVAL));
UserGroupInformation.setConfiguration(conf);
// login from unix
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
assertEquals(UserGroupInformation.getCurrentUser(),
UserGroupInformation.getLoginUser());
assertTrue(ugi.getGroupNames().length >= 1);
verifyGroupMetrics(1);
// ensure that doAs works correctly
UserGroupInformation userGroupInfo =
@ -728,6 +734,21 @@ public class TestUserGroupInformation {
}
}
/**
 * Checks the group-resolution metrics published by the "UgiMetrics" source.
 *
 * @param groups minimum number of group lookups expected to have been
 *               recorded; when not positive only the presence of the metrics
 *               source is checked
 * @throws InterruptedException if interrupted while waiting for the
 *                              percentile window to roll over
 */
private static void verifyGroupMetrics(
    long groups) throws InterruptedException {
  MetricsRecordBuilder rb = getMetrics("UgiMetrics");
  if (groups > 0) {
    // The UGI metrics object is static, so lookups performed by code that ran
    // earlier in the same JVM may already be counted: require "at least".
    assertCounterGt("GetGroupsNumOps", groups - 1, rb);
    double avg = getDoubleGauge("GetGroupsAvgTime", rb);
    assertTrue(avg >= 0.0);

    // Sleep for an interval+slop to let the percentiles rollover
    Thread.sleep((PERCENTILES_INTERVAL + 1) * 1000L);
    // Take a fresh snapshot: the one captured before the sleep cannot
    // reflect the rolled-over percentile window.
    rb = getMetrics("UgiMetrics");
    // Check that the percentiles were updated
    assertQuantileGauges("GetGroups" + PERCENTILES_INTERVAL + "s", rb);
  }
}
/**
* Test for the case that UserGroupInformation.getCurrentUser()
* is called when the AccessControlContext has a Subject associated

View File

@ -788,6 +788,8 @@ Release 2.4.0 - UNRELEASED
HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
(Haohui Mai via jing9)
HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)

View File

@ -480,6 +480,14 @@ public class NameNode implements NameNodeStatusMXBean {
* @param conf the configuration
*/
protected void initialize(Configuration conf) throws IOException {
if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
if (intervals != null) {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
intervals);
}
}
UserGroupInformation.setConfiguration(conf);
loginAsNameNodeUser(conf);

View File

@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.test.MetricsAsserts;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level;
@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
/**
 * Verifies the UGI percentile gauges (when the metrics source is still
 * registered) and then shuts the test cluster down.
 */
@After
public void tearDown() throws Exception {
  try {
    MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
    if (source != null) {
      // Run only once since the UGI metrics are cleaned up during teardown
      MetricsRecordBuilder rb = getMetrics(source);
      assertQuantileGauges("GetGroups1s", rb);
    }
  } finally {
    // Always release the cluster, even if the metrics assertion above fails;
    // the null guard covers a setup that failed before the cluster existed.
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}