HDFS-16881. Warn if AccessControlEnforcer runs for a long time to check permission. (#5268)

This commit is contained in:
Tsz-Wo Nicholas Sze 2023-01-05 09:31:52 +08:00 committed by GitHub
parent 4511c360b9
commit 5022003e0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 147 additions and 7 deletions

View File

@ -628,6 +628,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY =
"dfs.namenode.read-lock-reporting-threshold-ms";
public static final long DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT = 5000L;
public static final String DFS_NAMENODE_ACCESS_CONTROL_ENFORCER_REPORTING_THRESHOLD_MS_KEY
= "dfs.namenode.access-control-enforcer-reporting-threshold-ms";
public static final long DFS_NAMENODE_ACCESS_CONTROL_ENFORCER_REPORTING_THRESHOLD_MS_DEFAULT
= 1000L;
// Threshold for how long the lock warnings must be suppressed
public static final String DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY =
"dfs.lock.suppress.warning.interval";

View File

@ -87,6 +87,8 @@
import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PROTECTED_DIRECTORIES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESS_CONTROL_ENFORCER_REPORTING_THRESHOLD_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESS_CONTROL_ENFORCER_REPORTING_THRESHOLD_MS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PROTECTED_SUBDIRECTORIES_ENABLE;
@ -181,6 +183,8 @@ private static INodeDirectory createRoot(FSNamesystem namesystem) {
* ACL-related operations.
*/
private final boolean aclsEnabled;
/** Threshold to print a warning. */
private final long accessControlEnforcerReportingThresholdMs;
/**
* Support for POSIX ACL inheritance. Not final for testing purpose.
*/
@ -388,6 +392,10 @@ public enum DirOp {
DFS_PROTECTED_SUBDIRECTORIES_ENABLE,
DFS_PROTECTED_SUBDIRECTORIES_ENABLE_DEFAULT);
this.accessControlEnforcerReportingThresholdMs = conf.getLong(
DFS_NAMENODE_ACCESS_CONTROL_ENFORCER_REPORTING_THRESHOLD_MS_KEY,
DFS_NAMENODE_ACCESS_CONTROL_ENFORCER_REPORTING_THRESHOLD_MS_DEFAULT);
Preconditions.checkArgument(this.inodeXAttrsLimit >= 0,
"Cannot set a negative limit on the number of xattrs per inode (%s).",
DFSConfigKeys.DFS_NAMENODE_MAX_XATTRS_PER_INODE_KEY);
@ -1869,7 +1877,8 @@ FSPermissionChecker getPermissionChecker(String fsOwner, String superGroup,
UserGroupInformation ugi) throws AccessControlException {
return new FSPermissionChecker(
fsOwner, superGroup, ugi, getUserFilteredAttributeProvider(ugi),
useAuthorizationWithContextAPI);
useAuthorizationWithContextAPI,
accessControlEnforcerReportingThresholdMs);
}
void checkOwner(FSPermissionChecker pc, INodesInPath iip)

View File

@ -21,10 +21,13 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.Stack;
import java.util.function.LongFunction;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.ipc.CallerContext;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FSExceptionMessages;
@ -87,19 +90,21 @@ private String toAccessControlString(INodeAttributes inodeAttrib,
private final boolean isSuper;
private final INodeAttributeProvider attributeProvider;
private final boolean authorizeWithContext;
private final long accessControlEnforcerReportingThresholdMs;
private static ThreadLocal<String> operationType = new ThreadLocal<>();
protected FSPermissionChecker(String fsOwner, String supergroup,
UserGroupInformation callerUgi,
INodeAttributeProvider attributeProvider) {
this(fsOwner, supergroup, callerUgi, attributeProvider, false);
this(fsOwner, supergroup, callerUgi, attributeProvider, false, 0);
}
protected FSPermissionChecker(String fsOwner, String supergroup,
UserGroupInformation callerUgi,
INodeAttributeProvider attributeProvider,
boolean useAuthorizationWithContextAPI) {
boolean useAuthorizationWithContextAPI,
long accessControlEnforcerReportingThresholdMs) {
this.fsOwner = fsOwner;
this.supergroup = supergroup;
this.callerUgi = callerUgi;
@ -117,6 +122,38 @@ protected FSPermissionChecker(String fsOwner, String supergroup,
} else {
authorizeWithContext = useAuthorizationWithContextAPI;
}
this.accessControlEnforcerReportingThresholdMs
= accessControlEnforcerReportingThresholdMs;
}
private String checkAccessControlEnforcerSlowness(
long elapsedMs, AccessControlEnforcer ace,
boolean checkSuperuser, AuthorizationContext context) {
return checkAccessControlEnforcerSlowness(elapsedMs,
accessControlEnforcerReportingThresholdMs, ace.getClass(), checkSuperuser,
context.getPath(), context.getOperationName(),
context.getCallerContext());
}
/** @return the warning message if there is any. */
static String checkAccessControlEnforcerSlowness(
long elapsedMs, long thresholdMs, Class<? extends AccessControlEnforcer> clazz,
boolean checkSuperuser, String path, String op, Object caller) {
if (!LOG.isWarnEnabled()) {
return null;
}
if (thresholdMs <= 0) {
return null;
}
if (elapsedMs > thresholdMs) {
final String message = clazz + " ran for "
+ elapsedMs + "ms (threshold=" + thresholdMs + "ms) to check "
+ (checkSuperuser ? "superuser" : "permission")
+ " on " + path + " for " + op + " from caller " + caller;
LOG.warn(message, new Throwable("TRACE"));
return message;
}
return null;
}
public static void setOperationType(String opType) {
@ -139,9 +176,70 @@ public INodeAttributeProvider getAttributesProvider() {
return attributeProvider;
}
@FunctionalInterface
interface CheckPermission {
void run() throws AccessControlException;
}
static String runCheckPermission(CheckPermission checker,
LongFunction<String> checkElapsedMs) throws AccessControlException {
final String message;
final long start = Time.monotonicNow();
try {
checker.run();
} finally {
final long end = Time.monotonicNow();
message = checkElapsedMs.apply(end - start);
}
return message;
}
private AccessControlEnforcer getAccessControlEnforcer() {
return (attributeProvider != null)
? attributeProvider.getExternalAccessControlEnforcer(this) : this;
final AccessControlEnforcer e = Optional.ofNullable(attributeProvider)
.map(p -> p.getExternalAccessControlEnforcer(this))
.orElse(this);
if (e == this) {
return this;
}
// For an external AccessControlEnforcer, check for slowness.
return new AccessControlEnforcer() {
@Override
public void checkPermission(
String filesystemOwner, String superGroup, UserGroupInformation ugi,
INodeAttributes[] inodeAttrs, INode[] inodes, byte[][] pathByNameArr,
int snapshotId, String path, int ancestorIndex, boolean doCheckOwner,
FsAction ancestorAccess, FsAction parentAccess, FsAction access,
FsAction subAccess, boolean ignoreEmptyDir)
throws AccessControlException {
runCheckPermission(
() -> e.checkPermission(filesystemOwner, superGroup, ugi,
inodeAttrs, inodes, pathByNameArr, snapshotId, path,
ancestorIndex, doCheckOwner, ancestorAccess, parentAccess,
access, subAccess, ignoreEmptyDir),
elapsedMs -> checkAccessControlEnforcerSlowness(elapsedMs,
accessControlEnforcerReportingThresholdMs,
e.getClass(), false, path, operationType.get(),
CallerContext.getCurrent()));
}
@Override
public void checkPermissionWithContext(AuthorizationContext context)
throws AccessControlException {
runCheckPermission(
() -> e.checkPermissionWithContext(context),
elapsedMs -> checkAccessControlEnforcerSlowness(elapsedMs,
e, false, context));
}
@Override
public void checkSuperUserPermissionWithContext(
AuthorizationContext context) throws AccessControlException {
runCheckPermission(
() -> e.checkSuperUserPermissionWithContext(context),
elapsedMs -> checkAccessControlEnforcerSlowness(elapsedMs,
e, true, context));
}
};
}
private AuthorizationContext getAuthorizationContextForSuperUser(

View File

@ -103,7 +103,7 @@ public void testLegacyAPI() throws IOException {
thenReturn(mockEnforcer);
FSPermissionChecker checker = new FSPermissionChecker(
fsOwner, superGroup, ugi, mockINodeAttributeProvider, false);
fsOwner, superGroup, ugi, mockINodeAttributeProvider, false, 0);
when(iip.getPathSnapshotId()).thenReturn(snapshotId);
when(iip.getINodesArray()).thenReturn(inodes);
@ -128,7 +128,7 @@ public void testCheckPermissionWithContextAPI() throws IOException {
// force it to use the new, checkPermissionWithContext API.
FSPermissionChecker checker = new FSPermissionChecker(
fsOwner, superGroup, ugi, mockINodeAttributeProvider, true);
fsOwner, superGroup, ugi, mockINodeAttributeProvider, true, 0);
String operationName = "abc";
FSPermissionChecker.setOperationType(operationName);

View File

@ -40,6 +40,7 @@
import java.io.IOException;
import java.util.Arrays;
import java.util.function.LongFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
@ -52,6 +53,7 @@
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.invocation.InvocationOnMock;
@ -446,4 +448,29 @@ private static INodeFile createINodeFile(INodeDirectory parent, String name,
parent.addChild(inodeFile);
return inodeFile;
}
@Test
public void testCheckAccessControlEnforcerSlowness() throws Exception {
final long thresholdMs = 10;
final LongFunction<String> checkAccessControlEnforcerSlowness =
elapsedMs -> FSPermissionChecker.checkAccessControlEnforcerSlowness(
elapsedMs, thresholdMs, INodeAttributeProvider.AccessControlEnforcer.class,
false, "/foo", "mkdir", "client");
final String m1 = FSPermissionChecker.runCheckPermission(
() -> FSPermissionChecker.LOG.info("Fast runner"),
checkAccessControlEnforcerSlowness);
Assert.assertNull(m1);
final String m2 = FSPermissionChecker.runCheckPermission(() -> {
FSPermissionChecker.LOG.info("Slow runner");
try {
Thread.sleep(20);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new IllegalStateException(e);
}
}, checkAccessControlEnforcerSlowness);
Assert.assertNotNull(m2);
}
}