From c6e7b631bdcfbb9141343538a3f84f712e0f9ae7 Mon Sep 17 00:00:00 2001
From: Zhe Zhang
Date: Mon, 14 Nov 2016 16:33:15 -0800
Subject: [PATCH] HDFS-10872. Add MutableRate metrics for FSNamesystemLock
 operations. Contributed by Erik Krogen.

(cherry picked from commit 818915aabb22653e212f4646b1b3cd0673330328)
---
 .../hadoop/metrics2/lib/MetricsRegistry.java       |   8 +
 .../metrics2/lib/MutableMetricsFactory.java        |   4 +-
 .../org/apache/hadoop/hdfs/DFSConfigKeys.java      |   4 +
 .../hdfs/server/namenode/CacheManager.java         |   2 +-
 .../hdfs/server/namenode/Checkpointer.java         |   2 +-
 .../ContentSummaryComputationContext.java          |   2 +-
 .../hdfs/server/namenode/FSEditLogLoader.java      |   4 +-
 .../hdfs/server/namenode/FSNamesystem.java         | 427 ++++++++++--------
 .../server/namenode/FSNamesystemLock.java          |  59 ++-
 .../hdfs/server/namenode/LeaseManager.java         |   2 +-
 .../hdfs/server/namenode/NamenodeFsck.java         |   2 +-
 .../src/main/resources/hdfs-default.xml            |   8 +
 .../server/namenode/TestFSNamesystemLock.java      |  59 ++-
 .../mapreduce/MiniHadoopClusterManager.java        |   1 +
 14 files changed, 383 insertions(+), 201 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java
index 6e7e5ab7cc1..1ef74f41c03 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MetricsRegistry.java
@@ -281,6 +281,14 @@ public synchronized MutableRate newRate(String name, String desc,
     return ret;
   }

+  public synchronized MutableRatesWithAggregation newRatesWithAggregation(
+      String name) {
+    checkMetricName(name);
+    MutableRatesWithAggregation rates = new MutableRatesWithAggregation();
+    metricsMap.put(name, rates);
+    return rates;
+  }
+
   synchronized void add(String name, MutableMetric metric) {
     checkMetricName(name);
     metricsMap.put(name, metric);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java
index ac05132dab6..4712ccfa65b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableMetricsFactory.java
@@ -68,9 +68,7 @@ MutableMetric newForField(Field field, Metric annotation,
       return new MutableRates(registry);
     }
     if (cls == MutableRatesWithAggregation.class) {
-      MutableRatesWithAggregation rates = new MutableRatesWithAggregation();
-      registry.add(info.name(), rates);
-      return rates;
+      return registry.newRatesWithAggregation(info.name());
     }
     if (cls == MutableStat.class) {
       return registry.newStat(info.name(), info.description(),
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index fb0854f234c..c289f500360 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -382,6 +382,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long
       DFS_NAMENODE_MAX_LOCK_HOLD_TO_RELEASE_LEASE_MS_DEFAULT = 25;
+  public static
final String DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY = + "dfs.namenode.lock.detailed-metrics.enabled"; + public static final boolean DFS_NAMENODE_LOCK_DETAILED_METRICS_DEFAULT = + false; // Threshold for how long namenode locks must be held for the // event to be logged public static final String DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index d628d608a6d..12e64f62032 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -951,7 +951,7 @@ public final void processCacheReport(final DatanodeID datanodeID, processCacheReportImpl(datanode, blockIds); } finally { endTime = Time.monotonicNow(); - namesystem.writeUnlock(); + namesystem.writeUnlock("processCacheReport"); } // Log the block report processing stats from Namenode perspective diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 83d835aca81..84915ccc8ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -263,7 +263,7 @@ void doCheckpoint() throws IOException { bnStorage.writeAll(); } } finally { - backupNode.namesystem.writeUnlock(); + backupNode.namesystem.writeUnlock("doCheckpoint"); } if(cpCmd.needToReturnImage()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java index 4208b53dfa5..53fa552077e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ContentSummaryComputationContext.java @@ -115,7 +115,7 @@ public boolean yield() { // unlock dir.readUnlock(); - fsn.readUnlock(); + fsn.readUnlock("contentSummary"); try { Thread.sleep(sleepMilliSec, sleepNanoSec); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 7dc43458b93..7d3dc99ca0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -148,7 +148,7 @@ long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId, return numEdits; } finally { edits.close(); - fsNamesys.writeUnlock(); + fsNamesys.writeUnlock("loadFSEdits"); prog.endStep(Phase.LOADING_EDITS, step); } } @@ -285,7 +285,7 @@ long loadEditRecords(EditLogInputStream in, boolean closeOnExit, in.close(); } fsDir.writeUnlock(); - fsNamesys.writeUnlock(); + fsNamesys.writeUnlock("loadEditRecords"); if (LOG.isTraceEnabled()) { LOG.trace("replaying 
edit log finished"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 2e283bc4f5c..490c32ab560 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -279,6 +279,8 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; +import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; @@ -323,6 +325,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, NameNodeMXBean { public static final Log LOG = LogFactory.getLog(FSNamesystem.class); + private final MetricsRegistry registry = new MetricsRegistry("FSNamesystem"); + @Metric final MutableRatesWithAggregation detailedLockHoldTimeMetrics = + registry.newRatesWithAggregation("detailedLockHoldTimeMetrics"); private final BlockIdManager blockIdManager; @@ -553,7 +558,7 @@ void setImageLoaded() { dir.markNameCacheInitialized(); cond.signalAll(); } finally { - writeUnlock(); + writeUnlock("setImageLoaded"); } } @@ -724,7 +729,7 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { LOG.info("Enabling async auditlog"); enableAsyncAuditLog(); } - fsLock = new FSNamesystemLock(conf); + fsLock = new FSNamesystemLock(conf, detailedLockHoldTimeMetrics); cond = fsLock.newWriteLockCondition(); cpLock = new ReentrantLock(); @@ -1033,7 +1038,7 @@ private void loadFSImage(StartupOption startOpt) throws IOException { if (!success) { fsImage.close(); } - writeUnlock(); + writeUnlock("loadFSImage"); } imageLoadComplete(); } @@ -1084,7 +1089,7 @@ void startCommonServices(Configuration conf, HAContext haContext) throws IOExcep setBlockTotal(completeBlocksTotal); blockManager.activate(conf); } finally { - writeUnlock(); + writeUnlock("startCommonServices"); } registerMXBean(); @@ -1108,7 +1113,7 @@ void stopCommonServices() { try { if (blockManager != null) blockManager.close(); } finally { - writeUnlock(); + writeUnlock("stopCommonServices"); } RetryCache.clear(retryCache); } @@ -1203,7 +1208,7 @@ void startActiveServices() throws IOException { } finally { startingActiveService = false; checkSafeMode(); - writeUnlock(); + writeUnlock("startActiveServices"); } } @@ -1269,7 +1274,7 @@ void stopActiveServices() { blockManager.setInitializedReplQueues(false); } } finally { - writeUnlock(); + writeUnlock("stopActiveServices"); } } @@ -1513,6 +1518,9 @@ public void readLock() { public void readUnlock() { this.fsLock.readUnlock(); } + public void readUnlock(String opName) { + this.fsLock.readUnlock(opName); + } @Override public void writeLock() { this.fsLock.writeLock(); @@ -1525,6 +1533,9 @@ public void writeLockInterruptibly() throws InterruptedException { public void writeUnlock() { this.fsLock.writeUnlock(); } + public void writeUnlock(String opName) { + this.fsLock.writeUnlock(opName); + } @Override public boolean hasWriteLock() { return this.fsLock.isWriteLockedByCurrentThread(); @@ -1563,7 +1574,7 @@ NamespaceInfo getNamespaceInfo() { try { return unprotectedGetNamespaceInfo(); } 
finally { - readUnlock(); + readUnlock("getNamespaceInfo"); } } @@ -1644,7 +1655,7 @@ void metaSave(String filename) throws IOException { out.flush(); out.close(); } finally { - writeUnlock(); + writeUnlock("metaSave"); } } @@ -1681,6 +1692,7 @@ FsServerDefaults getServerDefaults() throws StandbyException { * @throws IOException */ void setPermission(String src, FsPermission permission) throws IOException { + final String operationName = "setPermission"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1689,13 +1701,13 @@ void setPermission(String src, FsPermission permission) throws IOException { checkNameNodeSafeMode("Cannot set permission for " + src); auditStat = FSDirAttrOp.setPermission(dir, src, permission); } catch (AccessControlException e) { - logAuditEvent(false, "setPermission", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setPermission", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -1704,6 +1716,7 @@ void setPermission(String src, FsPermission permission) throws IOException { */ void setOwner(String src, String username, String group) throws IOException { + final String operationName = "setOwner"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1712,13 +1725,13 @@ void setOwner(String src, String username, String group) checkNameNodeSafeMode("Cannot set owner for " + src); auditStat = FSDirAttrOp.setOwner(dir, src, username, group); } catch (AccessControlException e) { - logAuditEvent(false, "setOwner", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setOwner", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -1727,6 +1740,7 @@ void setOwner(String src, String username, String group) */ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, long offset, long length) throws IOException { + final String operationName = "open"; checkOperation(OperationCategory.READ); GetBlockLocationsResult res = null; FSPermissionChecker pc = getPermissionChecker(); @@ -1751,13 +1765,13 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, } } } catch (AccessControlException e) { - logAuditEvent(false, "open", srcArg); + logAuditEvent(false, operationName, srcArg); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "open", srcArg); + logAuditEvent(true, operationName, srcArg); if (!isInSafeMode() && res.updateAccessTime()) { String src = srcArg; @@ -1798,7 +1812,7 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, } catch (Throwable e) { LOG.warn("Failed to update the access time of " + src, e); } finally { - writeUnlock(); + writeUnlock(operationName); } } @@ -1831,6 +1845,7 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, void concat(String target, String [] srcs, boolean logRetryCache) throws IOException { waitForLoadingFSImage(); + final String operationName = "concat"; HdfsFileStatus stat = null; boolean success = false; writeLock(); @@ -1840,11 +1855,12 @@ void concat(String target, String [] srcs, boolean logRetryCache) stat = FSDirConcatOp.concat(dir, target, srcs, logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { 
getEditLog().logSync(); } - logAuditEvent(success, "concat", Arrays.toString(srcs), target, stat); + logAuditEvent(success, operationName, Arrays.toString(srcs), + target, stat); } } @@ -1854,6 +1870,7 @@ void concat(String target, String [] srcs, boolean logRetryCache) * written to the edits log but is not flushed. */ void setTimes(String src, long mtime, long atime) throws IOException { + final String operationName = "setTimes"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -1862,13 +1879,13 @@ void setTimes(String src, long mtime, long atime) throws IOException { checkNameNodeSafeMode("Cannot set times " + src); auditStat = FSDirAttrOp.setTimes(dir, src, mtime, atime); } catch (AccessControlException e) { - logAuditEvent(false, "setTimes", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setTimes", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -1878,6 +1895,7 @@ void setTimes(String src, long mtime, long atime) throws IOException { void createSymlink(String target, String link, PermissionStatus dirPerms, boolean createParent, boolean logRetryCache) throws IOException { + final String operationName = "createSymlink"; if (!FileSystem.areSymlinksEnabled()) { throw new UnsupportedOperationException("Symlinks not supported"); } @@ -1890,13 +1908,13 @@ void createSymlink(String target, String link, auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link, dirPerms, createParent, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "createSymlink", link, target, null); + logAuditEvent(false, operationName, link, target, null); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "createSymlink", link, target, auditStat); + logAuditEvent(true, operationName, link, target, auditStat); } /** @@ -1914,6 +1932,7 @@ void createSymlink(String target, String link, */ boolean setReplication(final String src, final short replication) throws IOException { + final String operationName = "setReplication"; boolean success = false; waitForLoadingFSImage(); checkOperation(OperationCategory.WRITE); @@ -1923,14 +1942,14 @@ boolean setReplication(final String src, final short replication) checkNameNodeSafeMode("Cannot set replication for " + src); success = FSDirAttrOp.setReplication(dir, blockManager, src, replication); } catch (AccessControlException e) { - logAuditEvent(false, "setReplication", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } if (success) { getEditLog().logSync(); - logAuditEvent(true, "setReplication", src); + logAuditEvent(true, operationName, src); } return success; } @@ -1948,6 +1967,7 @@ boolean truncate(String src, long newLength, String clientName, String clientMachine, long mtime) throws IOException, UnresolvedLinkException { + String operationName = "truncate"; requireEffectiveLayoutVersionForFeature(Feature.TRUNCATE); final FSDirTruncateOp.TruncateResult r; try { @@ -1967,16 +1987,16 @@ boolean truncate(String src, long newLength, String clientName, r = FSDirTruncateOp.truncate(this, src, newLength, clientName, clientMachine, mtime, toRemoveBlocks, pc); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); if (!toRemoveBlocks.getToDeleteList().isEmpty()) { removeBlocks(toRemoveBlocks); 
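
Each operation name threaded through writeUnlock(...) above becomes a distinct rate metric. A rough sketch of the naming scheme, mirroring the prefixes and capitalization used by FSNamesystemLock.addMetric at the end of this patch:

    import org.apache.commons.lang.StringUtils;

    String op = "truncate";                               // operation name
    String metric = "FSNWriteLock" + StringUtils.capitalize(op);
    // -> "FSNWriteLockTruncate"; the rates metric then publishes the
    //    FSNWriteLockTruncateNumOps / FSNWriteLockTruncateAvgTime pair
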
toRemoveBlocks.clear(); } - logAuditEvent(true, "truncate", src, null, r.getFileStatus()); + logAuditEvent(true, operationName, src, null, r.getFileStatus()); } catch (AccessControlException e) { - logAuditEvent(false, "truncate", src); + logAuditEvent(false, operationName, src); throw e; } return r.getResult(); @@ -1989,6 +2009,7 @@ boolean truncate(String src, long newLength, String clientName, * @param policyName storage policy name */ void setStoragePolicy(String src, String policyName) throws IOException { + final String operationName = "setStoragePolicy"; HdfsFileStatus auditStat; waitForLoadingFSImage(); checkOperation(OperationCategory.WRITE); @@ -1999,13 +2020,13 @@ void setStoragePolicy(String src, String policyName) throws IOException { auditStat = FSDirAttrOp.setStoragePolicy(dir, blockManager, src, policyName); } catch (AccessControlException e) { - logAuditEvent(false, "setStoragePolicy", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setStoragePolicy", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** @@ -2014,6 +2035,7 @@ void setStoragePolicy(String src, String policyName) throws IOException { * @param src file/directory path */ void unsetStoragePolicy(String src) throws IOException { + final String operationName = "unsetStoragePolicy"; HdfsFileStatus auditStat; checkOperation(OperationCategory.WRITE); writeLock(); @@ -2022,13 +2044,13 @@ void unsetStoragePolicy(String src) throws IOException { checkNameNodeSafeMode("Cannot unset storage policy for " + src); auditStat = FSDirAttrOp.unsetStoragePolicy(dir, blockManager, src); } catch (AccessControlException e) { - logAuditEvent(false, "unsetStoragePolicy", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "unsetStoragePolicy", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } /** * Get the storage policy for a file or a directory. @@ -2045,7 +2067,7 @@ BlockStoragePolicy getStoragePolicy(String src) throws IOException { checkOperation(OperationCategory.READ); return FSDirAttrOp.getStoragePolicy(dir, blockManager, src); } finally { - readUnlock(); + readUnlock("getStoragePolicy"); } } @@ -2060,7 +2082,7 @@ BlockStoragePolicy[] getStoragePolicies() throws IOException { checkOperation(OperationCategory.READ); return FSDirAttrOp.getStoragePolicies(blockManager); } finally { - readUnlock(); + readUnlock("getStoragePolicies"); } } @@ -2071,7 +2093,7 @@ long getPreferredBlockSize(String src) throws IOException { checkOperation(OperationCategory.READ); return FSDirAttrOp.getPreferredBlockSize(dir, src); } finally { - readUnlock(); + readUnlock("getPreferredBlockSize"); } } @@ -2220,7 +2242,7 @@ private HdfsFileStatus startFileInt(String src, dir.writeUnlock(); } } finally { - writeUnlock(); + writeUnlock("create"); // There might be transactions logged while trying to recover the lease. // They need to be sync'ed even when an exception was thrown. if (!skipSync) { @@ -2272,7 +2294,7 @@ boolean recoverLease(String src, String holder, String clientMachine) skipSync = true; throw se; } finally { - writeUnlock(); + writeUnlock("recoverLease"); // There might be transactions logged while trying to recover the lease. // They need to be sync'ed even when an exception was thrown. 
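
The bulk of the FSNamesystem change is mechanical and follows one shape: bind the operation name once, then use it for both the audit log and the unlock call. A condensed sketch of the pattern, with doWork as a hypothetical stand-in for the per-operation FSDir* call:

    void exampleOp(String src) throws IOException {
      final String operationName = "exampleOp";
      checkOperation(OperationCategory.WRITE);
      writeLock();
      try {
        checkOperation(OperationCategory.WRITE);
        checkNameNodeSafeMode("Cannot run exampleOp on " + src);
        doWork(src);                       // hypothetical helper
      } catch (AccessControlException e) {
        logAuditEvent(false, operationName, src);
        throw e;
      } finally {
        writeUnlock(operationName);        // records the lock hold time
      }
      getEditLog().logSync();
      logAuditEvent(true, operationName, src);
    }
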
if (!skipSync) { @@ -2378,6 +2400,7 @@ boolean recoverLeaseInternal(RecoverLeaseOp op, INodesInPath iip, LastBlockWithStatus appendFile(String srcArg, String holder, String clientMachine, EnumSet flag, boolean logRetryCache) throws IOException { + final String operationName = "append"; boolean newBlock = flag.contains(CreateFlag.NEW_BLOCK); if (newBlock) { requireEffectiveLayoutVersionForFeature(Feature.APPEND_NEW_BLOCK); @@ -2407,17 +2430,17 @@ LastBlockWithStatus appendFile(String srcArg, String holder, skipSync = true; throw se; } finally { - writeUnlock(); + writeUnlock(operationName); // There might be transactions logged while trying to recover the lease // They need to be sync'ed even when an exception was thrown. if (!skipSync) { getEditLog().logSync(); } } - logAuditEvent(true, "append", srcArg); + logAuditEvent(true, operationName, srcArg); return lbs; } catch (AccessControlException e) { - logAuditEvent(false, "append", srcArg); + logAuditEvent(false, operationName, srcArg); throw e; } } @@ -2446,6 +2469,7 @@ LocatedBlock getAdditionalBlock( String src, long fileId, String clientName, ExtendedBlock previous, DatanodeInfo[] excludedNodes, String[] favoredNodes, EnumSet flags) throws IOException { + final String operationName = "getAdditionalBlock"; NameNode.stateChangeLog.debug("BLOCK* getAdditionalBlock: {} inodeId {}" + " for {}", src, fileId, clientName); @@ -2460,7 +2484,7 @@ LocatedBlock getAdditionalBlock( r = FSDirWriteFileOp.validateAddBlock(this, pc, src, fileId, clientName, previous, onRetryBlock); } finally { - readUnlock(); + readUnlock(operationName); } if (r == null) { @@ -2480,7 +2504,7 @@ LocatedBlock getAdditionalBlock( lb = FSDirWriteFileOp.storeAllocatedBlock( this, src, fileId, clientName, previous, targets); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); return lb; @@ -2524,7 +2548,7 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", src, fileId, blk, clientName, clientMachine)); } finally { - readUnlock(); + readUnlock("getAdditionalDatanode"); } if (clientnode == null) { @@ -2559,7 +2583,7 @@ void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} is" + " removed from pendingCreates", b); } finally { - writeUnlock(); + writeUnlock("abandonBlock"); } getEditLog().logSync(); } @@ -2624,7 +2648,7 @@ boolean completeFile(final String src, String holder, success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, fileId); } finally { - writeUnlock(); + writeUnlock("completeFile"); } getEditLog().logSync(); return success; @@ -2688,6 +2712,7 @@ private boolean checkBlocksComplete(String src, boolean allowCommittedBlock, boolean renameTo(String src, String dst, boolean logRetryCache) throws IOException { waitForLoadingFSImage(); + final String operationName = "rename"; FSDirRenameOp.RenameResult ret = null; writeLock(); try { @@ -2695,16 +2720,16 @@ boolean renameTo(String src, String dst, boolean logRetryCache) checkNameNodeSafeMode("Cannot rename " + src); ret = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "rename", src, dst, null); + logAuditEvent(false, operationName, src, dst, null); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } boolean success = ret.success; if (success) { getEditLog().logSync(); } - logAuditEvent(success, "rename", src, dst, ret.auditStat); + 
logAuditEvent(success, operationName, src, dst, ret.auditStat); return success; } @@ -2712,6 +2737,7 @@ void renameTo(final String src, final String dst, boolean logRetryCache, Options.Rename... options) throws IOException { waitForLoadingFSImage(); + final String operationName = "rename"; FSDirRenameOp.RenameResult res = null; writeLock(); try { @@ -2719,11 +2745,11 @@ void renameTo(final String src, final String dst, checkNameNodeSafeMode("Cannot rename " + src); res = FSDirRenameOp.renameToInt(dir, src, dst, logRetryCache, options); } catch (AccessControlException e) { - logAuditEvent(false, "rename (options=" + Arrays.toString(options) + - ")", src, dst, null); + logAuditEvent(false, operationName + " (options=" + + Arrays.toString(options) + ")", src, dst, null); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); @@ -2734,8 +2760,8 @@ void renameTo(final String src, final String dst, collectedBlocks.clear(); } - logAuditEvent(true, "rename (options=" + Arrays.toString(options) + - ")", src, dst, res.auditStat); + logAuditEvent(true, operationName + " (options=" + + Arrays.toString(options) + ")", src, dst, res.auditStat); } /** @@ -2747,6 +2773,7 @@ void renameTo(final String src, final String dst, boolean delete(String src, boolean recursive, boolean logRetryCache) throws IOException { waitForLoadingFSImage(); + final String operationName = "delete"; BlocksMapUpdateInfo toRemovedBlocks = null; writeLock(); boolean ret = false; @@ -2757,16 +2784,16 @@ boolean delete(String src, boolean recursive, boolean logRetryCache) this, src, recursive, logRetryCache); ret = toRemovedBlocks != null; } catch (AccessControlException e) { - logAuditEvent(false, "delete", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); if (toRemovedBlocks != null) { removeBlocks(toRemovedBlocks); // Incremental deletion of blocks } - logAuditEvent(true, "delete", src); + logAuditEvent(true, operationName, src); return ret; } @@ -2794,7 +2821,7 @@ void removeBlocks(BlocksMapUpdateInfo blocks) { blockManager.removeBlock(iter.next()); } } finally { - writeUnlock(); + writeUnlock("removeBlocks"); } } } @@ -2893,6 +2920,7 @@ private boolean isSafeModeTrackingBlocks() { */ HdfsFileStatus getFileInfo(final String src, boolean resolveLink) throws IOException { + final String operationName = "getfileinfo"; checkOperation(OperationCategory.READ); HdfsFileStatus stat = null; readLock(); @@ -2900,12 +2928,12 @@ HdfsFileStatus getFileInfo(final String src, boolean resolveLink) checkOperation(OperationCategory.READ); stat = FSDirStatAndListingOp.getFileInfo(dir, src, resolveLink); } catch (AccessControlException e) { - logAuditEvent(false, "getfileinfo", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "getfileinfo", src); + logAuditEvent(true, operationName, src); return stat; } @@ -2913,16 +2941,17 @@ HdfsFileStatus getFileInfo(final String src, boolean resolveLink) * Returns true if the file is closed */ boolean isFileClosed(final String src) throws IOException { + final String operationName = "isFileClosed"; checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); return FSDirStatAndListingOp.isFileClosed(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "isFileClosed", src); + logAuditEvent(false, operationName, src); throw e; } finally { - 
readUnlock(); + readUnlock(operationName); } } @@ -2931,6 +2960,7 @@ boolean isFileClosed(final String src) throws IOException { */ boolean mkdirs(String src, PermissionStatus permissions, boolean createParent) throws IOException { + final String operationName = "mkdirs"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -2939,13 +2969,13 @@ boolean mkdirs(String src, PermissionStatus permissions, checkNameNodeSafeMode("Cannot create directory " + src); auditStat = FSDirMkdirOp.mkdirs(this, src, permissions, createParent); } catch (AccessControlException e) { - logAuditEvent(false, "mkdirs", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "mkdirs", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); return true; } @@ -2965,6 +2995,7 @@ boolean mkdirs(String src, PermissionStatus permissions, */ ContentSummary getContentSummary(final String src) throws IOException { checkOperation(OperationCategory.READ); + final String operationName = "contentSummary"; readLock(); boolean success = true; try { @@ -2974,8 +3005,8 @@ ContentSummary getContentSummary(final String src) throws IOException { success = false; throw ace; } finally { - readUnlock(); - logAuditEvent(success, "contentSummary", src); + readUnlock(operationName); + logAuditEvent(success, operationName, src); } } @@ -2995,6 +3026,7 @@ ContentSummary getContentSummary(final String src) throws IOException { */ QuotaUsage getQuotaUsage(final String src) throws IOException { checkOperation(OperationCategory.READ); + final String operationName = "quotaUsage"; readLock(); boolean success = true; try { @@ -3004,8 +3036,8 @@ QuotaUsage getQuotaUsage(final String src) throws IOException { success = false; throw ace; } finally { - readUnlock(); - logAuditEvent(success, "quotaUsage", src); + readUnlock(operationName); + logAuditEvent(success, operationName, src); } } @@ -3022,6 +3054,7 @@ void setQuota(String src, long nsQuota, long ssQuota, StorageType type) requireEffectiveLayoutVersionForFeature(Feature.QUOTA_BY_STORAGE_TYPE); } checkOperation(OperationCategory.WRITE); + final String operationName = "setQuota"; writeLock(); boolean success = false; try { @@ -3030,11 +3063,11 @@ void setQuota(String src, long nsQuota, long ssQuota, StorageType type) FSDirAttrOp.setQuota(dir, src, nsQuota, ssQuota, type); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } - logAuditEvent(success, "setQuota", src); + logAuditEvent(success, operationName, src); } } @@ -3067,7 +3100,7 @@ void fsync(String src, long fileId, String clientName, long lastBlockLength) } FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); } finally { - writeUnlock(); + writeUnlock("fsync"); } getEditLog().logSync(); } @@ -3502,7 +3535,7 @@ void commitBlockSynchronization(ExtendedBlock oldBlock, FSDirWriteFileOp.persistBlocks(dir, src, iFile, false); } } finally { - writeUnlock(); + writeUnlock("commitBlockSynchronization"); } getEditLog().logSync(); if (closeFile) { @@ -3546,7 +3579,7 @@ void renewLease(String holder) throws IOException { checkNameNodeSafeMode("Cannot renew lease for " + holder); leaseManager.renewLease(holder); } finally { - readUnlock(); + readUnlock("renewLease"); } } @@ -3566,18 +3599,19 @@ DirectoryListing getListing(String src, byte[] startAfter, boolean needLocation) throws IOException { 
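      // Hold times are recorded only when the outermost lock is released:
      // readUnlock(opName) checks getReadHoldCount() == 1 before recording,
      // so in a reentrant sequence only the final, outer release counts.
      // Illustration (not patch code; names are placeholders):
      //   fsn.readLock();           // outer acquire
      //   fsn.readLock();           // inner, reentrant acquire
      //   fsn.readUnlock("inner");  // hold count 2 -> 1: nothing recorded
      //   fsn.readUnlock("outer");  // hold count 1 -> 0: full interval
      //                             //   recorded under FSNReadLockOuter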
checkOperation(OperationCategory.READ); + final String operationName = "listStatus"; DirectoryListing dl = null; readLock(); try { checkOperation(NameNode.OperationCategory.READ); dl = getListingInt(dir, src, startAfter, needLocation); } catch (AccessControlException e) { - logAuditEvent(false, "listStatus", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(true, "listStatus", src); + logAuditEvent(true, operationName, src); return dl; } @@ -3615,7 +3649,7 @@ void registerDatanode(DatanodeRegistration nodeReg) throws IOException { getBlockManager().getDatanodeManager().registerDatanode(nodeReg); checkSafeMode(); } finally { - writeUnlock(); + writeUnlock("registerDatanode"); } } @@ -3665,7 +3699,7 @@ HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, return new HeartbeatResponse(cmds, haState, rollingUpgradeInfo, blockReportLeaseId); } finally { - readUnlock(); + readUnlock("handleHeartbeat"); } } @@ -3860,7 +3894,7 @@ private void clearCorruptLazyPersistFiles() } } } finally { - writeUnlock(); + writeUnlock("clearCorruptLazyPersistFiles"); } if (changed) { getEditLog().logSync(); @@ -4066,7 +4100,7 @@ int getNumberOfDatanodes(DatanodeReportType type) { return getBlockManager().getDatanodeManager().getDatanodeListForReport( type).size(); } finally { - readUnlock(); + readUnlock("getNumberOfDatanodes"); } } @@ -4086,7 +4120,7 @@ DatanodeInfo[] datanodeReport(final DatanodeReportType type } return arr; } finally { - readUnlock(); + readUnlock("datanodeReport"); } } @@ -4108,7 +4142,7 @@ DatanodeStorageReport[] getDatanodeStorageReport(final DatanodeReportType type } return reports; } finally { - readUnlock(); + readUnlock("getDatanodeStorageReport"); } } @@ -4135,7 +4169,7 @@ void saveNamespace() throws AccessControlException, IOException { } getFSImage().saveNamespace(this); } finally { - readUnlock(); + readUnlock("saveNamespace"); cpUnlock(); } LOG.info("New namespace image has been created"); @@ -4165,7 +4199,7 @@ boolean restoreFailedStorage(String arg) throws AccessControlException, return val; } finally { - writeUnlock(); + writeUnlock("restoreFailedStorage"); cpUnlock(); } } @@ -4183,7 +4217,7 @@ void finalizeUpgrade() throws IOException { checkOperation(OperationCategory.UNCHECKED); getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState()); } finally { - writeUnlock(); + writeUnlock("finalizeUpgrade"); cpUnlock(); } } @@ -4918,7 +4952,7 @@ public long getCompleteBlocksTotal() { numUCBlocks = leaseManager.getNumUnderConstructionBlocks(); return getBlocksTotal() - numUCBlocks; } finally { - readUnlock(); + readUnlock("getCompleteBlocksTotal"); } } @@ -4957,7 +4991,7 @@ void enterSafeMode(boolean resourcesLow) throws IOException { NameNode.stateChangeLog.info("STATE* Safe mode is ON" + safeMode.getTurnOffTip()); } finally { - writeUnlock(); + writeUnlock("enterSafeMode"); } } @@ -4973,7 +5007,7 @@ void leaveSafeMode() { } safeMode.leave(false); } finally { - writeUnlock(); + writeUnlock("leaveSafeMode"); } } @@ -5008,7 +5042,7 @@ CheckpointSignature rollEditLog() throws IOException { } return getFSImage().rollEditLog(getEffectiveLayoutVersion()); } finally { - writeUnlock(); + writeUnlock("rollEditLog"); } } @@ -5026,7 +5060,7 @@ NamenodeCommand startCheckpoint(NamenodeRegistration backupNode, getEditLog().logSync(); return cmd; } finally { - writeUnlock(); + writeUnlock("startCheckpoint"); } } @@ -5037,7 +5071,7 @@ public void processIncrementalBlockReport(final DatanodeID 
nodeID, try { blockManager.processIncrementalBlockReport(nodeID, srdb); } finally { - writeUnlock(); + writeUnlock("processIncrementalBlockReport"); } } @@ -5051,7 +5085,7 @@ void endCheckpoint(NamenodeRegistration registration, LOG.info("End checkpoint for " + registration.getAddress()); getFSImage().endCheckpoint(sig); } finally { - readUnlock(); + readUnlock("endCheckpoint"); } } @@ -5446,7 +5480,7 @@ void reportBadBlocks(LocatedBlock[] blocks) throws IOException { } } } finally { - writeUnlock(); + writeUnlock("reportBadBlocks"); } } @@ -5478,7 +5512,7 @@ LocatedBlock updateBlockForPipeline(ExtendedBlock block, locatedBlock = new LocatedBlock(block, new DatanodeInfo[0]); blockManager.setBlockToken(locatedBlock, BlockTokenIdentifier.AccessMode.WRITE); } finally { - writeUnlock(); + writeUnlock("bumpBlockGenerationStamp"); } // Ensure we record the new generation stamp getEditLog().logSync(); @@ -5514,7 +5548,7 @@ void updatePipeline( updatePipelineInternal(clientName, oldBlock, newBlock, newNodes, newStorageIDs, logRetryCache); } finally { - writeUnlock(); + writeUnlock("updatePipeline"); } getEditLog().logSync(); LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => " @@ -5582,7 +5616,7 @@ void registerBackupNode(NamenodeRegistration bnReg, bnReg, nnReg); } } finally { - writeUnlock(); + writeUnlock("registerBackupNode"); } } @@ -5607,7 +5641,7 @@ void releaseBackupNode(NamenodeRegistration registration) " node namespaceID = " + registration.getNamespaceID()); getEditLog().releaseBackupStream(registration); } finally { - writeUnlock(); + writeUnlock("releaseBackupNode"); } } @@ -5692,7 +5726,7 @@ Collection listCorruptFileBlocks(String path, } return corruptFiles; } finally { - readUnlock(); + readUnlock("listCorruptFileBlocks"); } } @@ -5776,7 +5810,7 @@ Token getDelegationToken(Text renewer) long expiryTime = dtSecretManager.getTokenExpiryTime(dtId); getEditLog().logGetDelegationToken(dtId, expiryTime); } finally { - writeUnlock(); + writeUnlock("getDelegationToken"); } getEditLog().logSync(); return token; @@ -5810,7 +5844,7 @@ long renewDelegationToken(Token token) id.readFields(in); getEditLog().logRenewDelegationToken(id, expiryTime); } finally { - writeUnlock(); + writeUnlock("renewDelegationToken"); } getEditLog().logSync(); return expiryTime; @@ -5834,7 +5868,7 @@ void cancelDelegationToken(Token token) .cancelToken(token, canceller); getEditLog().logCancelDelegationToken(id); } finally { - writeUnlock(); + writeUnlock("cancelDelegationToken"); } getEditLog().logSync(); } @@ -6498,6 +6532,7 @@ public SnapshotManager getSnapshotManager() { /** Allow snapshot on a directory. */ void allowSnapshot(String path) throws IOException { checkOperation(OperationCategory.WRITE); + final String operationName = "allowSnapshot"; boolean success = false; writeLock(); try { @@ -6507,15 +6542,16 @@ void allowSnapshot(String path) throws IOException { FSDirSnapshotOp.allowSnapshot(dir, snapshotManager, path); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(success, "allowSnapshot", path, null, null); + logAuditEvent(success, operationName, path, null, null); } /** Disallow snapshot on a directory. 
*/ void disallowSnapshot(String path) throws IOException { checkOperation(OperationCategory.WRITE); + final String operationName = "disallowSnapshot"; boolean success = false; writeLock(); try { @@ -6525,10 +6561,10 @@ void disallowSnapshot(String path) throws IOException { FSDirSnapshotOp.disallowSnapshot(dir, snapshotManager, path); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(success, "disallowSnapshot", path, null, null); + logAuditEvent(success, operationName, path, null, null); } /** @@ -6538,6 +6574,7 @@ void disallowSnapshot(String path) throws IOException { */ String createSnapshot(String snapshotRoot, String snapshotName, boolean logRetryCache) throws IOException { + final String operationName = "createSnapshot"; String snapshotPath = null; writeLock(); try { @@ -6546,10 +6583,10 @@ String createSnapshot(String snapshotRoot, String snapshotName, snapshotPath = FSDirSnapshotOp.createSnapshot(dir, snapshotManager, snapshotRoot, snapshotName, logRetryCache); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(snapshotPath != null, "createSnapshot", snapshotRoot, + logAuditEvent(snapshotPath != null, operationName, snapshotRoot, snapshotPath, null); return snapshotPath; } @@ -6565,6 +6602,7 @@ String createSnapshot(String snapshotRoot, String snapshotName, void renameSnapshot( String path, String snapshotOldName, String snapshotNewName, boolean logRetryCache) throws IOException { + final String operationName = "renameSnapshot"; boolean success = false; writeLock(); try { @@ -6574,12 +6612,12 @@ void renameSnapshot( snapshotOldName, snapshotNewName, logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); String oldSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotOldName); String newSnapshotRoot = Snapshot.getSnapshotPath(path, snapshotNewName); - logAuditEvent(success, "renameSnapshot", oldSnapshotRoot, + logAuditEvent(success, operationName, oldSnapshotRoot, newSnapshotRoot, null); } @@ -6592,6 +6630,7 @@ void renameSnapshot( */ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() throws IOException { + final String operationName = "listSnapshottableDirectory"; SnapshottableDirectoryStatus[] status = null; checkOperation(OperationCategory.READ); boolean success = false; @@ -6601,9 +6640,9 @@ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() status = FSDirSnapshotOp.getSnapshottableDirListing(dir, snapshotManager); success = true; } finally { - readUnlock(); + readUnlock(operationName); } - logAuditEvent(success, "listSnapshottableDirectory", null, null, null); + logAuditEvent(success, operationName, null, null, null); return status; } @@ -6624,6 +6663,7 @@ public SnapshottableDirectoryStatus[] getSnapshottableDirListing() */ SnapshotDiffReport getSnapshotDiffReport(String path, String fromSnapshot, String toSnapshot) throws IOException { + final String operationName = "computeSnapshotDiff"; SnapshotDiffReport diffs = null; checkOperation(OperationCategory.READ); readLock(); @@ -6632,13 +6672,13 @@ SnapshotDiffReport getSnapshotDiffReport(String path, diffs = FSDirSnapshotOp.getSnapshotDiffReport(dir, snapshotManager, path, fromSnapshot, toSnapshot); } finally { - readUnlock(); + readUnlock(operationName); } String fromSnapshotRoot = (fromSnapshot == null || fromSnapshot.isEmpty()) ? 
path : Snapshot.getSnapshotPath(path, fromSnapshot); String toSnapshotRoot = (toSnapshot == null || toSnapshot.isEmpty()) ? path : Snapshot.getSnapshotPath(path, toSnapshot); - logAuditEvent(diffs != null, "computeSnapshotDiff", fromSnapshotRoot, + logAuditEvent(diffs != null, operationName, fromSnapshotRoot, toSnapshotRoot, null); return diffs; } @@ -6652,6 +6692,7 @@ SnapshotDiffReport getSnapshotDiffReport(String path, */ void deleteSnapshot(String snapshotRoot, String snapshotName, boolean logRetryCache) throws IOException { + final String operationName = "deleteSnapshot"; boolean success = false; writeLock(); BlocksMapUpdateInfo blocksToBeDeleted = null; @@ -6663,7 +6704,7 @@ void deleteSnapshot(String snapshotRoot, String snapshotName, snapshotRoot, snapshotName, logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); @@ -6674,7 +6715,7 @@ void deleteSnapshot(String snapshotRoot, String snapshotName, } String rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName); - logAuditEvent(success, "deleteSnapshot", rootPath, null, null); + logAuditEvent(success, operationName, rootPath, null, null); } /** @@ -6700,11 +6741,12 @@ RollingUpgradeInfo queryRollingUpgrade() throws IOException { rollingUpgradeInfo.setCreatedRollbackImages(hasRollbackImage); return rollingUpgradeInfo; } finally { - readUnlock(); + readUnlock("queryRollingUpgrade"); } } RollingUpgradeInfo startRollingUpgrade() throws IOException { + final String operationName = "startRollingUpgrade"; checkSuperuserPrivilege(); checkOperation(OperationCategory.WRITE); writeLock(); @@ -6727,12 +6769,12 @@ RollingUpgradeInfo startRollingUpgrade() throws IOException { getFSImage().rollEditLog(getEffectiveLayoutVersion()); } } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); if (auditLog.isInfoEnabled() && isExternalInvocation()) { - logAuditEvent(true, "startRollingUpgrade", null, null, null); + logAuditEvent(true, operationName, null, null, null); } return rollingUpgradeInfo; } @@ -6817,7 +6859,7 @@ public RollingUpgradeInfo.Bean getRollingUpgradeStatus() { } catch (IOException ioe) { LOG.warn("Encountered exception setting Rollback Image", ioe); } finally { - readUnlock(); + readUnlock("getRollingUpgradeStatus"); } return new RollingUpgradeInfo.Bean(upgradeInfo); } @@ -6897,6 +6939,7 @@ void checkRollingUpgrade(String action) throws RollingUpgradeException { } RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { + final String operationName = "finalizeRollingUpgrade"; checkSuperuserPrivilege(); checkOperation(OperationCategory.WRITE); writeLock(); @@ -6917,7 +6960,7 @@ RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { getFSImage().renameCheckpoint(NameNodeFile.IMAGE_ROLLBACK, NameNodeFile.IMAGE); } finally { - writeUnlock(); + writeUnlock(operationName); } if (!haEnabled) { @@ -6926,7 +6969,7 @@ RollingUpgradeInfo finalizeRollingUpgrade() throws IOException { } if (auditLog.isInfoEnabled() && isExternalInvocation()) { - logAuditEvent(true, "finalizeRollingUpgrade", null, null, null); + logAuditEvent(true, operationName, null, null, null); } return rollingUpgradeInfo; } @@ -6939,6 +6982,7 @@ void finalizeRollingUpgradeInternal(long finalizeTime) { long addCacheDirective(CacheDirectiveInfo directive, EnumSet flags, boolean logRetryCache) throws IOException { + final String operationName = "addCacheDirective"; CacheDirectiveInfo effectiveDirective = null; if (!flags.contains(CacheFlag.FORCE)) { 
cacheManager.waitForRescanIfNeeded(); @@ -6950,7 +6994,7 @@ long addCacheDirective(CacheDirectiveInfo directive, effectiveDirective = FSNDNCacheOp.addCacheDirective(this, cacheManager, directive, flags, logRetryCache); } finally { - writeUnlock(); + writeUnlock(operationName); boolean success = effectiveDirective != null; if (success) { getEditLog().logSync(); @@ -6958,7 +7002,7 @@ long addCacheDirective(CacheDirectiveInfo directive, String effectiveDirectiveStr = effectiveDirective != null ? effectiveDirective.toString() : null; - logAuditEvent(success, "addCacheDirective", effectiveDirectiveStr, + logAuditEvent(success, operationName, effectiveDirectiveStr, null, null); } return effectiveDirective != null ? effectiveDirective.getId() : 0; @@ -6966,6 +7010,7 @@ long addCacheDirective(CacheDirectiveInfo directive, void modifyCacheDirective(CacheDirectiveInfo directive, EnumSet flags, boolean logRetryCache) throws IOException { + final String operationName = "modifyCacheDirective"; boolean success = false; if (!flags.contains(CacheFlag.FORCE)) { cacheManager.waitForRescanIfNeeded(); @@ -6978,17 +7023,18 @@ void modifyCacheDirective(CacheDirectiveInfo directive, logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); if (success) { getEditLog().logSync(); } final String idStr = "{id: " + directive.getId() + "}"; - logAuditEvent(success, "modifyCacheDirective", idStr, + logAuditEvent(success, operationName, idStr, directive.toString(), null); } } void removeCacheDirective(long id, boolean logRetryCache) throws IOException { + final String operationName = "removeCacheDirective"; boolean success = false; writeLock(); try { @@ -6997,9 +7043,9 @@ void removeCacheDirective(long id, boolean logRetryCache) throws IOException { FSNDNCacheOp.removeCacheDirective(this, cacheManager, id, logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); String idStr = "{id: " + Long.toString(id) + "}"; - logAuditEvent(success, "removeCacheDirective", idStr, null, + logAuditEvent(success, operationName, idStr, null, null); } getEditLog().logSync(); @@ -7007,6 +7053,7 @@ void removeCacheDirective(long id, boolean logRetryCache) throws IOException { BatchedListEntries listCacheDirectives( long startId, CacheDirectiveInfo filter) throws IOException { + final String operationName = "listCacheDirectives"; checkOperation(OperationCategory.READ); BatchedListEntries results; cacheManager.waitForRescanIfNeeded(); @@ -7018,8 +7065,8 @@ BatchedListEntries listCacheDirectives( filter); success = true; } finally { - readUnlock(); - logAuditEvent(success, "listCacheDirectives", filter.toString(), null, + readUnlock(operationName); + logAuditEvent(success, operationName, filter.toString(), null, null); } return results; @@ -7027,6 +7074,7 @@ BatchedListEntries listCacheDirectives( void addCachePool(CachePoolInfo req, boolean logRetryCache) throws IOException { + final String operationName = "addCachePool"; writeLock(); boolean success = false; String poolInfoStr = null; @@ -7039,8 +7087,8 @@ void addCachePool(CachePoolInfo req, boolean logRetryCache) poolInfoStr = info.toString(); success = true; } finally { - writeUnlock(); - logAuditEvent(success, "addCachePool", poolInfoStr, null, null); + writeUnlock(operationName); + logAuditEvent(success, operationName, poolInfoStr, null, null); } getEditLog().logSync(); @@ -7048,6 +7096,7 @@ void addCachePool(CachePoolInfo req, boolean logRetryCache) void modifyCachePool(CachePoolInfo req, boolean logRetryCache) throws 
IOException { + final String operationName = "modifyCachePool"; writeLock(); boolean success = false; try { @@ -7057,10 +7106,10 @@ void modifyCachePool(CachePoolInfo req, boolean logRetryCache) FSNDNCacheOp.modifyCachePool(this, cacheManager, req, logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); String poolNameStr = "{poolName: " + (req == null ? null : req.getPoolName()) + "}"; - logAuditEvent(success, "modifyCachePool", poolNameStr, + logAuditEvent(success, operationName, poolNameStr, req == null ? null : req.toString(), null); } @@ -7069,6 +7118,7 @@ void modifyCachePool(CachePoolInfo req, boolean logRetryCache) void removeCachePool(String cachePoolName, boolean logRetryCache) throws IOException { + final String operationName = "removeCachePool"; writeLock(); boolean success = false; try { @@ -7078,9 +7128,9 @@ void removeCachePool(String cachePoolName, boolean logRetryCache) logRetryCache); success = true; } finally { - writeUnlock(); + writeUnlock(operationName); String poolNameStr = "{poolName: " + cachePoolName + "}"; - logAuditEvent(success, "removeCachePool", poolNameStr, null, null); + logAuditEvent(success, operationName, poolNameStr, null, null); } getEditLog().logSync(); @@ -7088,6 +7138,7 @@ void removeCachePool(String cachePoolName, boolean logRetryCache) BatchedListEntries listCachePools(String prevKey) throws IOException { + final String operationName = "listCachePools"; BatchedListEntries results; checkOperation(OperationCategory.READ); boolean success = false; @@ -7098,14 +7149,15 @@ BatchedListEntries listCachePools(String prevKey) results = FSNDNCacheOp.listCachePools(this, cacheManager, prevKey); success = true; } finally { - readUnlock(); - logAuditEvent(success, "listCachePools", null, null, null); + readUnlock(operationName); + logAuditEvent(success, operationName, null, null, null); } return results; } void modifyAclEntries(final String src, List aclSpec) throws IOException { + final String operationName = "modifyAclEntries"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -7114,17 +7166,18 @@ void modifyAclEntries(final String src, List aclSpec) checkNameNodeSafeMode("Cannot modify ACL entries on " + src); auditStat = FSDirAclOp.modifyAclEntries(dir, src, aclSpec); } catch (AccessControlException e) { - logAuditEvent(false, "modifyAclEntries", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "modifyAclEntries", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void removeAclEntries(final String src, List aclSpec) throws IOException { + final String operationName = "removeAclEntries"; checkOperation(OperationCategory.WRITE); HdfsFileStatus auditStat = null; writeLock(); @@ -7133,16 +7186,17 @@ void removeAclEntries(final String src, List aclSpec) checkNameNodeSafeMode("Cannot remove ACL entries on " + src); auditStat = FSDirAclOp.removeAclEntries(dir, src, aclSpec); } catch (AccessControlException e) { - logAuditEvent(false, "removeAclEntries", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeAclEntries", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void removeDefaultAcl(final String src) throws IOException { + final String operationName = "removeDefaultAcl"; 
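
For reference, the registry-level aggregation of the values recorded at unlock time can be exercised in isolation. A test-style sketch, assuming hadoop-common's MetricsAsserts test helpers:

    import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
    import static org.apache.hadoop.test.MetricsAsserts.mockMetricsRecordBuilder;
    import org.apache.hadoop.metrics2.MetricsRecordBuilder;
    import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation;

    MutableRatesWithAggregation rates = new MutableRatesWithAggregation();
    rates.add("FSNWriteLockRemoveDefaultAcl", 12);   // ms held
    rates.add("FSNWriteLockRemoveDefaultAcl", 18);
    MetricsRecordBuilder rb = mockMetricsRecordBuilder();
    rates.snapshot(rb, true);
    assertCounter("FSNWriteLockRemoveDefaultAclNumOps", 2L, rb);
    // the mean hold time (15.0 ms here) is published as
    // FSNWriteLockRemoveDefaultAclAvgTime
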
HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -7151,16 +7205,17 @@ void removeDefaultAcl(final String src) throws IOException { checkNameNodeSafeMode("Cannot remove default ACL entries on " + src); auditStat = FSDirAclOp.removeDefaultAcl(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "removeDefaultAcl", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeDefaultAcl", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void removeAcl(final String src) throws IOException { + final String operationName = "removeAcl"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -7169,16 +7224,17 @@ void removeAcl(final String src) throws IOException { checkNameNodeSafeMode("Cannot remove ACL on " + src); auditStat = FSDirAclOp.removeAcl(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "removeAcl", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeAcl", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void setAcl(final String src, List aclSpec) throws IOException { + final String operationName = "setAcl"; HdfsFileStatus auditStat = null; checkOperation(OperationCategory.WRITE); writeLock(); @@ -7187,16 +7243,17 @@ void setAcl(final String src, List aclSpec) throws IOException { checkNameNodeSafeMode("Cannot set ACL on " + src); auditStat = FSDirAclOp.setAcl(dir, src, aclSpec); } catch (AccessControlException e) { - logAuditEvent(false, "setAcl", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setAcl", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } AclStatus getAclStatus(String src) throws IOException { + final String operationName = "getAclStatus"; checkOperation(OperationCategory.READ); boolean success = false; readLock(); @@ -7206,8 +7263,8 @@ AclStatus getAclStatus(String src) throws IOException { success = true; return ret; } finally { - readUnlock(); - logAuditEvent(success, "getAclStatus", src); + readUnlock(operationName); + logAuditEvent(success, operationName, src); } } @@ -7225,6 +7282,7 @@ AclStatus getAclStatus(String src) throws IOException { void createEncryptionZone(final String src, final String keyName, boolean logRetryCache) throws IOException, UnresolvedLinkException, SafeModeException, AccessControlException { + final String operationName = "createEncryptionZone"; try { Metadata metadata = FSDirEncryptionZoneOp.ensureKeyIsInitialized(dir, keyName, src); @@ -7240,13 +7298,13 @@ void createEncryptionZone(final String src, final String keyName, resultingStat = FSDirEncryptionZoneOp.createEncryptionZone(dir, src, pc, metadata.getCipher(), keyName, logRetryCache); } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "createEncryptionZone", src, null, resultingStat); + logAuditEvent(true, operationName, src, null, resultingStat); } catch (AccessControlException e) { - logAuditEvent(false, "createEncryptionZone", src); + logAuditEvent(false, operationName, src); throw e; } } @@ -7261,6 +7319,7 @@ void createEncryptionZone(final String src, final 
String keyName, */ EncryptionZone getEZForPath(final String srcArg) throws AccessControlException, UnresolvedLinkException, IOException { + final String operationName = "getEZForPath"; HdfsFileStatus resultingStat = null; boolean success = false; final FSPermissionChecker pc = getPermissionChecker(); @@ -7274,13 +7333,14 @@ EncryptionZone getEZForPath(final String srcArg) resultingStat = ezForPath.getValue(); return ezForPath.getKey(); } finally { - readUnlock(); - logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat); + readUnlock(operationName); + logAuditEvent(success, operationName, srcArg, null, resultingStat); } } BatchedListEntries listEncryptionZones(long prevId) throws IOException { + final String operationName = "listEncryptionZones"; boolean success = false; checkSuperuserPrivilege(); checkOperation(OperationCategory.READ); @@ -7293,14 +7353,15 @@ BatchedListEntries listEncryptionZones(long prevId) success = true; return ret; } finally { - readUnlock(); - logAuditEvent(success, "listEncryptionZones", null); + readUnlock(operationName); + logAuditEvent(success, operationName, null); } } void setXAttr(String src, XAttr xAttr, EnumSet flag, boolean logRetryCache) throws IOException { + final String operationName = "setXAttr"; HdfsFileStatus auditStat = null; writeLock(); try { @@ -7308,46 +7369,49 @@ void setXAttr(String src, XAttr xAttr, EnumSet flag, checkNameNodeSafeMode("Cannot set XAttr on " + src); auditStat = FSDirXAttrOp.setXAttr(dir, src, xAttr, flag, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "setXAttr", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "setXAttr", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } List getXAttrs(final String src, List xAttrs) throws IOException { + final String operationName = "getXAttrs"; checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); return FSDirXAttrOp.getXAttrs(dir, src, xAttrs); } catch (AccessControlException e) { - logAuditEvent(false, "getXAttrs", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } } List listXAttrs(String src) throws IOException { + final String operationName = "listXAttrs"; checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); return FSDirXAttrOp.listXAttrs(dir, src); } catch (AccessControlException e) { - logAuditEvent(false, "listXAttrs", src); + logAuditEvent(false, operationName, src); throw e; } finally { - readUnlock(); + readUnlock(operationName); } } void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) throws IOException { + final String operationName = "removeXAttr"; HdfsFileStatus auditStat = null; writeLock(); try { @@ -7355,16 +7419,17 @@ void removeXAttr(String src, XAttr xAttr, boolean logRetryCache) checkNameNodeSafeMode("Cannot remove XAttr entry on " + src); auditStat = FSDirXAttrOp.removeXAttr(dir, src, xAttr, logRetryCache); } catch (AccessControlException e) { - logAuditEvent(false, "removeXAttr", src); + logAuditEvent(false, operationName, src); throw e; } finally { - writeUnlock(); + writeUnlock(operationName); } getEditLog().logSync(); - logAuditEvent(true, "removeXAttr", src, null, auditStat); + logAuditEvent(true, operationName, src, null, auditStat); } void checkAccess(String src, FsAction mode) throws IOException { + 
@@ -7308,46 +7369,49 @@ void setXAttr(String src, XAttr xAttr, EnumSet<XAttrSetFlag> flag,
       checkNameNodeSafeMode("Cannot set XAttr on " + src);
       auditStat = FSDirXAttrOp.setXAttr(dir, src, xAttr, flag, logRetryCache);
     } catch (AccessControlException e) {
-      logAuditEvent(false, "setXAttr", src);
+      logAuditEvent(false, operationName, src);
       throw e;
     } finally {
-      writeUnlock();
+      writeUnlock(operationName);
     }
     getEditLog().logSync();
-    logAuditEvent(true, "setXAttr", src, null, auditStat);
+    logAuditEvent(true, operationName, src, null, auditStat);
   }
 
   List<XAttr> getXAttrs(final String src, List<XAttr> xAttrs)
       throws IOException {
+    final String operationName = "getXAttrs";
     checkOperation(OperationCategory.READ);
     readLock();
     try {
       checkOperation(OperationCategory.READ);
       return FSDirXAttrOp.getXAttrs(dir, src, xAttrs);
     } catch (AccessControlException e) {
-      logAuditEvent(false, "getXAttrs", src);
+      logAuditEvent(false, operationName, src);
       throw e;
     } finally {
-      readUnlock();
+      readUnlock(operationName);
     }
   }
 
   List<XAttr> listXAttrs(String src) throws IOException {
+    final String operationName = "listXAttrs";
     checkOperation(OperationCategory.READ);
     readLock();
     try {
       checkOperation(OperationCategory.READ);
       return FSDirXAttrOp.listXAttrs(dir, src);
     } catch (AccessControlException e) {
-      logAuditEvent(false, "listXAttrs", src);
+      logAuditEvent(false, operationName, src);
       throw e;
     } finally {
-      readUnlock();
+      readUnlock(operationName);
     }
   }
 
   void removeXAttr(String src, XAttr xAttr, boolean logRetryCache)
       throws IOException {
+    final String operationName = "removeXAttr";
     HdfsFileStatus auditStat = null;
     writeLock();
     try {
@@ -7355,16 +7419,17 @@ void removeXAttr(String src, XAttr xAttr, boolean logRetryCache)
       checkNameNodeSafeMode("Cannot remove XAttr entry on " + src);
       auditStat = FSDirXAttrOp.removeXAttr(dir, src, xAttr, logRetryCache);
     } catch (AccessControlException e) {
-      logAuditEvent(false, "removeXAttr", src);
+      logAuditEvent(false, operationName, src);
       throw e;
     } finally {
-      writeUnlock();
+      writeUnlock(operationName);
     }
     getEditLog().logSync();
-    logAuditEvent(true, "removeXAttr", src, null, auditStat);
+    logAuditEvent(true, operationName, src, null, auditStat);
   }
 
   void checkAccess(String src, FsAction mode) throws IOException {
+    final String operationName = "checkAccess";
     checkOperation(OperationCategory.READ);
     FSPermissionChecker pc = getPermissionChecker();
     readLock();
@@ -7380,10 +7445,10 @@ void checkAccess(String src, FsAction mode) throws IOException {
         dir.checkPathAccess(pc, iip, mode);
       }
     } catch (AccessControlException e) {
-      logAuditEvent(false, "checkAccess", src);
+      logAuditEvent(false, operationName, src);
       throw e;
     } finally {
-      readUnlock();
+      readUnlock(operationName);
     }
   }
 
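
With the FSNamesystem call sites done, the remaining question is where the names end up. MutableRatesWithAggregation turns each distinct name passed to add() into a NumOps/AvgTime metric pair at snapshot time; a minimal sketch using the same metrics test helpers the new unit test below relies on (class and registry names invented, values illustrative):

    import org.apache.hadoop.metrics2.MetricsRecordBuilder;
    import org.apache.hadoop.metrics2.lib.MetricsRegistry;
    import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation;
    import org.apache.hadoop.test.MetricsAsserts;

    import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
    import static org.apache.hadoop.test.MetricsAsserts.assertGauge;

    public class RatesSketch {
      public static void main(String[] args) {
        MetricsRegistry registry = new MetricsRegistry("RatesSketch");
        MutableRatesWithAggregation rates =
            registry.newRatesWithAggregation("RatesSketch");

        rates.add("FSNWriteLockSetAcl", 3); // two write-hold samples, in ms
        rates.add("FSNWriteLockSetAcl", 5);

        MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder();
        rates.snapshot(rb, true);

        // each name yields a <Name>NumOps counter and <Name>AvgTime gauge
        assertCounter("FSNWriteLockSetAclNumOps", 2L, rb);
        assertGauge("FSNWriteLockSetAclAvgTime", 4.0, rb);
      }
    }
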
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
index 043f5693829..c7dde2ba61c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java
@@ -27,11 +27,14 @@
 import com.google.common.annotations.VisibleForTesting;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Timer;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT;
@@ -40,11 +43,20 @@
 /**
  * Mimics a ReentrantReadWriteLock but does not directly implement the interface
  * so more sophisticated locking capabilities and logging/metrics are possible.
+ * If {@link org.apache.hadoop.hdfs.DFSConfigKeys#DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY}
+ * is set to true, metrics will be emitted into the FSNamesystem metrics
+ * registry for each operation which acquires this lock, indicating how long
+ * the operation held the lock. Note that if a thread dies, metrics produced
+ * after the most recent snapshot will be lost due to the use of
+ * {@link MutableRatesWithAggregation}. However, since threads are re-used
+ * between operations, this should not generally be an issue.
 */
 class FSNamesystemLock {
   @VisibleForTesting
   protected ReentrantReadWriteLock coarseLock;
 
+  private final boolean metricsEnabled;
+  private final MutableRatesWithAggregation detailedHoldTimeMetrics;
   private final Timer timer;
 
   /**
@@ -80,12 +92,19 @@ public Long initialValue() {
   private final AtomicLong timeStampOfLastReadLockReport = new AtomicLong(0);
   private final AtomicLong longestReadLockHeldInterval = new AtomicLong(0);
 
-  FSNamesystemLock(Configuration conf) {
-    this(conf, new Timer());
+  @VisibleForTesting
+  static final String OP_NAME_OTHER = "OTHER";
+  private static final String READ_LOCK_METRIC_PREFIX = "FSNReadLock";
+  private static final String WRITE_LOCK_METRIC_PREFIX = "FSNWriteLock";
+
+  FSNamesystemLock(Configuration conf,
+      MutableRatesWithAggregation detailedHoldTimeMetrics) {
+    this(conf, detailedHoldTimeMetrics, new Timer());
   }
 
   @VisibleForTesting
-  FSNamesystemLock(Configuration conf, Timer timer) {
+  FSNamesystemLock(Configuration conf,
+      MutableRatesWithAggregation detailedHoldTimeMetrics, Timer timer) {
     boolean fair = conf.getBoolean("dfs.namenode.fslock.fair", true);
     FSNamesystem.LOG.info("fsLock is fair: " + fair);
     this.coarseLock = new ReentrantReadWriteLock(fair);
@@ -100,6 +119,12 @@ public Long initialValue() {
     this.lockSuppressWarningInterval = conf.getTimeDuration(
         DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY,
         DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
+    this.metricsEnabled = conf.getBoolean(
+        DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY,
+        DFS_NAMENODE_LOCK_DETAILED_METRICS_DEFAULT);
+    FSNamesystem.LOG.info("Detailed lock hold time metrics enabled: " +
+        this.metricsEnabled);
+    this.detailedHoldTimeMetrics = detailedHoldTimeMetrics;
   }
 
   public void readLock() {
@@ -110,12 +135,17 @@ public void readLock() {
   }
 
   public void readUnlock() {
+    readUnlock(OP_NAME_OTHER);
+  }
+
+  public void readUnlock(String opName) {
     final boolean needReport = coarseLock.getReadHoldCount() == 1;
     final long readLockInterval =
         timer.monotonicNow() - readLockHeldTimeStamp.get();
     coarseLock.readLock().unlock();
 
     if (needReport) {
+      addMetric(opName, readLockInterval, false);
       readLockHeldTimeStamp.remove();
     }
     if (needReport &&
         readLockInterval >= this.readLockReportingThreshold) {
@@ -164,6 +194,10 @@ public void writeLockInterruptibly() throws InterruptedException {
   }
 
   public void writeUnlock() {
+    writeUnlock(OP_NAME_OTHER);
+  }
+
+  public void writeUnlock(String opName) {
     final boolean needReport = coarseLock.getWriteHoldCount() == 1 &&
         coarseLock.isWriteLockedByCurrentThread();
     final long currentTime = timer.monotonicNow();
@@ -191,6 +225,10 @@ public void writeUnlock() {
 
     coarseLock.writeLock().unlock();
 
+    if (needReport) {
+      addMetric(opName, writeLockInterval, true);
+    }
+
     if (logReport) {
       FSNamesystem.LOG.info("FSNamesystem write lock held for " +
           writeLockInterval + " ms via\n" +
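
One subtlety in the readUnlock and writeUnlock changes above: a sample is recorded only when needReport is true, that is, when the outermost hold is being released (getReadHoldCount() == 1 on the read side). Reentrant acquisitions therefore charge their whole combined hold time to the op name given at the outermost unlock, which is why the nested "bar" case in the new test below registers one two-millisecond sample rather than two one-millisecond samples. The guard in isolation (a standalone JDK-only sketch, not HDFS code):

    import java.util.concurrent.locks.ReentrantReadWriteLock;

    public class OutermostUnlockSketch {
      private static final ReentrantReadWriteLock LOCK =
          new ReentrantReadWriteLock(true);

      static void readUnlock(String opName) {
        // check the hold count before releasing, as the patch does
        final boolean outermost = LOCK.getReadHoldCount() == 1;
        LOCK.readLock().unlock();
        if (outermost) {
          System.out.println("record sample for " + opName);
        }
      }

      public static void main(String[] args) {
        LOCK.readLock().lock();
        LOCK.readLock().lock(); // reentrant inner acquire
        readUnlock("bar");      // hold count was 2: no sample
        readUnlock("bar");      // hold count was 1: one sample for "bar"
      }
    }
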
@@ -227,4 +265,19 @@ public Condition newWriteLockCondition() {
   public int getQueueLength() {
     return coarseLock.getQueueLength();
   }
+
+  /**
+   * Add the lock hold time for a recent operation to the metrics.
+   * @param operationName Name of the operation for which to record the time
+   * @param value Length of time the lock was held
+   */
+  private void addMetric(String operationName, long value, boolean isWrite) {
+    if (metricsEnabled) {
+      String metricName =
+          (isWrite ? WRITE_LOCK_METRIC_PREFIX : READ_LOCK_METRIC_PREFIX) +
+          org.apache.commons.lang.StringUtils.capitalize(operationName);
+      detailedHoldTimeMetrics.add(metricName, value);
+    }
+  }
+
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
index 69d161948b4..3cc7bd1f06b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
@@ -329,7 +329,7 @@ public void run() {
           needSync = checkLeases();
         }
       } finally {
-        fsnamesystem.writeUnlock();
+        fsnamesystem.writeUnlock("leaseManager");
         // lease reassignments should to be sync'ed.
         if (needSync) {
           fsnamesystem.getEditLog().logSync();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
index 3286447cf64..5eb3b2351de 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java
@@ -493,7 +493,7 @@ private LocatedBlocks getBlockLocations(String path, HdfsFileStatus file)
     } catch (FileNotFoundException fnfe) {
       blocks = null;
     } finally {
-      fsn.readUnlock();
+      fsn.readUnlock("fsckGetBlockLocations");
     }
     return blocks;
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 3c5ae0ef3f6..18ce68886f2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -2630,6 +2630,14 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.lock.detailed-metrics.enabled</name>
+  <value>false</value>
+  <description>If true, the namenode will keep track of how long various
+    operations hold the Namesystem lock and emit this as metrics.
+  </description>
+</property>
+
 <property>
   <name>dfs.namenode.startup.delay.block.deletion.sec</name>
   <value>0</value>
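
The background callers updated above deliberately pick descriptive op names ("leaseManager", "fsckGetBlockLocations") instead of falling back to OTHER, so long background holds stay distinguishable from RPC-driven holds. After addMetric's prefix-and-capitalize step they surface as metric names like the following; a quick standalone check, assuming the same commons-lang call addMetric uses:

    import org.apache.commons.lang.StringUtils;

    public class MetricNameSketch {
      public static void main(String[] args) {
        // write-lock hold attributed to the lease monitor thread
        System.out.println("FSNWriteLock" + StringUtils.capitalize("leaseManager"));
        // -> FSNWriteLockLeaseManager
        // read-lock hold attributed to fsck's block-location lookup
        System.out.println("FSNReadLock" + StringUtils.capitalize("fsckGetBlockLocations"));
        // -> FSNReadLockFsckGetBlockLocations
      }
    }
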
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java
index 08900ecba8f..c97e0f0ac2e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLock.java
@@ -21,8 +21,12 @@
 import com.google.common.base.Supplier;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
+import org.apache.hadoop.test.MetricsAsserts;
 import org.apache.hadoop.util.FakeTimer;
 import org.apache.log4j.Level;
 import org.junit.Test;
@@ -36,6 +40,8 @@
 import java.util.regex.Pattern;
 
 import static org.junit.Assert.*;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
 
 /**
  * Tests the FSNamesystemLock, looking at lock compatibilities and
@@ -48,17 +54,17 @@ public void testFsLockFairness() throws IOException, InterruptedException{
     Configuration conf = new Configuration();
     conf.setBoolean("dfs.namenode.fslock.fair", true);
-    FSNamesystemLock fsnLock = new FSNamesystemLock(conf);
+    FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null);
     assertTrue(fsnLock.coarseLock.isFair());
 
     conf.setBoolean("dfs.namenode.fslock.fair", false);
-    fsnLock = new FSNamesystemLock(conf);
+    fsnLock = new FSNamesystemLock(conf, null);
     assertFalse(fsnLock.coarseLock.isFair());
   }
 
   @Test
   public void testFSNamesystemLockCompatibility() {
-    FSNamesystemLock rwLock = new FSNamesystemLock(new Configuration());
+    FSNamesystemLock rwLock = new FSNamesystemLock(new Configuration(), null);
     assertEquals(0, rwLock.getReadHoldCount());
 
     rwLock.readLock();
@@ -98,7 +104,7 @@ public void testFSLockGetWaiterCount() throws InterruptedException {
     final CountDownLatch latch = new CountDownLatch(threadCount);
     final Configuration conf = new Configuration();
     conf.setBoolean("dfs.namenode.fslock.fair", true);
-    final FSNamesystemLock rwLock = new FSNamesystemLock(conf);
+    final FSNamesystemLock rwLock = new FSNamesystemLock(conf, null);
     rwLock.writeLock();
 
     ExecutorService helper = Executors.newFixedThreadPool(threadCount);
@@ -141,7 +147,7 @@ public void testFSWriteLockLongHoldingReport() throws Exception {
         writeLockSuppressWarningInterval, TimeUnit.MILLISECONDS);
 
     final FakeTimer timer = new FakeTimer();
-    final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, timer);
+    final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null, timer);
     timer.advance(writeLockSuppressWarningInterval);
 
     LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
@@ -216,7 +222,7 @@ public void testFSReadLockLongHoldingReport() throws Exception {
         readLockSuppressWarningInterval, TimeUnit.MILLISECONDS);
 
     final FakeTimer timer = new FakeTimer();
-    final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, timer);
+    final FSNamesystemLock fsnLock = new FSNamesystemLock(conf, null, timer);
     timer.advance(readLockSuppressWarningInterval);
 
     LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
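
Two things to note before the new test. First, the existing tests above can simply pass null for the new rates argument because addMetric consults metricsEnabled (which defaults to false) before touching detailedHoldTimeMetrics; passing null with the key set to true would NPE on the first unlock. Second, the stack-trace pattern in the next hunk grows from four frames to five, consistent with the extra delegation frame the no-argument readUnlock() now adds. The two wiring modes, sketched (class name invented; the package declaration matters because FSNamesystemLock is package-private):

    package org.apache.hadoop.hdfs.server.namenode;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.metrics2.lib.MetricsRegistry;

    public class LockWiringSketch {
      public static void main(String[] args) {
        // Metrics off (the default): the null rates object is never dereferenced.
        FSNamesystemLock plain = new FSNamesystemLock(new Configuration(), null);
        plain.writeLock();
        plain.writeUnlock(); // delegates to writeUnlock(OP_NAME_OTHER); no sample

        // Metrics on: samples land in the supplied rates as FSN(Read|Write)Lock*.
        Configuration conf = new Configuration();
        conf.setBoolean(
            DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY, true);
        MetricsRegistry registry = new MetricsRegistry("LockWiringSketch");
        FSNamesystemLock metered = new FSNamesystemLock(
            conf, registry.newRatesWithAggregation("LockWiringSketch"));
        metered.writeLock();
        metered.writeUnlock("mkdir"); // recorded as FSNWriteLockMkdir
      }
    }
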
@@ -303,7 +309,7 @@ public void run() {
     t2.join();
     // Look for the differentiating class names in the stack trace
     String stackTracePatternString =
-        String.format("INFO.+%s(.+\n){4}\\Q%%s\\E\\.run", readLockLogStmt);
+        String.format("INFO.+%s(.+\n){5}\\Q%%s\\E\\.run", readLockLogStmt);
     Pattern t1Pattern = Pattern.compile(
         String.format(stackTracePatternString, t1.getClass().getName()));
     assertTrue(t1Pattern.matcher(logs.getOutput()).find());
@@ -314,4 +320,43 @@ public void run() {
         "Number of suppressed read-lock reports: 2"));
   }
 
+  @Test
+  public void testDetailedHoldMetrics() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY, true);
+    FakeTimer timer = new FakeTimer();
+    MetricsRegistry registry = new MetricsRegistry("Test");
+    MutableRatesWithAggregation rates =
+        registry.newRatesWithAggregation("Test");
+    FSNamesystemLock fsLock = new FSNamesystemLock(conf, rates, timer);
+
+    fsLock.readLock();
+    timer.advance(1);
+    fsLock.readUnlock("foo");
+    fsLock.readLock();
+    timer.advance(2);
+    fsLock.readUnlock("foo");
+
+    fsLock.readLock();
+    timer.advance(1);
+    fsLock.readLock();
+    timer.advance(1);
+    fsLock.readUnlock("bar");
+    fsLock.readUnlock("bar");
+
+    fsLock.writeLock();
+    timer.advance(1);
+    fsLock.writeUnlock("baz");
+
+    MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder();
+    rates.snapshot(rb, true);
+
+    assertGauge("FSNReadLockFooAvgTime", 1.5, rb);
+    assertCounter("FSNReadLockFooNumOps", 2L, rb);
+    assertGauge("FSNReadLockBarAvgTime", 2.0, rb);
+    assertCounter("FSNReadLockBarNumOps", 1L, rb);
+    assertGauge("FSNWriteLockBazAvgTime", 1.0, rb);
+    assertCounter("FSNWriteLockBazNumOps", 1L, rb);
+  }
+
 }
\ No newline at end of file
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MiniHadoopClusterManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MiniHadoopClusterManager.java
index 2e8ba5e6270..a989357d899 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MiniHadoopClusterManager.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MiniHadoopClusterManager.java
@@ -150,6 +150,7 @@ private void sleepForever() {
    */
   public void start() throws IOException, FileNotFoundException,
       URISyntaxException {
+    conf.setBoolean("dfs.namenode.lock.detailed-metrics.enabled", true);
     if (!noDFS) {
       dfs = new MiniDFSCluster.Builder(conf).nameNodePort(nnPort)
           .numDataNodes(numDataNodes).startupOption(dfsOpts).build();