From c498ef4b6dc7960f1f4699cafea598b3d71ca885 Mon Sep 17 00:00:00 2001 From: tomscut Date: Mon, 1 Mar 2021 16:35:12 -0800 Subject: [PATCH] HDFS-15808. Add metrics for FSNamesystem read/write lock hold long time. (#2668) Contributed by tomscut. (cherry picked from commit 9cb51bf106802c78b1400fba9f1d1c7e772dd5e7) --- .../hdfs/server/namenode/FSNamesystem.java | 14 ++++++++ .../server/namenode/FSNamesystemLock.java | 34 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 6a6405b6566..fae094ad7f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4758,6 +4758,20 @@ public int getFsLockQueueLength() { return fsLock.getQueueLength(); } + @Metric(value = {"ReadLockLongHoldCount", "The number of time " + + "the read lock has been held for longer than the threshold"}, + type = Metric.Type.COUNTER) + public long getNumOfReadLockLongHold() { + return fsLock.getNumOfReadLockLongHold(); + } + + @Metric(value = {"WriteLockLongHoldCount", "The number of time " + + "the write lock has been held for longer than the threshold"}, + type = Metric.Type.COUNTER) + public long getNumOfWriteLockLongHold() { + return fsLock.getNumOfWriteLockLongHold(); + } + int getNumberOfDatanodes(DatanodeReportType type) { readLock(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java index 811cd231c1b..6502c4c2b4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java @@ -108,6 +108,16 @@ public Long initialValue() { private final AtomicReference longestReadLockHeldInfo = new AtomicReference<>(new LockHeldInfo(0, 0, null)); private LockHeldInfo longestWriteLockHeldInfo = new LockHeldInfo(0, 0, null); + /** + * The number of time the read lock + * has been held longer than the threshold. + */ + private final AtomicLong numReadLockLongHold = new AtomicLong(0); + /** + * The number of time the write lock + * has been held for longer than the threshold. + */ + private final AtomicLong numWriteLockLongHold = new AtomicLong(0); @VisibleForTesting static final String OP_NAME_OTHER = "OTHER"; @@ -176,6 +186,7 @@ public void readUnlock(String opName) { final long readLockIntervalMs = TimeUnit.NANOSECONDS.toMillis(readLockIntervalNanos); if (needReport && readLockIntervalMs >= this.readLockReportingThresholdMs) { + numReadLockLongHold.incrementAndGet(); LockHeldInfo localLockHeldInfo; do { localLockHeldInfo = longestReadLockHeldInfo.get(); @@ -253,6 +264,7 @@ public void writeUnlock(String opName, boolean suppressWriteLockReport) { LogAction logAction = LogThrottlingHelper.DO_NOT_LOG; if (needReport && writeLockIntervalMs >= this.writeLockReportingThresholdMs) { + numWriteLockLongHold.incrementAndGet(); if (longestWriteLockHeldInfo.getIntervalMs() < writeLockIntervalMs) { longestWriteLockHeldInfo = new LockHeldInfo(currentTimeMs, writeLockIntervalMs, @@ -302,6 +314,28 @@ public Condition newWriteLockCondition() { return coarseLock.writeLock().newCondition(); } + /** + * Returns the number of time the read lock + * has been held longer than the threshold. + * + * @return long - Number of time the read lock + * has been held longer than the threshold + */ + public long getNumOfReadLockLongHold() { + return numReadLockLongHold.get(); + } + + /** + * Returns the number of time the write lock + * has been held longer than the threshold. + * + * @return long - Number of time the write lock + * has been held longer than the threshold. + */ + public long getNumOfWriteLockLongHold() { + return numWriteLockLongHold.get(); + } + /** * Returns the QueueLength of waiting threads. *