HDFS-11615. FSNamesystemLock metrics can be inaccurate due to millisecond precision. Contributed by Erik Krogen.

(cherry picked from commit ad49098eb3)
(cherry picked from commit 051ab77806)
(cherry picked from commit 07047335a3)
This commit is contained in:
Zhe Zhang 2017-04-17 16:22:20 -07:00
parent fac5bdc023
commit ccd24f0917
4 changed files with 90 additions and 66 deletions

View File

@ -225,7 +225,7 @@ Release 2.7.4 - UNRELEASED
(Eric Krogen via aajisaka) (Eric Krogen via aajisaka)
HDFS-11352. Potential deadlock in NN when failing over. HDFS-11352. Potential deadlock in NN when failing over.
(Eric Krogen via aajisaka) (Erik Krogen via aajisaka)
HDFS-11379. DFSInputStream may infinite loop requesting block locations. Contributed by Daryn Sharp. HDFS-11379. DFSInputStream may infinite loop requesting block locations. Contributed by Daryn Sharp.
@ -235,6 +235,9 @@ Release 2.7.4 - UNRELEASED
HDFS-11486. Client close() should not fail fast if the last block is being HDFS-11486. Client close() should not fail fast if the last block is being
decommissioned. Contributed by Wei-Chiu Chuang and Yiqun Lin. decommissioned. Contributed by Wei-Chiu Chuang and Yiqun Lin.
HDFS-11615. FSNamesystemLock metrics can be inaccurate due to millisecond precision.
(Erik Krogen via zhz)
Release 2.7.3 - 2016-08-25 Release 2.7.3 - 2016-08-25
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -1,4 +1,3 @@
/** /**
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
@ -43,11 +42,15 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_WRITE_LOCK_REPOR
/** /**
* Mimics a ReentrantReadWriteLock but does not directly implement the interface * Mimics a ReentrantReadWriteLock but does not directly implement the interface
* so more sophisticated locking capabilities and logging/metrics are possible. * so more sophisticated locking capabilities and logging/metrics are possible.
* If {@link org.apache.hadoop.hdfs.DFSConfigKeys#DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY} * {@link org.apache.hadoop.hdfs.DFSConfigKeys#DFS_NAMENODE_LOCK_DETAILED_METRICS_KEY}
* is set to be true, metrics will be emitted into the FSNamesystem metrics * to be true, metrics will be emitted into the FSNamesystem metrics registry
* registry for each operation which acquires this lock indicating how long * for each operation which acquires this lock indicating how long the operation
* the operation held the lock for. Note that if a thread dies, metrics produced * held the lock for. These metrics have names of the form
* after the most recent snapshot will be lost due to the use of * FSN(Read|Write)LockNanosOperationName, where OperationName denotes the name
* of the operation that initiated the lock hold (this will be OTHER for certain
* uncategorized operations) and they export the hold time values in
* nanoseconds. Note that if a thread dies, metrics produced after the
* most recent snapshot will be lost due to the use of
* {@link MutableRatesWithAggregation}. However since threads are re-used * {@link MutableRatesWithAggregation}. However since threads are re-used
* between operations this should not generally be an issue. * between operations this should not generally be an issue.
*/ */
@ -63,24 +66,26 @@ class FSNamesystemLock {
* Log statements about long lock hold times will not be produced more * Log statements about long lock hold times will not be produced more
* frequently than this interval. * frequently than this interval.
*/ */
private final long lockSuppressWarningInterval; private final long lockSuppressWarningIntervalMs;
/** Threshold (ms) for long holding write lock report. */ /** Threshold (ms) for long holding write lock report. */
private final long writeLockReportingThreshold; private final long writeLockReportingThresholdMs;
/** Last time stamp for write lock. Keep the longest one for multi-entrance.*/ /** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
private long writeLockHeldTimeStamp; private long writeLockHeldTimeStampNanos;
private int numWriteLockWarningsSuppressed = 0; private int numWriteLockWarningsSuppressed = 0;
private long timeStampOfLastWriteLockReport = 0; /** Time stamp (ms) of the last time a write lock report was written. */
private long longestWriteLockHeldInterval = 0; private long timeStampOfLastWriteLockReportMs = 0;
/** Longest time (ms) a write lock was held since the last report. */
private long longestWriteLockHeldIntervalMs = 0;
/** Threshold (ms) for long holding read lock report. */ /** Threshold (ms) for long holding read lock report. */
private final long readLockReportingThreshold; private final long readLockReportingThresholdMs;
/** /**
* Last time stamp for read lock. Keep the longest one for * Last time stamp for read lock. Keep the longest one for
* multi-entrance. This is ThreadLocal since there could be * multi-entrance. This is ThreadLocal since there could be
* many read locks held simultaneously. * many read locks held simultaneously.
*/ */
private final ThreadLocal<Long> readLockHeldTimeStamp = private final ThreadLocal<Long> readLockHeldTimeStampNanos =
new ThreadLocal<Long>() { new ThreadLocal<Long>() {
@Override @Override
public Long initialValue() { public Long initialValue() {
@ -89,13 +94,16 @@ class FSNamesystemLock {
}; };
private final AtomicInteger numReadLockWarningsSuppressed = private final AtomicInteger numReadLockWarningsSuppressed =
new AtomicInteger(0); new AtomicInteger(0);
private final AtomicLong timeStampOfLastReadLockReport = new AtomicLong(0); /** Time stamp (ms) of the last time a read lock report was written. */
private final AtomicLong longestReadLockHeldInterval = new AtomicLong(0); private final AtomicLong timeStampOfLastReadLockReportMs = new AtomicLong(0);
/** Longest time (ms) a read lock was held since the last report. */
private final AtomicLong longestReadLockHeldIntervalMs = new AtomicLong(0);
@VisibleForTesting @VisibleForTesting
static final String OP_NAME_OTHER = "OTHER"; static final String OP_NAME_OTHER = "OTHER";
private static final String READ_LOCK_METRIC_PREFIX = "FSNReadLock"; private static final String READ_LOCK_METRIC_PREFIX = "FSNReadLock";
private static final String WRITE_LOCK_METRIC_PREFIX = "FSNWriteLock"; private static final String WRITE_LOCK_METRIC_PREFIX = "FSNWriteLock";
private static final String LOCK_METRIC_SUFFIX = "Nanos";
FSNamesystemLock(Configuration conf, FSNamesystemLock(Configuration conf,
MutableRatesWithAggregation detailedHoldTimeMetrics) { MutableRatesWithAggregation detailedHoldTimeMetrics) {
@ -110,13 +118,13 @@ class FSNamesystemLock {
this.coarseLock = new ReentrantReadWriteLock(fair); this.coarseLock = new ReentrantReadWriteLock(fair);
this.timer = timer; this.timer = timer;
this.writeLockReportingThreshold = conf.getLong( this.writeLockReportingThresholdMs = conf.getLong(
DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY, DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY,
DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT); DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_DEFAULT);
this.readLockReportingThreshold = conf.getLong( this.readLockReportingThresholdMs = conf.getLong(
DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY, DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY,
DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT); DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT);
this.lockSuppressWarningInterval = conf.getTimeDuration( this.lockSuppressWarningIntervalMs = conf.getTimeDuration(
DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY, DFS_LOCK_SUPPRESS_WARNING_INTERVAL_KEY,
DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); DFS_LOCK_SUPPRESS_WARNING_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
this.metricsEnabled = conf.getBoolean( this.metricsEnabled = conf.getBoolean(
@ -130,7 +138,7 @@ class FSNamesystemLock {
public void readLock() { public void readLock() {
coarseLock.readLock().lock(); coarseLock.readLock().lock();
if (coarseLock.getReadHoldCount() == 1) { if (coarseLock.getReadHoldCount() == 1) {
readLockHeldTimeStamp.set(timer.monotonicNow()); readLockHeldTimeStampNanos.set(timer.monotonicNowNanos());
} }
} }
@ -140,56 +148,59 @@ class FSNamesystemLock {
public void readUnlock(String opName) { public void readUnlock(String opName) {
final boolean needReport = coarseLock.getReadHoldCount() == 1; final boolean needReport = coarseLock.getReadHoldCount() == 1;
final long readLockInterval = final long readLockIntervalNanos =
timer.monotonicNow() - readLockHeldTimeStamp.get(); timer.monotonicNowNanos() - readLockHeldTimeStampNanos.get();
coarseLock.readLock().unlock(); coarseLock.readLock().unlock();
if (needReport) { if (needReport) {
addMetric(opName, readLockInterval, false); addMetric(opName, readLockIntervalNanos, false);
readLockHeldTimeStamp.remove(); readLockHeldTimeStampNanos.remove();
} }
if (needReport && readLockInterval >= this.readLockReportingThreshold) { final long readLockIntervalMs =
TimeUnit.NANOSECONDS.toMillis(readLockIntervalNanos);
if (needReport && readLockIntervalMs >= this.readLockReportingThresholdMs) {
long localLongestReadLock; long localLongestReadLock;
do { do {
localLongestReadLock = longestReadLockHeldInterval.get(); localLongestReadLock = longestReadLockHeldIntervalMs.get();
} while (localLongestReadLock - readLockInterval < 0 && } while (localLongestReadLock - readLockIntervalMs < 0 &&
!longestReadLockHeldInterval.compareAndSet(localLongestReadLock, !longestReadLockHeldIntervalMs.compareAndSet(localLongestReadLock,
readLockInterval)); readLockIntervalMs));
long localTimeStampOfLastReadLockReport; long localTimeStampOfLastReadLockReport;
long now; long nowMs;
do { do {
now = timer.monotonicNow(); nowMs = timer.monotonicNow();
localTimeStampOfLastReadLockReport = localTimeStampOfLastReadLockReport =
timeStampOfLastReadLockReport.get(); timeStampOfLastReadLockReportMs.get();
if (now - localTimeStampOfLastReadLockReport < if (nowMs - localTimeStampOfLastReadLockReport <
lockSuppressWarningInterval) { lockSuppressWarningIntervalMs) {
numReadLockWarningsSuppressed.incrementAndGet(); numReadLockWarningsSuppressed.incrementAndGet();
return; return;
} }
} while (!timeStampOfLastReadLockReport.compareAndSet( } while (!timeStampOfLastReadLockReportMs.compareAndSet(
localTimeStampOfLastReadLockReport, now)); localTimeStampOfLastReadLockReport, nowMs));
int numSuppressedWarnings = numReadLockWarningsSuppressed.getAndSet(0); int numSuppressedWarnings = numReadLockWarningsSuppressed.getAndSet(0);
long longestLockHeldInterval = longestReadLockHeldInterval.getAndSet(0); long longestLockHeldIntervalMs =
longestReadLockHeldIntervalMs.getAndSet(0);
FSNamesystem.LOG.info("FSNamesystem read lock held for " + FSNamesystem.LOG.info("FSNamesystem read lock held for " +
readLockInterval + " ms via\n" + readLockIntervalMs + " ms via\n" +
StringUtils.getStackTrace(Thread.currentThread()) + StringUtils.getStackTrace(Thread.currentThread()) +
"\tNumber of suppressed read-lock reports: " + numSuppressedWarnings + "\tNumber of suppressed read-lock reports: " + numSuppressedWarnings +
"\n\tLongest read-lock held interval: " + longestLockHeldInterval); "\n\tLongest read-lock held interval: " + longestLockHeldIntervalMs);
} }
} }
public void writeLock() { public void writeLock() {
coarseLock.writeLock().lock(); coarseLock.writeLock().lock();
if (coarseLock.getWriteHoldCount() == 1) { if (coarseLock.getWriteHoldCount() == 1) {
writeLockHeldTimeStamp = timer.monotonicNow(); writeLockHeldTimeStampNanos = timer.monotonicNowNanos();
} }
} }
public void writeLockInterruptibly() throws InterruptedException { public void writeLockInterruptibly() throws InterruptedException {
coarseLock.writeLock().lockInterruptibly(); coarseLock.writeLock().lockInterruptibly();
if (coarseLock.getWriteHoldCount() == 1) { if (coarseLock.getWriteHoldCount() == 1) {
writeLockHeldTimeStamp = timer.monotonicNow(); writeLockHeldTimeStampNanos = timer.monotonicNowNanos();
} }
} }
@ -200,24 +211,29 @@ class FSNamesystemLock {
public void writeUnlock(String opName) { public void writeUnlock(String opName) {
final boolean needReport = coarseLock.getWriteHoldCount() == 1 && final boolean needReport = coarseLock.getWriteHoldCount() == 1 &&
coarseLock.isWriteLockedByCurrentThread(); coarseLock.isWriteLockedByCurrentThread();
final long currentTime = timer.monotonicNow(); final long currentTimeNanos = timer.monotonicNowNanos();
final long writeLockInterval = currentTime - writeLockHeldTimeStamp; final long writeLockIntervalNanos =
currentTimeNanos - writeLockHeldTimeStampNanos;
final long currentTimeMs = TimeUnit.NANOSECONDS.toMillis(currentTimeNanos);
final long writeLockIntervalMs =
TimeUnit.NANOSECONDS.toMillis(writeLockIntervalNanos);
boolean logReport = false; boolean logReport = false;
int numSuppressedWarnings = 0; int numSuppressedWarnings = 0;
long longestLockHeldInterval = 0; long longestLockHeldIntervalMs = 0;
if (needReport && writeLockInterval >= this.writeLockReportingThreshold) { if (needReport &&
if (writeLockInterval > longestWriteLockHeldInterval) { writeLockIntervalMs >= this.writeLockReportingThresholdMs) {
longestWriteLockHeldInterval = writeLockInterval; if (writeLockIntervalMs > longestWriteLockHeldIntervalMs) {
longestWriteLockHeldIntervalMs = writeLockIntervalMs;
} }
if (currentTime - timeStampOfLastWriteLockReport > if (currentTimeMs - timeStampOfLastWriteLockReportMs >
this.lockSuppressWarningInterval) { this.lockSuppressWarningIntervalMs) {
logReport = true; logReport = true;
numSuppressedWarnings = numWriteLockWarningsSuppressed; numSuppressedWarnings = numWriteLockWarningsSuppressed;
numWriteLockWarningsSuppressed = 0; numWriteLockWarningsSuppressed = 0;
longestLockHeldInterval = longestWriteLockHeldInterval; longestLockHeldIntervalMs = longestWriteLockHeldIntervalMs;
longestWriteLockHeldInterval = 0; longestWriteLockHeldIntervalMs = 0;
timeStampOfLastWriteLockReport = currentTime; timeStampOfLastWriteLockReportMs = currentTimeMs;
} else { } else {
numWriteLockWarningsSuppressed++; numWriteLockWarningsSuppressed++;
} }
@ -226,16 +242,16 @@ class FSNamesystemLock {
coarseLock.writeLock().unlock(); coarseLock.writeLock().unlock();
if (needReport) { if (needReport) {
addMetric(opName, writeLockInterval, true); addMetric(opName, writeLockIntervalNanos, true);
} }
if (logReport) { if (logReport) {
FSNamesystem.LOG.info("FSNamesystem write lock held for " + FSNamesystem.LOG.info("FSNamesystem write lock held for " +
writeLockInterval + " ms via\n" + writeLockIntervalMs + " ms via\n" +
StringUtils.getStackTrace(Thread.currentThread()) + StringUtils.getStackTrace(Thread.currentThread()) +
"\tNumber of suppressed write-lock reports: " + "\tNumber of suppressed write-lock reports: " +
numSuppressedWarnings + "\n\tLongest write-lock held interval: " + numSuppressedWarnings + "\n\tLongest write-lock held interval: " +
longestLockHeldInterval); longestLockHeldIntervalMs);
} }
} }
@ -269,13 +285,14 @@ class FSNamesystemLock {
/** /**
* Add the lock hold time for a recent operation to the metrics. * Add the lock hold time for a recent operation to the metrics.
* @param operationName Name of the operation for which to record the time * @param operationName Name of the operation for which to record the time
* @param value Length of time the lock was held * @param value Length of time the lock was held (nanoseconds)
*/ */
private void addMetric(String operationName, long value, boolean isWrite) { private void addMetric(String operationName, long value, boolean isWrite) {
if (metricsEnabled) { if (metricsEnabled) {
String metricName = String metricName =
(isWrite ? WRITE_LOCK_METRIC_PREFIX : READ_LOCK_METRIC_PREFIX) + (isWrite ? WRITE_LOCK_METRIC_PREFIX : READ_LOCK_METRIC_PREFIX) +
org.apache.commons.lang.StringUtils.capitalize(operationName); org.apache.commons.lang.StringUtils.capitalize(operationName) +
LOCK_METRIC_SUFFIX;
detailedHoldTimeMetrics.add(metricName, value); detailedHoldTimeMetrics.add(metricName, value);
} }
} }

View File

@ -2358,7 +2358,11 @@
<name>dfs.namenode.lock.detailed-metrics.enabled</name> <name>dfs.namenode.lock.detailed-metrics.enabled</name>
<value>false</value> <value>false</value>
<description>If true, the namenode will keep track of how long various <description>If true, the namenode will keep track of how long various
operations hold the Namesystem lock for and emit this as metrics. operations hold the Namesystem lock for and emit this as metrics. These
metrics have names of the form FSN(Read|Write)LockNanosOperationName,
where OperationName denotes the name of the operation that initiated the
lock hold (this will be OTHER for certain uncategorized operations) and
they export the hold time values in nanoseconds.
</description> </description>
</property> </property>

View File

@ -331,10 +331,10 @@ public class TestFSNamesystemLock {
FSNamesystemLock fsLock = new FSNamesystemLock(conf, rates, timer); FSNamesystemLock fsLock = new FSNamesystemLock(conf, rates, timer);
fsLock.readLock(); fsLock.readLock();
timer.advance(1); timer.advanceNanos(1200000);
fsLock.readUnlock("foo"); fsLock.readUnlock("foo");
fsLock.readLock(); fsLock.readLock();
timer.advance(2); timer.advanceNanos(2400000);
fsLock.readUnlock("foo"); fsLock.readUnlock("foo");
fsLock.readLock(); fsLock.readLock();
@ -351,12 +351,12 @@ public class TestFSNamesystemLock {
MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder(); MetricsRecordBuilder rb = MetricsAsserts.mockMetricsRecordBuilder();
rates.snapshot(rb, true); rates.snapshot(rb, true);
assertGauge("FSNReadLockFooAvgTime", 1.5, rb); assertGauge("FSNReadLockFooNanosAvgTime", 1800000.0, rb);
assertCounter("FSNReadLockFooNumOps", 2L, rb); assertCounter("FSNReadLockFooNanosNumOps", 2L, rb);
assertGauge("FSNReadLockBarAvgTime", 2.0, rb); assertGauge("FSNReadLockBarNanosAvgTime", 2000000.0, rb);
assertCounter("FSNReadLockBarNumOps", 1L, rb); assertCounter("FSNReadLockBarNanosNumOps", 1L, rb);
assertGauge("FSNWriteLockBazAvgTime", 1.0, rb); assertGauge("FSNWriteLockBazNanosAvgTime", 1000000.0, rb);
assertCounter("FSNWriteLockBazNumOps", 1L, rb); assertCounter("FSNWriteLockBazNanosNumOps", 1L, rb);
} }
} }