diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 1e5852b5985..5bc1252dfc6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -196,6 +196,10 @@ public abstract class GenericTestUtils { logger.removeAppender(appender); } + + public void clearOutput() { + sw.getBuffer().setLength(0); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 617c345196a..f2d5f7c4ca7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1547,6 +1547,9 @@ Release 2.8.0 - UNRELEASED HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit (Chris Nauroth and Vinayakumar B via vinayakumarb) + HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. + (Mingliang Liu via wheat9) + BUG FIXES HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a5b6c777cee..9d3f5828cbb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1458,6 +1458,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return Util.stringCollectionAsURIs(dirNames); } + /** Threshold (ms) for long holding write lock report. */ + static final short WRITELOCK_REPORTING_THRESHOLD = 1000; + /** Last time stamp for write lock. Keep the longest one for multi-entrance.*/ + private long writeLockHeldTimeStamp; + @Override public void readLock() { this.fsLock.readLock().lock(); @@ -1469,14 +1474,30 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Override public void writeLock() { this.fsLock.writeLock().lock(); + if (fsLock.getWriteHoldCount() == 1) { + writeLockHeldTimeStamp = monotonicNow(); + } } @Override public void writeLockInterruptibly() throws InterruptedException { this.fsLock.writeLock().lockInterruptibly(); + if (fsLock.getWriteHoldCount() == 1) { + writeLockHeldTimeStamp = monotonicNow(); + } } @Override public void writeUnlock() { + final boolean needReport = fsLock.getWriteHoldCount() == 1 && + fsLock.isWriteLockedByCurrentThread(); this.fsLock.writeLock().unlock(); + + if (needReport) { + long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp; + if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) { + LOG.info("FSNamesystem write lock held for " + writeLockInterval + + " ms via\n" + StringUtils.getStackTrace(Thread.currentThread())); + } + } } @Override public boolean hasWriteLock() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java index d533cf6d5b8..15fc40e4fe7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java @@ -39,6 +39,9 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.LogCapturer; +import org.apache.log4j.Level; import org.junit.After; import org.junit.Assert; import org.junit.Test; @@ -268,6 +271,59 @@ public class TestFSNamesystem { threadCount, rwLock.getQueueLength()); } + /** + * Test when FSNamesystem lock is held for a long time, logger will report it. + */ + @Test(timeout=45000) + public void testFSLockLongHoldingReport() throws Exception { + Configuration conf = new Configuration(); + FSImage fsImage = Mockito.mock(FSImage.class); + FSEditLog fsEditLog = Mockito.mock(FSEditLog.class); + Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog); + FSNamesystem fsn = new FSNamesystem(conf, fsImage); + + LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO); + + // Don't report if the write lock is held for a short time + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2); + fsn.writeUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); + + + // Report if the write lock is held for a long time + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100); + logs.clearOutput(); + fsn.writeUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); + + // Report if the write lock is held (interruptibly) for a long time + fsn.writeLockInterruptibly(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100); + logs.clearOutput(); + fsn.writeUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); + + // Report if it's held for a long time when re-entering write lock + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1); + fsn.writeLockInterruptibly(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1); + fsn.writeLock(); + Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2); + logs.clearOutput(); + fsn.writeUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); + logs.clearOutput(); + fsn.writeUnlock(); + assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName())); + logs.clearOutput(); + fsn.writeUnlock(); + assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName())); + } + @Test public void testSafemodeReplicationConf() throws IOException { Configuration conf = new Configuration();