HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. Contributed by Mingliang Liu.

This commit is contained in:
Haohui Mai 2015-10-13 13:20:11 -07:00
parent 59e9980117
commit d1e1925bf6
4 changed files with 84 additions and 0 deletions

View File

@ -196,6 +196,10 @@ public void stopCapturing() {
logger.removeAppender(appender);
}
public void clearOutput() {
sw.getBuffer().setLength(0);
}
}

View File

@ -1547,6 +1547,9 @@ Release 2.8.0 - UNRELEASED
HDFS-9139. Enable parallel JUnit tests for HDFS Pre-commit
(Chris Nauroth and Vinayakumar B via vinayakumarb)
HDFS-9145. Tracking methods that hold FSNamesytemLock for too long.
(Mingliang Liu via wheat9)
BUG FIXES
HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.

View File

@ -1458,6 +1458,11 @@ public static List<URI> getSharedEditsDirs(Configuration conf) {
return Util.stringCollectionAsURIs(dirNames);
}
/** Threshold (ms) for long holding write lock report. */
static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
/** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
private long writeLockHeldTimeStamp;
@Override
public void readLock() {
this.fsLock.readLock().lock();
@ -1469,14 +1474,30 @@ public void readUnlock() {
@Override
public void writeLock() {
this.fsLock.writeLock().lock();
if (fsLock.getWriteHoldCount() == 1) {
writeLockHeldTimeStamp = monotonicNow();
}
}
@Override
public void writeLockInterruptibly() throws InterruptedException {
this.fsLock.writeLock().lockInterruptibly();
if (fsLock.getWriteHoldCount() == 1) {
writeLockHeldTimeStamp = monotonicNow();
}
}
@Override
public void writeUnlock() {
final boolean needReport = fsLock.getWriteHoldCount() == 1 &&
fsLock.isWriteLockedByCurrentThread();
this.fsLock.writeLock().unlock();
if (needReport) {
long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp;
if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
LOG.info("FSNamesystem write lock held for " + writeLockInterval +
" ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
}
}
}
@Override
public boolean hasWriteLock() {

View File

@ -39,6 +39,9 @@
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
@ -268,6 +271,59 @@ public void run() {
threadCount, rwLock.getQueueLength());
}
/**
* Test when FSNamesystem lock is held for a long time, logger will report it.
*/
@Test(timeout=45000)
public void testFSLockLongHoldingReport() throws Exception {
Configuration conf = new Configuration();
FSImage fsImage = Mockito.mock(FSImage.class);
FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
FSNamesystem fsn = new FSNamesystem(conf, fsImage);
LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO);
// Don't report if the write lock is held for a short time
fsn.writeLock();
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
fsn.writeUnlock();
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
// Report if the write lock is held for a long time
fsn.writeLock();
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
logs.clearOutput();
fsn.writeUnlock();
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
// Report if the write lock is held (interruptibly) for a long time
fsn.writeLockInterruptibly();
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
logs.clearOutput();
fsn.writeUnlock();
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
// Report if it's held for a long time when re-entering write lock
fsn.writeLock();
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
fsn.writeLockInterruptibly();
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
fsn.writeLock();
Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
logs.clearOutput();
fsn.writeUnlock();
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
logs.clearOutput();
fsn.writeUnlock();
assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
logs.clearOutput();
fsn.writeUnlock();
assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
}
@Test
public void testSafemodeReplicationConf() throws IOException {
Configuration conf = new Configuration();