HDFS-8883. NameNode Metrics : Add FSNameSystem lock Queue Length. Contributed by Anu Engineer.

This commit is contained in:
Xiaoyu Yao 2015-08-17 10:15:56 -07:00
parent 13604bd5f1
commit a7862d5fe4
7 changed files with 88 additions and 2 deletions

View File

@ -236,6 +236,7 @@ Each metrics record contains tags such as HAState and Hostname as additional inf
| `NumActiveClients` | Current number of active clients holding lease | | `NumActiveClients` | Current number of active clients holding lease |
| `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state | | `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state |
| `FSState` | Current state of the file system: Safemode or Operational | | `FSState` | Current state of the file system: Safemode or Operational |
| `LockQueueLength` | Number of threads waiting to acquire the FSNamesystem lock |
JournalNode JournalNode
----------- -----------

View File

@ -788,6 +788,9 @@ Release 2.8.0 - UNRELEASED
HDFS-8824. Do not use small blocks for balancing the cluster. (szetszwo) HDFS-8824. Do not use small blocks for balancing the cluster. (szetszwo)
HDFS-8883. NameNode Metrics : Add FSNameSystem lock Queue Length.
(Anu Engineer via xyao)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

View File

@ -3911,6 +3911,21 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
return dir.ezManager.getNumEncryptionZones(); return dir.ezManager.getNumEncryptionZones();
} }
/**
 * Reports how many threads are currently queued on the namesystem lock.
 *
 * The value is obtained directly from {@code fsLock}; a high reading means
 * many threads are waiting to acquire that lock.
 *
 * @return the queue length reported by {@code fsLock}
 */
@Override
@Metric({"LockQueueLength", "Number of threads waiting to " +
    "acquire FSNameSystemLock"})
public int getFsLockQueueLength() {
  final int waitingThreads = fsLock.getQueueLength();
  return waitingThreads;
}
int getNumberOfDatanodes(DatanodeReportType type) { int getNumberOfDatanodes(DatanodeReportType type) {
readLock(); readLock();
try { try {

View File

@ -59,4 +59,15 @@ class FSNamesystemLock implements ReadWriteLock {
public boolean isWriteLockedByCurrentThread() { public boolean isWriteLockedByCurrentThread() {
return coarseLock.isWriteLockedByCurrentThread(); return coarseLock.isWriteLockedByCurrentThread();
} }
/**
 * Reports the number of threads queued on the underlying coarse lock.
 *
 * The more threads are queued, the heavier the contention on this lock.
 *
 * @return the queue length reported by {@code coarseLock}
 */
public int getQueueLength() {
  final int queued = coarseLock.getQueueLength();
  return queued;
}
} }

View File

@ -189,4 +189,13 @@ public interface FSNamesystemMBean {
* Return the number of encryption zones in the system. * Return the number of encryption zones in the system.
*/ */
int getNumEncryptionZones(); int getNumEncryptionZones();
/**
 * Returns the length of the wait queue for the FSNamesystem lock.
 *
 * A larger number here indicates that many threads are waiting for the
 * lock.
 *
 * @return the number of threads waiting to acquire the FSNamesystem lock
 */
int getFsLockQueueLength();
} }

View File

@ -37,10 +37,15 @@ import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.junit.After; import org.junit.After;
import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.mockito.Mockito; import org.mockito.Mockito;
import org.mockito.internal.util.reflection.Whitebox; import org.mockito.internal.util.reflection.Whitebox;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class TestFSNamesystem { public class TestFSNamesystem {
@After @After
@ -233,4 +238,27 @@ public class TestFSNamesystem {
assertEquals(-63, assertEquals(-63,
FSNamesystem.getEffectiveLayoutVersion(false, -63, -61, -63)); FSNamesystem.getEffectiveLayoutVersion(false, -63, -61, -63));
} }
/**
 * Verifies that {@code FSNamesystemLock#getQueueLength()} counts the threads
 * blocked behind a held write lock.
 *
 * The test thread takes the write lock, then starts {@code threadCount}
 * helpers that each try to take the read lock. The latch only proves that
 * every helper has started running; it is counted down before the helper
 * blocks, so the queue length is polled (with a deadline) until all helpers
 * are actually parked on the lock before asserting.
 *
 * @throws InterruptedException if the test thread is interrupted while
 *         waiting for the helpers
 */
@Test
public void testFSLockGetWaiterCount() throws InterruptedException {
  final int threadCount = 3;
  final CountDownLatch latch = new CountDownLatch(threadCount);
  final FSNamesystemLock rwLock = new FSNamesystemLock(true);
  final ExecutorService helper = Executors.newFixedThreadPool(threadCount);
  rwLock.writeLock().lock();
  try {
    for (int x = 0; x < threadCount; x++) {
      helper.execute(new Runnable() {
        @Override
        public void run() {
          latch.countDown();
          rwLock.readLock().lock();
          // Release immediately so the lock is left clean once the
          // writer lets the helpers through.
          rwLock.readLock().unlock();
        }
      });
    }
    latch.await();

    // countDown() happens before lock(), so a helper may not be queued yet
    // when await() returns. Give the helpers a bounded amount of time to
    // reach the lock instead of asserting immediately.
    final long deadlineNanos = System.nanoTime() + 10L * 1000L * 1000L * 1000L;
    while (rwLock.getQueueLength() != threadCount
        && System.nanoTime() - deadlineNanos < 0) {
      Thread.sleep(10);
    }

    Assert.assertEquals("Expected number of blocked thread not found",
        threadCount, rwLock.getQueueLength());
  } finally {
    // Unblock the helpers and let the pool terminate; otherwise the
    // non-daemon worker threads stay parked on the lock forever.
    rwLock.writeLock().unlock();
    helper.shutdown();
  }
}
} }

View File

@ -17,8 +17,8 @@
*/ */
package org.apache.hadoop.hdfs.server.namenode; package org.apache.hadoop.hdfs.server.namenode;
import org.apache.commons.io.FileUtils;
import com.google.common.util.concurrent.Uninterruptibles; import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
@ -173,7 +173,7 @@ public class TestNameNodeMXBean {
// This will cause the first dir to fail. // This will cause the first dir to fail.
File failedNameDir = new File(nameDirUris.iterator().next()); File failedNameDir = new File(nameDirUris.iterator().next());
assertEquals(0, FileUtil.chmod( assertEquals(0, FileUtil.chmod(
new File(failedNameDir, "current").getAbsolutePath(), "000")); new File(failedNameDir, "current").getAbsolutePath(), "000"));
cluster.getNameNodeRpc().rollEditLog(); cluster.getNameNodeRpc().rollEditLog();
nameDirStatuses = (String) (mbs.getAttribute(mxbeanName, nameDirStatuses = (String) (mbs.getAttribute(mxbeanName,
@ -376,4 +376,23 @@ public class TestNameNodeMXBean {
} }
} }
} }
/**
 * Starts a mini cluster with no datanodes and checks that the
 * {@code LockQueueLength} attribute of the FSNamesystem bean reads zero.
 *
 * @throws Exception if the cluster cannot be started or the attribute
 *         cannot be read
 */
@Test(timeout = 120000)
public void testQueueLength() throws Exception {
  final Configuration conf = new Configuration();
  MiniDFSCluster miniCluster = null;
  try {
    miniCluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    miniCluster.waitActive();

    final MBeanServer server = ManagementFactory.getPlatformMBeanServer();
    final ObjectName fsBeanName =
        new ObjectName("Hadoop:service=NameNode,name=FSNamesystem");
    final Object rawValue = server.getAttribute(fsBeanName, "LockQueueLength");
    final int reportedLength = (Integer) rawValue;

    // Nothing is contending for the lock on an idle cluster.
    assertEquals(0, reportedLength);
  } finally {
    if (miniCluster != null) {
      miniCluster.shutdown();
    }
  }
}
} }