HDFS-7773. Additional metrics in HDFS to be accessed via jmx. Contributed by Anu Engineer.

This commit is contained in:
cnauroth 2015-02-20 12:37:48 -08:00
parent 8c6ae0d619
commit 02e7dec79d
8 changed files with 149 additions and 14 deletions

View File

@ -191,6 +191,7 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a
| `GetImageAvgTime` | Average fsimage download time in milliseconds |
| `PutImageNumOps` | Total number of fsimage uploads to SecondaryNameNode |
| `PutImageAvgTime` | Average fsimage upload time in milliseconds |
| `TotalFileOps`| Total number of file operations performed |
FSNamesystem
------------
@ -314,6 +315,10 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
| `SendDataPacketBlockedOnNetworkNanosAvgTime` | Average waiting time of sending packets in nanoseconds |
| `SendDataPacketTransferNanosNumOps` | Total number of sending packets |
| `SendDataPacketTransferNanosAvgTime` | Average transfer time of sending packets in nanoseconds |
| `TotalWriteTime`| Total number of milliseconds spent on write operation |
| `TotalReadTime` | Total number of milliseconds spent on read operation |
| `RemoteBytesRead` | Number of bytes read by remote clients |
| `RemoteBytesWritten` | Number of bytes written by remote clients |
yarn context
============

View File

@ -663,6 +663,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7772. Document hdfs balancer -exclude/-include option in
HDFSCommands.html (Xiaoyu Yao via cnauroth)
HDFS-7773. Additional metrics in HDFS to be accessed via jmx.
(Anu Engineer via cnauroth)
OPTIMIZATIONS
HDFS-7454. Reduce memory footprint for AclEntries in NameNode.

View File

@ -658,6 +658,7 @@ class BlockReceiver implements Closeable {
replicaInfo.setLastChecksumAndDataLen(offsetInBlock, lastCrc);
datanode.metrics.incrBytesWritten(len);
datanode.metrics.incrTotalWriteTime(duration);
manageWriterOsCache(offsetInBlock);
}

View File

@ -86,6 +86,7 @@ import org.apache.hadoop.util.DataChecksum;
import com.google.common.base.Preconditions;
import com.google.protobuf.ByteString;
import org.apache.hadoop.util.Time;
/**
@ -480,7 +481,7 @@ class DataXceiver extends Receiver implements Runnable {
final boolean sendChecksum,
final CachingStrategy cachingStrategy) throws IOException {
previousOpClientName = clientName;
long read = 0;
OutputStream baseStream = getOutputStream();
DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
baseStream, HdfsConstants.SMALL_BUFFER_SIZE));
@ -515,8 +516,9 @@ class DataXceiver extends Receiver implements Runnable {
// send op status
writeSuccessWithChecksumInfo(blockSender, new DataOutputStream(getOutputStream()));
long read = blockSender.sendBlock(out, baseStream, null); // send data
long beginRead = Time.monotonicNow();
read = blockSender.sendBlock(out, baseStream, null); // send data
long duration = Time.monotonicNow() - beginRead;
if (blockSender.didSendEntireByteRange()) {
// If we sent the entire range, then we should expect the client
// to respond with a Status enum.
@ -539,6 +541,7 @@ class DataXceiver extends Receiver implements Runnable {
}
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
datanode.metrics.incrTotalReadTime(duration);
} catch ( SocketException ignored ) {
if (LOG.isTraceEnabled()) {
LOG.trace(dnR + ":Ignoring exception while serving " + block + " to " +
@ -563,7 +566,7 @@ class DataXceiver extends Receiver implements Runnable {
//update metrics
datanode.metrics.addReadBlockOp(elapsed());
datanode.metrics.incrReadsFromClient(peer.isLocal());
datanode.metrics.incrReadsFromClient(peer.isLocal(), read);
}
@Override
@ -590,7 +593,7 @@ class DataXceiver extends Receiver implements Runnable {
final boolean isClient = !isDatanode;
final boolean isTransfer = stage == BlockConstructionStage.TRANSFER_RBW
|| stage == BlockConstructionStage.TRANSFER_FINALIZED;
long size = 0;
// check single target for transfer-RBW/Finalized
if (isTransfer && targets.length > 0) {
throw new IOException(stage + " does not support multiple targets "
@ -796,7 +799,9 @@ class DataXceiver extends Receiver implements Runnable {
+ localAddress + " of size " + block.getNumBytes());
}
if(isClient) {
size = block.getNumBytes();
}
} catch (IOException ioe) {
LOG.info("opWriteBlock " + block + " received exception " + ioe);
incrDatanodeNetworkErrors();
@ -813,7 +818,7 @@ class DataXceiver extends Receiver implements Runnable {
//update metrics
datanode.metrics.addWriteBlockOp(elapsed());
datanode.metrics.incrWritesFromClient(peer.isLocal());
datanode.metrics.incrWritesFromClient(peer.isLocal(), size);
}
@Override
@ -993,12 +998,15 @@ class DataXceiver extends Receiver implements Runnable {
// send status first
writeSuccessWithChecksumInfo(blockSender, reply);
// send block content to the target
long read = blockSender.sendBlock(reply, baseStream,
dataXceiverServer.balanceThrottler);
long beginRead = Time.monotonicNow();
// send block content to the target
long read = blockSender.sendBlock(reply, baseStream,
dataXceiverServer.balanceThrottler);
long duration = Time.monotonicNow() - beginRead;
datanode.metrics.incrBytesRead((int) read);
datanode.metrics.incrBlocksRead();
datanode.metrics.incrTotalReadTime(duration);
LOG.info("Copied " + block + " to " + peer.getRemoteAddressString());
} catch (IOException ioe) {

View File

@ -50,7 +50,11 @@ import org.apache.hadoop.metrics2.source.JvmMetrics;
public class DataNodeMetrics {
@Metric MutableCounterLong bytesWritten;
@Metric("Milliseconds spent writing")
MutableCounterLong totalWriteTime;
@Metric MutableCounterLong bytesRead;
@Metric("Milliseconds spent reading")
MutableCounterLong totalReadTime;
@Metric MutableCounterLong blocksWritten;
@Metric MutableCounterLong blocksRead;
@Metric MutableCounterLong blocksReplicated;
@ -64,6 +68,10 @@ public class DataNodeMetrics {
@Metric MutableCounterLong writesFromLocalClient;
@Metric MutableCounterLong writesFromRemoteClient;
@Metric MutableCounterLong blocksGetLocalPathInfo;
@Metric("Bytes read by remote client")
MutableCounterLong remoteBytesRead;
@Metric("Bytes written by remote client")
MutableCounterLong remoteBytesWritten;
// RamDisk metrics on read/write
@Metric MutableCounterLong ramDiskBlocksWrite;
@ -262,6 +270,15 @@ public class DataNodeMetrics {
fsyncCount.incr();
}
public void incrTotalWriteTime(long timeTaken) {
totalWriteTime.incr(timeTaken);
}
public void incrTotalReadTime(long timeTaken) {
totalReadTime.incr(timeTaken);
}
public void addPacketAckRoundTripTimeNanos(long latencyNanos) {
packetAckRoundTripTimeNanos.add(latencyNanos);
for (MutableQuantiles q : packetAckRoundTripTimeNanosQuantiles) {
@ -287,12 +304,23 @@ public class DataNodeMetrics {
DefaultMetricsSystem.shutdown();
}
public void incrWritesFromClient(boolean local) {
(local ? writesFromLocalClient : writesFromRemoteClient).incr();
public void incrWritesFromClient(boolean local, long size) {
if(local) {
writesFromLocalClient.incr();
} else {
writesFromRemoteClient.incr();
remoteBytesWritten.incr(size);
}
}
public void incrReadsFromClient(boolean local) {
(local ? readsFromLocalClient : readsFromRemoteClient).incr();
public void incrReadsFromClient(boolean local, long size) {
if (local) {
readsFromLocalClient.incr();
} else {
readsFromRemoteClient.incr();
remoteBytesRead.incr(size);
}
}
public void incrVolumeFailures() {

View File

@ -77,6 +77,31 @@ public class NameNodeMetrics {
@Metric("Number of blockReports from individual storages")
MutableCounterLong storageBlockReportOps;
@Metric("Number of file system operations")
public long totalFileOps(){
return
getBlockLocations.value() +
createFileOps.value() +
filesAppended.value() +
addBlockOps.value() +
getAdditionalDatanodeOps.value() +
filesRenamed.value() +
filesTruncated.value() +
deleteFileOps.value() +
getListingOps.value() +
fileInfoOps.value() +
getLinkTargetOps.value() +
createSnapshotOps.value() +
deleteSnapshotOps.value() +
allowSnapshotOps.value() +
disallowSnapshotOps.value() +
renameSnapshotOps.value() +
listSnapshottableDirOps.value() +
createSymlinkOps.value() +
snapshotDiffReportOps.value();
}
@Metric("Journal transactions") MutableRate transactions;
@Metric("Journal syncs") MutableRate syncs;
final MutableQuantiles[] syncsQuantiles;

View File

@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.util.Time;
import org.junit.Test;
import org.mockito.Mockito;
@ -246,4 +247,48 @@ public class TestDataNodeMetrics {
DataNodeFaultInjector.instance = new DataNodeFaultInjector();
}
}
/**
* This function ensures that writing causes TotalWritetime to increment
* and reading causes totalReadTime to move.
* @throws Exception
*/
@Test
public void testDataNodeTimeSpend() throws Exception {
Configuration conf = new HdfsConfiguration();
SimulatedFSDataset.setFactory(conf);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
FileSystem fs = cluster.getFileSystem();
List<DataNode> datanodes = cluster.getDataNodes();
assertEquals(datanodes.size(), 1);
DataNode datanode = datanodes.get(0);
MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
final long LONG_FILE_LEN = 1024 * 1024 * 10;
long startWriteValue = getLongCounter("TotalWriteTime", rb);
long startReadValue = getLongCounter("TotalReadTime", rb);
for (int x =0; x < 50; x++) {
DFSTestUtil.createFile(fs, new Path("/time.txt."+ x),
LONG_FILE_LEN, (short) 1, Time.monotonicNow());
}
for (int x =0; x < 50; x++) {
String s = DFSTestUtil.readFile(fs, new Path("/time.txt." + x));
}
MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
long endReadValue = getLongCounter("TotalReadTime", rbNew);
assertTrue(endReadValue > startReadValue);
assertTrue(endWriteValue > startWriteValue);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
}

View File

@ -455,4 +455,24 @@ public class TestNameNodeMetrics {
assertQuantileGauges("Syncs1s", rb);
assertQuantileGauges("BlockReport1s", rb);
}
/**
* Test NN ReadOps Count and WriteOps Count
*/
@Test
public void testReadWriteOps() throws Exception {
MetricsRecordBuilder rb = getMetrics(NN_METRICS);
long startWriteCounter = MetricsAsserts.getLongCounter("TransactionsNumOps",
rb);
Path file1_Path = new Path(TEST_ROOT_DIR_PATH, "ReadData.dat");
//Perform create file operation
createFile(file1_Path, 1024 * 1024,(short)2);
// Perform read file operation on earlier created file
readFile(fs, file1_Path);
MetricsRecordBuilder rbNew = getMetrics(NN_METRICS);
assertTrue(MetricsAsserts.getLongCounter("TransactionsNumOps", rbNew) >
startWriteCounter);
}
}