HDFS-7773. Additional metrics in HDFS to be accessed via jmx. Contributed by Anu Engineer.
commit 02e7dec79d (parent 8c6ae0d619)
@@ -191,6 +191,7 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a
 | `GetImageAvgTime` | Average fsimage download time in milliseconds |
 | `PutImageNumOps` | Total number of fsimage uploads to SecondaryNameNode |
 | `PutImageAvgTime` | Average fsimage upload time in milliseconds |
+| `TotalFileOps`| Total number of file operations performed |

 FSNamesystem
 ------------
@@ -314,6 +315,10 @@ Each metrics record contains tags such as SessionId and Hostname as additional i
 | `SendDataPacketBlockedOnNetworkNanosAvgTime` | Average waiting time of sending packets in nanoseconds |
 | `SendDataPacketTransferNanosNumOps` | Total number of sending packets |
 | `SendDataPacketTransferNanosAvgTime` | Average transfer time of sending packets in nanoseconds |
+| `TotalWriteTime`| Total number of milliseconds spent on write operation |
+| `TotalReadTime` | Total number of milliseconds spent on read operation |
+| `RemoteBytesRead` | Number of bytes read by remote clients |
+| `RemoteBytesWritten` | Number of bytes written by remote clients |

 yarn context
 ============
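The two documentation hunks above describe counters that surface through Hadoop's JMX interface (the JMX JSON servlet and the regular MBean server). As a quick way to check them after deploying this change, the following standalone sketch polls the NameNode's `/jmx` servlet for the new `TotalFileOps` counter; the host name, the default Hadoop 2.x HTTP port 50070, and the line-matching shortcut are illustrative assumptions and are not part of the patch. The DataNode counters (`TotalWriteTime`, `TotalReadTime`, `RemoteBytesRead`, `RemoteBytesWritten`) can be read the same way from the DataNode's `/jmx` endpoint, where they appear under its `DataNodeActivity-<hostname>-<port>` bean.

    // Sketch only: fetch the NameNodeActivity bean as JSON and print the new counter.
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class TotalFileOpsProbe {
      public static void main(String[] args) throws Exception {
        // Assumed host/port; adjust to your cluster.
        URL url = new URL("http://namenode-host:50070/jmx"
            + "?qry=Hadoop:service=NameNode,name=NameNodeActivity");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try (BufferedReader in = new BufferedReader(
            new InputStreamReader(conn.getInputStream()))) {
          String line;
          while ((line = in.readLine()) != null) {
            // The servlet returns JSON; a real client would parse it instead of grepping.
            if (line.contains("TotalFileOps")) {
              System.out.println(line.trim());
            }
          }
        } finally {
          conn.disconnect();
        }
      }
    }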
@@ -663,6 +663,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7772. Document hdfs balancer -exclude/-include option in
     HDFSCommands.html (Xiaoyu Yao via cnauroth)

+    HDFS-7773. Additional metrics in HDFS to be accessed via jmx.
+    (Anu Engineer via cnauroth)
+
   OPTIMIZATIONS

     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
@@ -658,6 +658,7 @@ class BlockReceiver implements Closeable {
       replicaInfo.setLastChecksumAndDataLen(offsetInBlock, lastCrc);

       datanode.metrics.incrBytesWritten(len);
+      datanode.metrics.incrTotalWriteTime(duration);

       manageWriterOsCache(offsetInBlock);
     }
@@ -86,6 +86,7 @@ import org.apache.hadoop.util.DataChecksum;

 import com.google.common.base.Preconditions;
 import com.google.protobuf.ByteString;
+import org.apache.hadoop.util.Time;


 /**
@@ -480,7 +481,7 @@ class DataXceiver extends Receiver implements Runnable {
       final boolean sendChecksum,
       final CachingStrategy cachingStrategy) throws IOException {
     previousOpClientName = clientName;
-
+    long read = 0;
     OutputStream baseStream = getOutputStream();
     DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
         baseStream, HdfsConstants.SMALL_BUFFER_SIZE));
@@ -515,8 +516,9 @@ class DataXceiver extends Receiver implements Runnable {
       // send op status
       writeSuccessWithChecksumInfo(blockSender, new DataOutputStream(getOutputStream()));

-      long read = blockSender.sendBlock(out, baseStream, null); // send data
-
+      long beginRead = Time.monotonicNow();
+      read = blockSender.sendBlock(out, baseStream, null); // send data
+      long duration = Time.monotonicNow() - beginRead;
       if (blockSender.didSendEntireByteRange()) {
         // If we sent the entire range, then we should expect the client
         // to respond with a Status enum.
@@ -539,6 +541,7 @@ class DataXceiver extends Receiver implements Runnable {
       }
       datanode.metrics.incrBytesRead((int) read);
       datanode.metrics.incrBlocksRead();
+      datanode.metrics.incrTotalReadTime(duration);
     } catch ( SocketException ignored ) {
       if (LOG.isTraceEnabled()) {
         LOG.trace(dnR + ":Ignoring exception while serving " + block + " to " +
@@ -563,7 +566,7 @@ class DataXceiver extends Receiver implements Runnable {

     //update metrics
     datanode.metrics.addReadBlockOp(elapsed());
-    datanode.metrics.incrReadsFromClient(peer.isLocal());
+    datanode.metrics.incrReadsFromClient(peer.isLocal(), read);
   }

   @Override
@@ -590,7 +593,7 @@ class DataXceiver extends Receiver implements Runnable {
     final boolean isClient = !isDatanode;
     final boolean isTransfer = stage == BlockConstructionStage.TRANSFER_RBW
         || stage == BlockConstructionStage.TRANSFER_FINALIZED;
-
+    long size = 0;
     // check single target for transfer-RBW/Finalized
     if (isTransfer && targets.length > 0) {
       throw new IOException(stage + " does not support multiple targets "
@@ -796,7 +799,9 @@ class DataXceiver extends Receiver implements Runnable {
             + localAddress + " of size " + block.getNumBytes());
       }
-
+      if(isClient) {
+        size = block.getNumBytes();
+      }
     } catch (IOException ioe) {
       LOG.info("opWriteBlock " + block + " received exception " + ioe);
       incrDatanodeNetworkErrors();
@@ -813,7 +818,7 @@ class DataXceiver extends Receiver implements Runnable {

     //update metrics
     datanode.metrics.addWriteBlockOp(elapsed());
-    datanode.metrics.incrWritesFromClient(peer.isLocal());
+    datanode.metrics.incrWritesFromClient(peer.isLocal(), size);
   }

   @Override
@@ -993,12 +998,15 @@ class DataXceiver extends Receiver implements Runnable {

       // send status first
       writeSuccessWithChecksumInfo(blockSender, reply);

+      long beginRead = Time.monotonicNow();
       // send block content to the target
       long read = blockSender.sendBlock(reply, baseStream,
                                         dataXceiverServer.balanceThrottler);
+      long duration = Time.monotonicNow() - beginRead;
       datanode.metrics.incrBytesRead((int) read);
       datanode.metrics.incrBlocksRead();
+      datanode.metrics.incrTotalReadTime(duration);

       LOG.info("Copied " + block + " to " + peer.getRemoteAddressString());
     } catch (IOException ioe) {
@@ -50,7 +50,11 @@ import org.apache.hadoop.metrics2.source.JvmMetrics;
 public class DataNodeMetrics {

   @Metric MutableCounterLong bytesWritten;
+  @Metric("Milliseconds spent writing")
+  MutableCounterLong totalWriteTime;
   @Metric MutableCounterLong bytesRead;
+  @Metric("Milliseconds spent reading")
+  MutableCounterLong totalReadTime;
   @Metric MutableCounterLong blocksWritten;
   @Metric MutableCounterLong blocksRead;
   @Metric MutableCounterLong blocksReplicated;
@@ -64,6 +68,10 @@ public class DataNodeMetrics {
   @Metric MutableCounterLong writesFromLocalClient;
   @Metric MutableCounterLong writesFromRemoteClient;
   @Metric MutableCounterLong blocksGetLocalPathInfo;
+  @Metric("Bytes read by remote client")
+  MutableCounterLong remoteBytesRead;
+  @Metric("Bytes written by remote client")
+  MutableCounterLong remoteBytesWritten;

   // RamDisk metrics on read/write
   @Metric MutableCounterLong ramDiskBlocksWrite;
@@ -262,6 +270,15 @@ public class DataNodeMetrics {
     fsyncCount.incr();
   }

+  public void incrTotalWriteTime(long timeTaken) {
+    totalWriteTime.incr(timeTaken);
+  }
+
+  public void incrTotalReadTime(long timeTaken) {
+    totalReadTime.incr(timeTaken);
+  }
+
+
   public void addPacketAckRoundTripTimeNanos(long latencyNanos) {
     packetAckRoundTripTimeNanos.add(latencyNanos);
     for (MutableQuantiles q : packetAckRoundTripTimeNanosQuantiles) {
@@ -287,12 +304,23 @@ public class DataNodeMetrics {
     DefaultMetricsSystem.shutdown();
   }

-  public void incrWritesFromClient(boolean local) {
-    (local ? writesFromLocalClient : writesFromRemoteClient).incr();
+  public void incrWritesFromClient(boolean local, long size) {
+    if(local) {
+      writesFromLocalClient.incr();
+    } else {
+      writesFromRemoteClient.incr();
+      remoteBytesWritten.incr(size);
+    }
   }

-  public void incrReadsFromClient(boolean local) {
-    (local ? readsFromLocalClient : readsFromRemoteClient).incr();
+  public void incrReadsFromClient(boolean local, long size) {
+
+    if (local) {
+      readsFromLocalClient.incr();
+    } else {
+      readsFromRemoteClient.incr();
+      remoteBytesRead.incr(size);
+    }
   }

   public void incrVolumeFailures() {
@@ -77,6 +77,31 @@ public class NameNodeMetrics {
   @Metric("Number of blockReports from individual storages")
   MutableCounterLong storageBlockReportOps;

+  @Metric("Number of file system operations")
+  public long totalFileOps(){
+    return
+      getBlockLocations.value() +
+      createFileOps.value() +
+      filesAppended.value() +
+      addBlockOps.value() +
+      getAdditionalDatanodeOps.value() +
+      filesRenamed.value() +
+      filesTruncated.value() +
+      deleteFileOps.value() +
+      getListingOps.value() +
+      fileInfoOps.value() +
+      getLinkTargetOps.value() +
+      createSnapshotOps.value() +
+      deleteSnapshotOps.value() +
+      allowSnapshotOps.value() +
+      disallowSnapshotOps.value() +
+      renameSnapshotOps.value() +
+      listSnapshottableDirOps.value() +
+      createSymlinkOps.value() +
+      snapshotDiffReportOps.value();
+  }
+
   @Metric("Journal transactions") MutableRate transactions;
   @Metric("Journal syncs") MutableRate syncs;
   final MutableQuantiles[] syncsQuantiles;
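One note on the hunk above: `totalFileOps()` uses metrics2's support for derived metrics, where a `@Metric`-annotated getter is evaluated at every metrics snapshot and computed from the underlying counters rather than stored as a separate counter. A minimal standalone sketch of that pattern follows; the class and counter names are illustrative only and do not appear in this patch.

    import org.apache.hadoop.metrics2.annotation.Metric;
    import org.apache.hadoop.metrics2.annotation.Metrics;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.metrics2.lib.MutableCounterLong;

    // Illustrative derived metric mirroring totalFileOps(): the annotated getter
    // is sampled on each snapshot and sums existing counters.
    @Metrics(about = "Example source with a derived counter", context = "example")
    public class ExampleMetrics {
      @Metric MutableCounterLong readOps;
      @Metric MutableCounterLong writeOps;

      @Metric("Total operations, computed at sample time")
      public long totalOps() {
        return readOps.value() + writeOps.value();
      }

      public static ExampleMetrics create() {
        // Registering the source exposes TotalOps next to ReadOps/WriteOps over JMX.
        return DefaultMetricsSystem.instance().register(new ExampleMetrics());
      }
    }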
@@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.util.Time;
 import org.junit.Test;
 import org.mockito.Mockito;

@@ -246,4 +247,48 @@ public class TestDataNodeMetrics {
       DataNodeFaultInjector.instance = new DataNodeFaultInjector();
     }
   }
+
+  /**
+   * This function ensures that writing causes TotalWritetime to increment
+   * and reading causes totalReadTime to move.
+   * @throws Exception
+   */
+  @Test
+  public void testDataNodeTimeSpend() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    SimulatedFSDataset.setFactory(conf);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      List<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(datanodes.size(), 1);
+      DataNode datanode = datanodes.get(0);
+      MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
+      final long LONG_FILE_LEN = 1024 * 1024 * 10;
+
+      long startWriteValue = getLongCounter("TotalWriteTime", rb);
+      long startReadValue = getLongCounter("TotalReadTime", rb);
+
+      for (int x =0; x < 50; x++) {
+        DFSTestUtil.createFile(fs, new Path("/time.txt."+ x),
+            LONG_FILE_LEN, (short) 1, Time.monotonicNow());
+      }
+
+      for (int x =0; x < 50; x++) {
+        String s = DFSTestUtil.readFile(fs, new Path("/time.txt." + x));
+      }
+
+      MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
+      long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
+      long endReadValue = getLongCounter("TotalReadTime", rbNew);
+
+      assertTrue(endReadValue > startReadValue);
+      assertTrue(endWriteValue > startWriteValue);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 }
@@ -455,4 +455,24 @@ public class TestNameNodeMetrics {
     assertQuantileGauges("Syncs1s", rb);
     assertQuantileGauges("BlockReport1s", rb);
   }
+
+  /**
+   * Test NN ReadOps Count and WriteOps Count
+   */
+  @Test
+  public void testReadWriteOps() throws Exception {
+    MetricsRecordBuilder rb = getMetrics(NN_METRICS);
+    long startWriteCounter = MetricsAsserts.getLongCounter("TransactionsNumOps",
+        rb);
+    Path file1_Path = new Path(TEST_ROOT_DIR_PATH, "ReadData.dat");
+
+    //Perform create file operation
+    createFile(file1_Path, 1024 * 1024,(short)2);
+
+    // Perform read file operation on earlier created file
+    readFile(fs, file1_Path);
+    MetricsRecordBuilder rbNew = getMetrics(NN_METRICS);
+    assertTrue(MetricsAsserts.getLongCounter("TransactionsNumOps", rbNew) >
+        startWriteCounter);
+  }
 }