diff --git a/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
index 915467e5ca1..ecba7572114 100644
--- a/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
+++ b/hadoop-common-project/hadoop-common/src/site/apt/Metrics.apt.vm
@@ -336,6 +336,8 @@ dfs context
 *-------------------------------------+--------------------------------------+
 |<<PutImageAvgTime>> | Average fsimage upload time in milliseconds
 *-------------------------------------+--------------------------------------+
+|<<TotalFileOps>> | Total number of file operations performed
+*-------------------------------------+--------------------------------------+
 
 * FSNamesystem
 
@@ -604,6 +606,17 @@ dfs context
 |<<SendDataPacketTransferNanosAvgTime>> | Average transfer time of sending
 |                                        | packets in nanoseconds
 *-------------------------------------+--------------------------------------+
+|<<TotalWriteTime>> | Total number of milliseconds spent on write
+|                   | operation
+*-------------------------------------+--------------------------------------+
+|<<TotalReadTime>> | Total number of milliseconds spent on read
+|                  | operation
+*-------------------------------------+--------------------------------------+
+|<<RemoteBytesRead>> | Number of bytes read by remote clients
+*-------------------------------------+--------------------------------------+
+|<<RemoteBytesWritten>> | Number of bytes written by remote clients
+*-------------------------------------+--------------------------------------+
+
 
 yarn context
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 2d6db54b2b7..8e36495fb9d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -364,6 +364,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7772. Document hdfs balancer -exclude/-include option in
     HDFSCommands.html (Xiaoyu Yao via cnauroth)
 
+    HDFS-7773. Additional metrics in HDFS to be accessed via jmx.
+    (Anu Engineer via cnauroth)
+
   OPTIMIZATIONS
 
     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
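A note on consuming the counters documented above: every @Metric field or method that metrics2 registers is also exposed as an attribute of a JMX MBean (Hadoop:service=NameNode,name=NameNodeActivity on the NameNode, and a DataNodeActivity bean whose name embeds host and port on the DataNode). A minimal sketch of polling the new DataNode counters over a remote JMX connection; localhost and port 8006 are illustrative assumptions, not defaults:

    import java.util.Set;

    import javax.management.MBeanServerConnection;
    import javax.management.ObjectName;
    import javax.management.remote.JMXConnector;
    import javax.management.remote.JMXConnectorFactory;
    import javax.management.remote.JMXServiceURL;

    public class DataNodeMetricsProbe {
      public static void main(String[] args) throws Exception {
        // Deployment-specific: the DataNode JVM must expose a JMX remote port
        // (e.g. -Dcom.sun.management.jmxremote.port=8006) for this URL to work.
        JMXServiceURL url = new JMXServiceURL(
            "service:jmx:rmi:///jndi/rmi://localhost:8006/jmxrmi");
        try (JMXConnector jmxc = JMXConnectorFactory.connect(url)) {
          MBeanServerConnection conn = jmxc.getMBeanServerConnection();
          // The DataNode activity bean name embeds host and port, so match by pattern.
          Set<ObjectName> names = conn.queryNames(
              new ObjectName("Hadoop:service=DataNode,name=DataNodeActivity*"), null);
          for (ObjectName name : names) {
            System.out.println("TotalWriteTime  = " + conn.getAttribute(name, "TotalWriteTime"));
            System.out.println("TotalReadTime   = " + conn.getAttribute(name, "TotalReadTime"));
            System.out.println("RemoteBytesRead = " + conn.getAttribute(name, "RemoteBytesRead"));
          }
        }
      }
    }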
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
index 368d80daff2..1db2c7885e1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -658,6 +658,7 @@ class BlockReceiver implements Closeable {
       replicaInfo.setLastChecksumAndDataLen(offsetInBlock, lastCrc);
 
       datanode.metrics.incrBytesWritten(len);
+      datanode.metrics.incrTotalWriteTime(duration);
 
       manageWriterOsCache(offsetInBlock);
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
index bb5323af3c3..704993acdcf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
@@ -86,6 +86,7 @@ import org.apache.hadoop.util.DataChecksum;
 
 import com.google.common.base.Preconditions;
 import com.google.protobuf.ByteString;
+import org.apache.hadoop.util.Time;
 
 /**
@@ -480,7 +481,7 @@ class DataXceiver extends Receiver implements Runnable {
       final boolean sendChecksum,
       final CachingStrategy cachingStrategy) throws IOException {
     previousOpClientName = clientName;
-
+    long read = 0;
     OutputStream baseStream = getOutputStream();
     DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
         baseStream, HdfsConstants.SMALL_BUFFER_SIZE));
@@ -515,8 +516,9 @@
       // send op status
       writeSuccessWithChecksumInfo(blockSender,
           new DataOutputStream(getOutputStream()));
-      long read = blockSender.sendBlock(out, baseStream, null); // send data
-
+      long beginRead = Time.monotonicNow();
+      read = blockSender.sendBlock(out, baseStream, null); // send data
+      long duration = Time.monotonicNow() - beginRead;
       if (blockSender.didSendEntireByteRange()) {
         // If we sent the entire range, then we should expect the client
         // to respond with a Status enum.
@@ -539,6 +541,7 @@ class DataXceiver extends Receiver implements Runnable {
       }
       datanode.metrics.incrBytesRead((int) read);
       datanode.metrics.incrBlocksRead();
+      datanode.metrics.incrTotalReadTime(duration);
     } catch ( SocketException ignored ) {
       if (LOG.isTraceEnabled()) {
         LOG.trace(dnR + ":Ignoring exception while serving " + block + " to " +
@@ -563,7 +566,7 @@
 
     //update metrics
     datanode.metrics.addReadBlockOp(elapsed());
-    datanode.metrics.incrReadsFromClient(peer.isLocal());
+    datanode.metrics.incrReadsFromClient(peer.isLocal(), read);
   }
 
   @Override
@@ -590,7 +593,7 @@
     final boolean isClient = !isDatanode;
     final boolean isTransfer = stage == BlockConstructionStage.TRANSFER_RBW
         || stage == BlockConstructionStage.TRANSFER_FINALIZED;
-
+    long size = 0;
     // check single target for transfer-RBW/Finalized
     if (isTransfer && targets.length > 0) {
       throw new IOException(stage + " does not support multiple targets "
@@ -796,7 +799,9 @@
             + localAddress + " of size " + block.getNumBytes());
       }
 
-
+      if (isClient) {
+        size = block.getNumBytes();
+      }
     } catch (IOException ioe) {
       LOG.info("opWriteBlock " + block + " received exception " + ioe);
       incrDatanodeNetworkErrors();
@@ -813,7 +818,7 @@
 
     //update metrics
     datanode.metrics.addWriteBlockOp(elapsed());
-    datanode.metrics.incrWritesFromClient(peer.isLocal());
+    datanode.metrics.incrWritesFromClient(peer.isLocal(), size);
   }
 
   @Override
@@ -993,12 +998,15 @@
 
       // send status first
      writeSuccessWithChecksumInfo(blockSender, reply);
 
-      // send block content to the target
-      long read = blockSender.sendBlock(reply, baseStream,
-          dataXceiverServer.balanceThrottler);
+      long beginRead = Time.monotonicNow();
+      // send block content to the target
+      long read = blockSender.sendBlock(reply, baseStream,
+          dataXceiverServer.balanceThrottler);
+      long duration = Time.monotonicNow() - beginRead;
       datanode.metrics.incrBytesRead((int) read);
       datanode.metrics.incrBlocksRead();
+      datanode.metrics.incrTotalReadTime(duration);
 
       LOG.info("Copied " + block + " to " + peer.getRemoteAddressString());
     } catch (IOException ioe) {
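The instrumentation above uses the same pattern in the read, write, and copy paths: bracket the data transfer with Time.monotonicNow(), which is derived from System.nanoTime() and therefore immune to wall-clock adjustments, then feed the millisecond delta into a counter. A self-contained sketch of that pattern; Thread.sleep stands in for sendBlock() or the packet write:

    import org.apache.hadoop.util.Time;

    public class TimingPatternDemo {
      public static void main(String[] args) throws InterruptedException {
        long begin = Time.monotonicNow();   // milliseconds, monotonic clock
        Thread.sleep(25);                   // stand-in for the actual data transfer
        long durationMs = Time.monotonicNow() - begin;
        System.out.println("operation took " + durationMs + " ms");
      }
    }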
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
index 09ad3da642b..0fbc2ee10eb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
@@ -50,7 +50,11 @@ public class DataNodeMetrics {
 
   @Metric MutableCounterLong bytesWritten;
+  @Metric("Milliseconds spent writing")
+  MutableCounterLong totalWriteTime;
   @Metric MutableCounterLong bytesRead;
+  @Metric("Milliseconds spent reading")
+  MutableCounterLong totalReadTime;
   @Metric MutableCounterLong blocksWritten;
   @Metric MutableCounterLong blocksRead;
   @Metric MutableCounterLong blocksReplicated;
@@ -64,6 +68,10 @@ public class DataNodeMetrics {
   @Metric MutableCounterLong writesFromLocalClient;
   @Metric MutableCounterLong writesFromRemoteClient;
   @Metric MutableCounterLong blocksGetLocalPathInfo;
+  @Metric("Bytes read by remote client")
+  MutableCounterLong remoteBytesRead;
+  @Metric("Bytes written by remote client")
+  MutableCounterLong remoteBytesWritten;
 
   // RamDisk metrics on read/write
   @Metric MutableCounterLong ramDiskBlocksWrite;
@@ -262,6 +270,15 @@ public class DataNodeMetrics {
     fsyncCount.incr();
   }
 
+  public void incrTotalWriteTime(long timeTaken) {
+    totalWriteTime.incr(timeTaken);
+  }
+
+  public void incrTotalReadTime(long timeTaken) {
+    totalReadTime.incr(timeTaken);
+  }
+
+
   public void addPacketAckRoundTripTimeNanos(long latencyNanos) {
     packetAckRoundTripTimeNanos.add(latencyNanos);
     for (MutableQuantiles q : packetAckRoundTripTimeNanosQuantiles) {
@@ -287,12 +304,23 @@ public class DataNodeMetrics {
     DefaultMetricsSystem.shutdown();
   }
 
-  public void incrWritesFromClient(boolean local) {
-    (local ? writesFromLocalClient : writesFromRemoteClient).incr();
+  public void incrWritesFromClient(boolean local, long size) {
+    if (local) {
+      writesFromLocalClient.incr();
+    } else {
+      writesFromRemoteClient.incr();
+      remoteBytesWritten.incr(size);
+    }
   }
 
-  public void incrReadsFromClient(boolean local) {
-    (local ? readsFromLocalClient : readsFromRemoteClient).incr();
+  public void incrReadsFromClient(boolean local, long size) {
+
+    if (local) {
+      readsFromLocalClient.incr();
+    } else {
+      readsFromRemoteClient.incr();
+      remoteBytesRead.incr(size);
+    }
   }
 
   public void incrVolumeFailures() {
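With the widened signatures, each call site now reports both locality and size, and the metrics class splits them: local traffic bumps only the per-client-type operation counter, while remote traffic additionally accumulates bytes. A hedged usage sketch of that contract; it assumes the DataNodeMetrics.create(Configuration, String) factory that the DataNode itself uses to build its metrics source:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;

    public class ClientCounterDemo {
      public static void main(String[] args) {
        DataNodeMetrics metrics = DataNodeMetrics.create(new Configuration(), "example-dn");
        // Local reader: only ReadsFromLocalClient moves; the byte size is ignored.
        metrics.incrReadsFromClient(true, 4096);
        // Remote reader: ReadsFromRemoteClient++ and RemoteBytesRead += 4096.
        metrics.incrReadsFromClient(false, 4096);
      }
    }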
remote client") + MutableCounterLong remoteBytesRead; + @Metric("Bytes written by remote client") + MutableCounterLong remoteBytesWritten; // RamDisk metrics on read/write @Metric MutableCounterLong ramDiskBlocksWrite; @@ -262,6 +270,15 @@ public class DataNodeMetrics { fsyncCount.incr(); } + public void incrTotalWriteTime(long timeTaken) { + totalWriteTime.incr(timeTaken); + } + + public void incrTotalReadTime(long timeTaken) { + totalReadTime.incr(timeTaken); + } + + public void addPacketAckRoundTripTimeNanos(long latencyNanos) { packetAckRoundTripTimeNanos.add(latencyNanos); for (MutableQuantiles q : packetAckRoundTripTimeNanosQuantiles) { @@ -287,12 +304,23 @@ public class DataNodeMetrics { DefaultMetricsSystem.shutdown(); } - public void incrWritesFromClient(boolean local) { - (local ? writesFromLocalClient : writesFromRemoteClient).incr(); + public void incrWritesFromClient(boolean local, long size) { + if(local) { + writesFromLocalClient.incr(); + } else { + writesFromRemoteClient.incr(); + remoteBytesWritten.incr(size); + } } - public void incrReadsFromClient(boolean local) { - (local ? readsFromLocalClient : readsFromRemoteClient).incr(); + public void incrReadsFromClient(boolean local, long size) { + + if (local) { + readsFromLocalClient.incr(); + } else { + readsFromRemoteClient.incr(); + remoteBytesRead.incr(size); + } } public void incrVolumeFailures() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java index 94e845ba3c6..31bc16479b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java @@ -77,6 +77,31 @@ public class NameNodeMetrics { @Metric("Number of blockReports from individual storages") MutableCounterLong storageBlockReportOps; + @Metric("Number of file system operations") + public long totalFileOps(){ + return + getBlockLocations.value() + + createFileOps.value() + + filesAppended.value() + + addBlockOps.value() + + getAdditionalDatanodeOps.value() + + filesRenamed.value() + + filesTruncated.value() + + deleteFileOps.value() + + getListingOps.value() + + fileInfoOps.value() + + getLinkTargetOps.value() + + createSnapshotOps.value() + + deleteSnapshotOps.value() + + allowSnapshotOps.value() + + disallowSnapshotOps.value() + + renameSnapshotOps.value() + + listSnapshottableDirOps.value() + + createSymlinkOps.value() + + snapshotDiffReportOps.value(); + } + + @Metric("Journal transactions") MutableRate transactions; @Metric("Journal syncs") MutableRate syncs; final MutableQuantiles[] syncsQuantiles; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java index 0b85d35b0d5..8a2bacf90f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.io.IOUtils; import 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
index 0b85d35b0d5..8a2bacf90f1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
@@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.util.Time;
 import org.junit.Test;
 import org.mockito.Mockito;
 
@@ -246,4 +247,48 @@ public class TestDataNodeMetrics {
       DataNodeFaultInjector.instance = new DataNodeFaultInjector();
     }
   }
+
+  /**
+   * Ensures that writing data increments TotalWriteTime and that reading
+   * data increments TotalReadTime.
+   * @throws Exception
+   */
+  @Test
+  public void testDataNodeTimeSpend() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    SimulatedFSDataset.setFactory(conf);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      List<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(1, datanodes.size());
+      DataNode datanode = datanodes.get(0);
+      MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
+      final long LONG_FILE_LEN = 1024 * 1024 * 10;
+
+      long startWriteValue = getLongCounter("TotalWriteTime", rb);
+      long startReadValue = getLongCounter("TotalReadTime", rb);
+
+      for (int x = 0; x < 50; x++) {
+        DFSTestUtil.createFile(fs, new Path("/time.txt." + x),
+            LONG_FILE_LEN, (short) 1, Time.monotonicNow());
+      }
+
+      for (int x = 0; x < 50; x++) {
+        String s = DFSTestUtil.readFile(fs, new Path("/time.txt." + x));
+      }
+
+      MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
+      long endWriteValue = getLongCounter("TotalWriteTime", rbNew);
+      long endReadValue = getLongCounter("TotalReadTime", rbNew);
+
+      assertTrue(endReadValue > startReadValue);
+      assertTrue(endWriteValue > startWriteValue);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
index a4c4670508a..8bd8a671c1e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
@@ -456,4 +456,24 @@ public class TestNameNodeMetrics {
     assertQuantileGauges("Syncs1s", rb);
     assertQuantileGauges("BlockReport1s", rb);
   }
+
+  /**
+   * Test NN read and write ops counters.
+   */
+  @Test
+  public void testReadWriteOps() throws Exception {
+    MetricsRecordBuilder rb = getMetrics(NN_METRICS);
+    long startWriteCounter = MetricsAsserts.getLongCounter(
+        "TransactionsNumOps", rb);
+    Path file1_Path = new Path(TEST_ROOT_DIR_PATH, "ReadData.dat");
+
+    // Perform create file operation
+    createFile(file1_Path, 1024 * 1024, (short) 2);
+
+    // Perform read file operation on earlier created file
+    readFile(fs, file1_Path);
+    MetricsRecordBuilder rbNew = getMetrics(NN_METRICS);
+    assertTrue(MetricsAsserts.getLongCounter("TransactionsNumOps", rbNew) >
+        startWriteCounter);
+  }
 }
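Both new tests rely on the same before/after idiom: snapshot the metrics record, run the workload, snapshot again, and assert the counter moved. A hedged distillation of that idiom; assertCounterMoved is a hypothetical helper, and record names such as the DataNode's activity record are instance-specific, which is why the test above derives the name from datanode.getMetrics().name() instead of hardcoding it:

    import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
    import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
    import static org.junit.Assert.assertTrue;

    import org.apache.hadoop.metrics2.MetricsRecordBuilder;

    public class BeforeAfterIdiom {
      // Snapshot, run, snapshot again, and assert the named counter advanced.
      static void assertCounterMoved(String recordName, String counter,
          Runnable workload) {
        MetricsRecordBuilder before = getMetrics(recordName);
        long start = getLongCounter(counter, before);
        workload.run();
        MetricsRecordBuilder after = getMetrics(recordName);
        assertTrue(getLongCounter(counter, after) > start);
      }
    }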