From 73aed34dffa5e79f6f819137b69054c1dee2d4dd Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Wed, 13 Sep 2017 17:10:16 -0700 Subject: [PATCH] HDFS-12409. Add metrics of execution time of different stages in EC recovery task. (Lei (Eddy) Xu) --- .../erasurecode/StripedBlockReconstructor.java | 11 +++++++++++ .../datanode/metrics/DataNodeMetrics.java | 18 ++++++++++++++++++ .../TestDataNodeErasureCodingMetrics.java | 7 +++++++ 3 files changed, 36 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java index bac013aea29..34e58ae47b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java @@ -22,6 +22,7 @@ import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; +import org.apache.hadoop.util.Time; /** * StripedBlockReconstructor reconstruct one or more missed striped block in @@ -83,18 +84,28 @@ class StripedBlockReconstructor extends StripedReconstructor final int toReconstructLen = (int) Math.min(getStripedReader().getBufferSize(), remaining); + long start = Time.monotonicNow(); // step1: read from minimum source DNs required for reconstruction. 
// The returned success list is the source DNs we do real read from getStripedReader().readMinimumSources(toReconstructLen); + long readEnd = Time.monotonicNow(); // step2: decode to reconstruct targets reconstructTargets(toReconstructLen); + long decodeEnd = Time.monotonicNow(); // step3: transfer data if (stripedWriter.transferData2Targets() == 0) { String error = "Transfer failed for all targets."; throw new IOException(error); } + long writeEnd = Time.monotonicNow(); + + // Only the successful reconstructions are recorded. + final DataNodeMetrics metrics = getDatanode().getMetrics(); + metrics.incrECReconstructionReadTime(readEnd - start); + metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd); + metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd); updatePositionInBlock(toReconstructLen); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java index a8a691980bb..58a2f65f4e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java @@ -151,6 +151,12 @@ public class DataNodeMetrics { MutableCounterLong ecReconstructionBytesWritten; @Metric("Bytes remote read by erasure coding worker") MutableCounterLong ecReconstructionRemoteBytesRead; + @Metric("Milliseconds spent on read by erasure coding worker") + private MutableCounterLong ecReconstructionReadTimeMillis; + @Metric("Milliseconds spent on decoding by erasure coding worker") + private MutableCounterLong ecReconstructionDecodingTimeMillis; + @Metric("Milliseconds spent on write by erasure coding worker") + private MutableCounterLong ecReconstructionWriteTimeMillis; final MetricsRegistry registry = new MetricsRegistry("datanode"); 
final String name; @@ -503,4 +509,16 @@ public class DataNodeMetrics { public void incrECReconstructionBytesWritten(long bytes) { ecReconstructionBytesWritten.incr(bytes); } + + public void incrECReconstructionReadTime(long millis) { + ecReconstructionReadTimeMillis.incr(millis); + } + + public void incrECReconstructionWriteTime(long millis) { + ecReconstructionWriteTimeMillis.incr(millis); + } + + public void incrECReconstructionDecodingTime(long millis) { + ecReconstructionDecodingTimeMillis.incr(millis); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java index ee2afbbd8b5..8ababfacf63 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeErasureCodingMetrics.java @@ -90,6 +90,10 @@ public class TestDataNodeErasureCodingMetrics { @Test(timeout = 120000) public void testFullBlock() throws Exception { + Assert.assertEquals(0, getLongMetric("EcReconstructionReadTimeMillis")); + Assert.assertEquals(0, getLongMetric("EcReconstructionDecodingTimeMillis")); + Assert.assertEquals(0, getLongMetric("EcReconstructionWriteTimeMillis")); + doTest("/testEcMetrics", blockGroupSize, 0); Assert.assertEquals("EcReconstructionTasks should be ", @@ -103,6 +107,9 @@ public class TestDataNodeErasureCodingMetrics { blockSize, getLongMetric("EcReconstructionBytesWritten")); Assert.assertEquals("EcReconstructionRemoteBytesRead should be ", 0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead")); + Assert.assertTrue(getLongMetric("EcReconstructionReadTimeMillis") > 0); + Assert.assertTrue(getLongMetric("EcReconstructionDecodingTimeMillis") > 0); + 
Assert.assertTrue(getLongMetric("EcReconstructionWriteTimeMillis") > 0); } // A partial block, reconstruct the partial block