HDFS-12409. Add metrics of execution time of different stages in EC recovery task. (Lei (Eddy) Xu)

(cherry picked from commit 73aed34dff)
This commit is contained in:
Lei Xu 2017-09-13 17:10:16 -07:00 committed by Andrew Wang
parent 58eacdbb42
commit d9dd856739
3 changed files with 36 additions and 0 deletions

View File

@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.util.Time;
/** /**
* StripedBlockReconstructor reconstruct one or more missed striped block in * StripedBlockReconstructor reconstruct one or more missed striped block in
@ -83,18 +84,28 @@ class StripedBlockReconstructor extends StripedReconstructor
final int toReconstructLen = final int toReconstructLen =
(int) Math.min(getStripedReader().getBufferSize(), remaining); (int) Math.min(getStripedReader().getBufferSize(), remaining);
long start = Time.monotonicNow();
// step1: read from minimum source DNs required for reconstruction. // step1: read from minimum source DNs required for reconstruction.
// The returned success list is the source DNs we do real read from // The returned success list is the source DNs we do real read from
getStripedReader().readMinimumSources(toReconstructLen); getStripedReader().readMinimumSources(toReconstructLen);
long readEnd = Time.monotonicNow();
// step2: decode to reconstruct targets // step2: decode to reconstruct targets
reconstructTargets(toReconstructLen); reconstructTargets(toReconstructLen);
long decodeEnd = Time.monotonicNow();
// step3: transfer data // step3: transfer data
if (stripedWriter.transferData2Targets() == 0) { if (stripedWriter.transferData2Targets() == 0) {
String error = "Transfer failed for all targets."; String error = "Transfer failed for all targets.";
throw new IOException(error); throw new IOException(error);
} }
long writeEnd = Time.monotonicNow();
// Only successful reconstructions are recorded.
final DataNodeMetrics metrics = getDatanode().getMetrics();
metrics.incrECReconstructionReadTime(readEnd - start);
metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd);
metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd);
updatePositionInBlock(toReconstructLen); updatePositionInBlock(toReconstructLen);

View File

@ -151,6 +151,12 @@ public class DataNodeMetrics {
MutableCounterLong ecReconstructionBytesWritten; MutableCounterLong ecReconstructionBytesWritten;
@Metric("Bytes remote read by erasure coding worker") @Metric("Bytes remote read by erasure coding worker")
MutableCounterLong ecReconstructionRemoteBytesRead; MutableCounterLong ecReconstructionRemoteBytesRead;
@Metric("Milliseconds spent on read by erasure coding worker")
private MutableCounterLong ecReconstructionReadTimeMillis;
@Metric("Milliseconds spent on decoding by erasure coding worker")
private MutableCounterLong ecReconstructionDecodingTimeMillis;
@Metric("Milliseconds spent on write by erasure coding worker")
private MutableCounterLong ecReconstructionWriteTimeMillis;
final MetricsRegistry registry = new MetricsRegistry("datanode"); final MetricsRegistry registry = new MetricsRegistry("datanode");
final String name; final String name;
@ -503,4 +509,16 @@ public class DataNodeMetrics {
public void incrECReconstructionBytesWritten(long bytes) { public void incrECReconstructionBytesWritten(long bytes) {
ecReconstructionBytesWritten.incr(bytes); ecReconstructionBytesWritten.incr(bytes);
} }
/**
 * Adds the given duration to the {@code ecReconstructionReadTimeMillis}
 * counter ("Milliseconds spent on read by erasure coding worker").
 *
 * @param millis number of milliseconds to add to the counter
 */
public void incrECReconstructionReadTime(long millis) {
ecReconstructionReadTimeMillis.incr(millis);
}
/**
 * Adds the given duration to the {@code ecReconstructionWriteTimeMillis}
 * counter ("Milliseconds spent on write by erasure coding worker").
 *
 * @param millis number of milliseconds to add to the counter
 */
public void incrECReconstructionWriteTime(long millis) {
ecReconstructionWriteTimeMillis.incr(millis);
}
/**
 * Adds the given duration to the {@code ecReconstructionDecodingTimeMillis}
 * counter ("Milliseconds spent on decoding by erasure coding worker").
 *
 * @param millis number of milliseconds to add to the counter
 */
public void incrECReconstructionDecodingTime(long millis) {
ecReconstructionDecodingTimeMillis.incr(millis);
}
} }

View File

@ -90,6 +90,10 @@ public class TestDataNodeErasureCodingMetrics {
@Test(timeout = 120000) @Test(timeout = 120000)
public void testFullBlock() throws Exception { public void testFullBlock() throws Exception {
Assert.assertEquals(0, getLongMetric("EcReconstructionReadTimeMillis"));
Assert.assertEquals(0, getLongMetric("EcReconstructionDecodingTimeMillis"));
Assert.assertEquals(0, getLongMetric("EcReconstructionWriteTimeMillis"));
doTest("/testEcMetrics", blockGroupSize, 0); doTest("/testEcMetrics", blockGroupSize, 0);
Assert.assertEquals("EcReconstructionTasks should be ", Assert.assertEquals("EcReconstructionTasks should be ",
@ -103,6 +107,9 @@ public class TestDataNodeErasureCodingMetrics {
blockSize, getLongMetric("EcReconstructionBytesWritten")); blockSize, getLongMetric("EcReconstructionBytesWritten"));
Assert.assertEquals("EcReconstructionRemoteBytesRead should be ", Assert.assertEquals("EcReconstructionRemoteBytesRead should be ",
0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead")); 0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead"));
Assert.assertTrue(getLongMetric("EcReconstructionReadTimeMillis") > 0);
Assert.assertTrue(getLongMetric("EcReconstructionDecodingTimeMillis") > 0);
Assert.assertTrue(getLongMetric("EcReconstructionWriteTimeMillis") > 0);
} }
// A partial block, reconstruct the partial block // A partial block, reconstruct the partial block