HDFS-12409. Add metrics of execution time of different stages in EC recovery task. (Lei (Eddy) Xu)

Lei Xu 2017-09-13 17:10:16 -07:00
parent c3f35c422b
commit 73aed34dff
3 changed files with 36 additions and 0 deletions

StripedBlockReconstructor.java

@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
import org.apache.hadoop.util.Time;
/**
* StripedBlockReconstructor reconstructs one or more missing striped blocks in
@@ -83,18 +84,28 @@ class StripedBlockReconstructor extends StripedReconstructor
final int toReconstructLen =
(int) Math.min(getStripedReader().getBufferSize(), remaining);
long start = Time.monotonicNow();
// step1: read from minimum source DNs required for reconstruction.
// The returned success list contains the source DNs we actually read from
getStripedReader().readMinimumSources(toReconstructLen);
long readEnd = Time.monotonicNow();
// step2: decode to reconstruct targets
reconstructTargets(toReconstructLen);
long decodeEnd = Time.monotonicNow();
// step3: transfer data
if (stripedWriter.transferData2Targets() == 0) {
String error = "Transfer failed for all targets.";
throw new IOException(error);
}
long writeEnd = Time.monotonicNow();
// Only successful reconstructions are recorded.
final DataNodeMetrics metrics = getDatanode().getMetrics();
metrics.incrECReconstructionReadTime(readEnd - start);
metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd);
metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd);
updatePositionInBlock(toReconstructLen);
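
The instrumentation above follows a simple pattern: take a monotonic timestamp before and after each stage and feed the deltas into per-stage counters, recording them only after the whole read/decode/write cycle succeeds. Below is a minimal, self-contained sketch of that pattern, not the Hadoop code: the static fields stand in for the DataNodeMetrics counters, the sleeps stand in for the three stages, and System.nanoTime() replaces Time.monotonicNow() so the sketch runs without Hadoop on the classpath.

// Minimal sketch of the stage-timing pattern used in the patch above.
public class StageTimingSketch {
  // Stand-ins for the per-stage counters in DataNodeMetrics.
  static long readMillis;
  static long decodeMillis;
  static long writeMillis;

  public static void main(String[] args) throws InterruptedException {
    long start = System.nanoTime();
    Thread.sleep(5);                     // pretend step1: read from source DNs
    long readEnd = System.nanoTime();
    Thread.sleep(3);                     // pretend step2: decode to reconstruct targets
    long decodeEnd = System.nanoTime();
    Thread.sleep(2);                     // pretend step3: transfer data to targets
    long writeEnd = System.nanoTime();

    // Record only after the full cycle succeeded, matching the
    // "only successful reconstructions are recorded" rule above.
    readMillis += (readEnd - start) / 1_000_000;
    decodeMillis += (decodeEnd - readEnd) / 1_000_000;
    writeMillis += (writeEnd - decodeEnd) / 1_000_000;

    System.out.printf("read=%dms decode=%dms write=%dms%n",
        readMillis, decodeMillis, writeMillis);
  }
}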

DataNodeMetrics.java

@@ -151,6 +151,12 @@ public class DataNodeMetrics {
MutableCounterLong ecReconstructionBytesWritten;
@Metric("Bytes remote read by erasure coding worker")
MutableCounterLong ecReconstructionRemoteBytesRead;
@Metric("Milliseconds spent on read by erasure coding worker")
private MutableCounterLong ecReconstructionReadTimeMillis;
@Metric("Milliseconds spent on decoding by erasure coding worker")
private MutableCounterLong ecReconstructionDecodingTimeMillis;
@Metric("Milliseconds spent on write by erasure coding worker")
private MutableCounterLong ecReconstructionWriteTimeMillis;
final MetricsRegistry registry = new MetricsRegistry("datanode");
final String name;
@@ -503,4 +509,16 @@ public class DataNodeMetrics {
public void incrECReconstructionBytesWritten(long bytes) {
ecReconstructionBytesWritten.incr(bytes);
}
public void incrECReconstructionReadTime(long millis) {
ecReconstructionReadTimeMillis.incr(millis);
}
public void incrECReconstructionWriteTime(long millis) {
ecReconstructionWriteTimeMillis.incr(millis);
}
public void incrECReconstructionDecodingTime(long millis) {
ecReconstructionDecodingTimeMillis.incr(millis);
}
}
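
The three new fields follow the usual Hadoop metrics2 pattern: an @Metric-annotated MutableCounterLong is instantiated when the source class is registered, the annotation string becomes the metric's description, and the capitalized field name (e.g. EcReconstructionReadTimeMillis) becomes the metric name that the test below queries. The following is a hedged, standalone sketch of that pattern; the class, field, and metric names are illustrative and not part of the patch.

import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;

// Illustrative metrics source, not part of the patch.
@Metrics(name = "ExampleEcTimings", about = "Example EC timing metrics", context = "dfs")
public class ExampleEcTimings {
  // Declared like the three counters added to DataNodeMetrics above; the
  // field is injected by the metrics system when the source is registered.
  @Metric("Milliseconds spent in an example EC stage")
  private MutableCounterLong ecExampleStageTimeMillis;

  public void incrEcExampleStageTime(long millis) {
    ecExampleStageTimeMillis.incr(millis);
  }

  public static void main(String[] args) {
    ExampleEcTimings source = DefaultMetricsSystem.initialize("ExampleDataNode")
        .register(new ExampleEcTimings());
    source.incrEcExampleStageTime(42);
    // Exposed to metrics sinks/JMX as "EcExampleStageTimeMillis" on this source.
    System.out.println(source.ecExampleStageTimeMillis.value());
  }
}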

TestDataNodeErasureCodingMetrics.java

@@ -90,6 +90,10 @@ public class TestDataNodeErasureCodingMetrics {
@Test(timeout = 120000)
public void testFullBlock() throws Exception {
Assert.assertEquals(0, getLongMetric("EcReconstructionReadTimeMillis"));
Assert.assertEquals(0, getLongMetric("EcReconstructionDecodingTimeMillis"));
Assert.assertEquals(0, getLongMetric("EcReconstructionWriteTimeMillis"));
doTest("/testEcMetrics", blockGroupSize, 0);
Assert.assertEquals("EcReconstructionTasks should be ",
@@ -103,6 +107,9 @@ public class TestDataNodeErasureCodingMetrics {
blockSize, getLongMetric("EcReconstructionBytesWritten"));
Assert.assertEquals("EcReconstructionRemoteBytesRead should be ",
0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead"));
Assert.assertTrue(getLongMetric("EcReconstructionReadTimeMillis") > 0);
Assert.assertTrue(getLongMetric("EcReconstructionDecodingTimeMillis") > 0);
Assert.assertTrue(getLongMetric("EcReconstructionWriteTimeMillis") > 0);
}
// A partial block, reconstruct the partial block
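
The assertions above read the counters by their generated names (the capitalized field names from DataNodeMetrics). The test's getLongMetric helper is not shown in this diff; the sketch below is an assumption about what such a helper typically looks like when built on the MetricsAsserts test utilities, not the actual code in TestDataNodeErasureCodingMetrics.

import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;

import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;

// Hypothetical helper, not the code in TestDataNodeErasureCodingMetrics.
final class EcMetricsTestHelper {
  // Reads a long counter such as "EcReconstructionReadTimeMillis" from the
  // given DataNode's registered DataNodeMetrics source.
  static long getLongMetric(DataNode dn, String metricName) {
    MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
    return getLongCounter(metricName, rb);
  }
}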