HDFS-12409. Add metrics of execution time of different stages in EC recovery task. (Lei (Eddy) Xu)
(cherry picked from commit 73aed34dff)
This commit is contained in:
parent
58eacdbb42
commit
d9dd856739
|
@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
|
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
|
||||||
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* StripedBlockReconstructor reconstructs one or more missing striped blocks in
|
* StripedBlockReconstructor reconstructs one or more missing striped blocks in
|
||||||
|
@ -83,18 +84,28 @@ class StripedBlockReconstructor extends StripedReconstructor
|
||||||
final int toReconstructLen =
|
final int toReconstructLen =
|
||||||
(int) Math.min(getStripedReader().getBufferSize(), remaining);
|
(int) Math.min(getStripedReader().getBufferSize(), remaining);
|
||||||
|
|
||||||
|
long start = Time.monotonicNow();
|
||||||
// step1: read from minimum source DNs required for reconstruction.
|
// step1: read from minimum source DNs required for reconstruction.
|
||||||
// The returned success list is the source DNs we do real read from
|
// The returned success list is the source DNs we do real read from
|
||||||
getStripedReader().readMinimumSources(toReconstructLen);
|
getStripedReader().readMinimumSources(toReconstructLen);
|
||||||
|
long readEnd = Time.monotonicNow();
|
||||||
|
|
||||||
// step2: decode to reconstruct targets
|
// step2: decode to reconstruct targets
|
||||||
reconstructTargets(toReconstructLen);
|
reconstructTargets(toReconstructLen);
|
||||||
|
long decodeEnd = Time.monotonicNow();
|
||||||
|
|
||||||
// step3: transfer data
|
// step3: transfer data
|
||||||
if (stripedWriter.transferData2Targets() == 0) {
|
if (stripedWriter.transferData2Targets() == 0) {
|
||||||
String error = "Transfer failed for all targets.";
|
String error = "Transfer failed for all targets.";
|
||||||
throw new IOException(error);
|
throw new IOException(error);
|
||||||
}
|
}
|
||||||
|
long writeEnd = Time.monotonicNow();
|
||||||
|
|
||||||
|
// Only successful reconstructions are recorded.
|
||||||
|
final DataNodeMetrics metrics = getDatanode().getMetrics();
|
||||||
|
metrics.incrECReconstructionReadTime(readEnd - start);
|
||||||
|
metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd);
|
||||||
|
metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd);
|
||||||
|
|
||||||
updatePositionInBlock(toReconstructLen);
|
updatePositionInBlock(toReconstructLen);
|
||||||
|
|
||||||
|
|
|
@ -151,6 +151,12 @@ public class DataNodeMetrics {
|
||||||
MutableCounterLong ecReconstructionBytesWritten;
|
MutableCounterLong ecReconstructionBytesWritten;
|
||||||
@Metric("Bytes remote read by erasure coding worker")
|
@Metric("Bytes remote read by erasure coding worker")
|
||||||
MutableCounterLong ecReconstructionRemoteBytesRead;
|
MutableCounterLong ecReconstructionRemoteBytesRead;
|
||||||
|
@Metric("Milliseconds spent on read by erasure coding worker")
|
||||||
|
private MutableCounterLong ecReconstructionReadTimeMillis;
|
||||||
|
@Metric("Milliseconds spent on decoding by erasure coding worker")
|
||||||
|
private MutableCounterLong ecReconstructionDecodingTimeMillis;
|
||||||
|
@Metric("Milliseconds spent on write by erasure coding worker")
|
||||||
|
private MutableCounterLong ecReconstructionWriteTimeMillis;
|
||||||
|
|
||||||
final MetricsRegistry registry = new MetricsRegistry("datanode");
|
final MetricsRegistry registry = new MetricsRegistry("datanode");
|
||||||
final String name;
|
final String name;
|
||||||
|
@ -503,4 +509,16 @@ public class DataNodeMetrics {
|
||||||
public void incrECReconstructionBytesWritten(long bytes) {
|
public void incrECReconstructionBytesWritten(long bytes) {
|
||||||
ecReconstructionBytesWritten.incr(bytes);
|
ecReconstructionBytesWritten.incr(bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void incrECReconstructionReadTime(long millis) {
|
||||||
|
ecReconstructionReadTimeMillis.incr(millis);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrECReconstructionWriteTime(long millis) {
|
||||||
|
ecReconstructionWriteTimeMillis.incr(millis);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrECReconstructionDecodingTime(long millis) {
|
||||||
|
ecReconstructionDecodingTimeMillis.incr(millis);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -90,6 +90,10 @@ public class TestDataNodeErasureCodingMetrics {
|
||||||
|
|
||||||
@Test(timeout = 120000)
|
@Test(timeout = 120000)
|
||||||
public void testFullBlock() throws Exception {
|
public void testFullBlock() throws Exception {
|
||||||
|
Assert.assertEquals(0, getLongMetric("EcReconstructionReadTimeMillis"));
|
||||||
|
Assert.assertEquals(0, getLongMetric("EcReconstructionDecodingTimeMillis"));
|
||||||
|
Assert.assertEquals(0, getLongMetric("EcReconstructionWriteTimeMillis"));
|
||||||
|
|
||||||
doTest("/testEcMetrics", blockGroupSize, 0);
|
doTest("/testEcMetrics", blockGroupSize, 0);
|
||||||
|
|
||||||
Assert.assertEquals("EcReconstructionTasks should be ",
|
Assert.assertEquals("EcReconstructionTasks should be ",
|
||||||
|
@ -103,6 +107,9 @@ public class TestDataNodeErasureCodingMetrics {
|
||||||
blockSize, getLongMetric("EcReconstructionBytesWritten"));
|
blockSize, getLongMetric("EcReconstructionBytesWritten"));
|
||||||
Assert.assertEquals("EcReconstructionRemoteBytesRead should be ",
|
Assert.assertEquals("EcReconstructionRemoteBytesRead should be ",
|
||||||
0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead"));
|
0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead"));
|
||||||
|
Assert.assertTrue(getLongMetric("EcReconstructionReadTimeMillis") > 0);
|
||||||
|
Assert.assertTrue(getLongMetric("EcReconstructionDecodingTimeMillis") > 0);
|
||||||
|
Assert.assertTrue(getLongMetric("EcReconstructionWriteTimeMillis") > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// A partial block, reconstruct the partial block
|
// A partial block, reconstruct the partial block
|
||||||
|
|
Loading…
Reference in New Issue