HDFS-8410. Add computation time metrics to datanode for ECWorker. Contributed by SammiChen.

This commit is contained in:
Andrew Wang 2016-10-21 13:12:35 -07:00
parent ae8bccd509
commit 61e30cf83c
3 changed files with 43 additions and 16 deletions

View File

@ -103,7 +103,10 @@ class StripedBlockReconstructor extends StripedReconstructor
int[] erasedIndices = stripedWriter.getRealTargetIndices();
ByteBuffer[] outputs = stripedWriter.getRealTargetBuffers(toReconstructLen);
long start = System.nanoTime();
getDecoder().decode(inputs, erasedIndices, outputs);
long end = System.nanoTime();
this.getDatanode().getMetrics().incrECDecodingTime(end - start);
stripedWriter.updateRealTargetBuffers(toReconstructLen);
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.datanode.metrics;
import static org.apache.hadoop.metrics2.impl.MsInfo.SessionId;
import static org.apache.hadoop.metrics2.lib.Interns.info;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
@ -134,6 +135,8 @@ public class DataNodeMetrics {
MutableCounterLong ecReconstructionTasks;
@Metric("Count of erasure coding failed reconstruction tasks")
MutableCounterLong ecFailedReconstructionTasks;
// Nanoseconds spent by decoding tasks.
MutableCounterLong ecDecodingTimeNanos;
final MetricsRegistry registry = new MetricsRegistry("datanode");
final String name;
@ -153,7 +156,10 @@ public class DataNodeMetrics {
sendDataPacketTransferNanosQuantiles = new MutableQuantiles[len];
ramDiskBlocksEvictionWindowMsQuantiles = new MutableQuantiles[len];
ramDiskBlocksLazyPersistWindowMsQuantiles = new MutableQuantiles[len];
ecDecodingTimeNanos = registry.newCounter(
info("ecDecodingTimeNanos", "Nanoseconds spent by decoding tasks"),
(long) 0);
for (int i = 0; i < len; i++) {
int interval = intervals[i];
packetAckRoundTripTimeNanosQuantiles[i] = registry.newQuantiles(
@ -442,7 +448,10 @@ public class DataNodeMetrics {
}
/**
 * Sets the dataNodeActiveXceiversCount metric to the supplied value.
 *
 * @param value the new value to record for the metric
 */
public void setDataNodeActiveXceiversCount(int value) {
  // A single assignment is sufficient; repeating the identical set() call
  // has no additional effect.
  dataNodeActiveXceiversCount.set(value);
}
/**
 * Adds the given duration to the ecDecodingTimeNanos counter, which
 * accumulates the nanoseconds spent by decoding tasks.
 *
 * @param decodingTimeNanos elapsed decoding time, in nanoseconds, to add
 *                          to the counter
 */
public void incrECDecodingTime(long decodingTimeNanos) {
ecDecodingTimeNanos.incr(decodingTimeNanos);
}
}

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.datanode;
import com.google.common.base.Supplier;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -42,14 +43,15 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
/**
* This file tests the erasure coding metrics in DataNode.
*/
@ -94,24 +96,37 @@ public class TestDataNodeErasureCodingMetrics {
DataNode workerDn = doTest("/testEcTasks");
MetricsRecordBuilder rb = getMetrics(workerDn.getMetrics().name());
// EcReconstructionTasks metric value will be updated in the finally block
// of striped reconstruction thread. Here, giving a grace period to finish
// EC reconstruction metric updates in DN.
LOG.info("Waiting to finish EC reconstruction metric updates in DN");
int retries = 0;
while (retries < 20) {
long taskMetricValue = getLongCounter("EcReconstructionTasks", rb);
if (taskMetricValue > 0) {
break;
// Ensure that reconstruction task is finished
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
long taskMetricValue = getLongCounter("EcReconstructionTasks", rb);
return (taskMetricValue > 0);
}
Thread.sleep(500);
retries++;
rb = getMetrics(workerDn.getMetrics().name());
}
}, 500, 10000);
assertCounter("EcReconstructionTasks", (long) 1, rb);
assertCounter("EcFailedReconstructionTasks", (long) 0, rb);
}
/**
 * Verifies that the "ecDecodingTimeNanos" counter of the worker datanode is
 * positive once an erasure coding reconstruction task has been counted.
 */
@Test(timeout = 120000)
public void testEcCodingTime() throws Exception {
  final DataNode workerDn = doTest("/testEcCodingTime");

  // Ensure that reconstruction task is finished. The metrics are re-read
  // on every poll so that updates made after the wait started are seen;
  // polling one record fetched up front would keep returning the same
  // counter value.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      MetricsRecordBuilder polled =
          getMetrics(workerDn.getMetrics().name());
      return getLongCounter("EcReconstructionTasks", polled) > 0;
    }
  }, 500, 10000);

  // Fetch the metrics again now that the task has been counted, so the
  // decoding time is read from an up-to-date record.
  MetricsRecordBuilder rb = getMetrics(workerDn.getMetrics().name());
  long decodeTime = getLongCounter("ecDecodingTimeNanos", rb);
  Assert.assertTrue(decodeTime > 0);
}
private DataNode doTest(String fileName) throws Exception {
Path file = new Path(fileName);