HDFS-15754. Add DataNode packet metrics (#2578)

Contributed by Fengnan Li.
lfengnan 2021-01-07 23:46:23 -08:00 committed by GitHub
parent 1b1791075a
commit 87bd4d2aca
5 changed files with 114 additions and 18 deletions

hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

@@ -469,6 +469,10 @@ Each metrics record contains tags such as SessionId and Hostname as additional information
 | `CheckAndUpdateOpAvgTime` | Average time of check and update operations in milliseconds |
 | `UpdateReplicaUnderRecoveryOpNumOps` | Total number of update replica under recovery operations |
 | `UpdateReplicaUnderRecoveryOpAvgTime` | Average time of update replica under recovery operations in milliseconds |
+| `PacketsReceived` | Total number of packets received by the DataNode, excluding heartbeat packets from clients |
+| `PacketsSlowWriteToMirror` | Total number of packets whose forwarding to the downstream DataNodes in the pipeline took longer than the slow-I/O threshold (300ms by default) |
+| `PacketsSlowWriteToDisk` | Total number of packets whose write to disk took longer than the slow-I/O threshold (300ms by default) |
+| `PacketsSlowWriteToOsCache` | Total number of packets whose write to the OS cache took longer than the slow-I/O threshold (300ms by default) |
 
 FsVolume
 --------
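
The 300ms default above is the DataNode's slow-I/O warning threshold, which (assuming the wiring is unchanged from earlier releases) comes from dfs.datanode.slow.io.warning.threshold.ms, the same setting that backs datanodeSlowLogThresholdMs in BlockReceiver below. A minimal Java sketch of tuning it, using the standard DFSConfigKeys constants:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class SlowThresholdExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Equivalent to setting dfs.datanode.slow.io.warning.threshold.ms in
        // hdfs-site.xml: a packet counts as "slow" once a write exceeds 500ms.
        conf.setLong(DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_KEY, 500);
        System.out.println("slow-io threshold = " + conf.getLong(
            DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_KEY,
            DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_DEFAULT) + "ms");
      }
    }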

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -586,6 +586,7 @@ class BlockReceiver implements Closeable {
       return 0;
     }
 
+    datanode.metrics.incrPacketsReceived();
     //First write the packet to the mirror:
     if (mirrorOut != null && !mirrorError) {
       try {
@@ -601,12 +602,15 @@
             mirrorAddr,
             duration);
         trackSendPacketToLastNodeInPipeline(duration);
-        if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
-          LOG.warn("Slow BlockReceiver write packet to mirror took " + duration
-              + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), "
-              + "downstream DNs=" + Arrays.toString(downstreamDNs)
-              + ", blockId=" + replicaInfo.getBlockId()
-              + ", seqno=" + seqno);
+        if (duration > datanodeSlowLogThresholdMs) {
+          datanode.metrics.incrPacketsSlowWriteToMirror();
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Slow BlockReceiver write packet to mirror took {}ms " +
+                "(threshold={}ms), downstream DNs={}, blockId={}, seqno={}",
+                duration, datanodeSlowLogThresholdMs,
+                Arrays.toString(downstreamDNs), replicaInfo.getBlockId(),
+                seqno);
+          }
         }
       } catch (IOException e) {
         handleMirrorOutError(e);
@@ -736,13 +740,17 @@
           long begin = Time.monotonicNow();
           streams.writeDataToDisk(dataBuf.array(),
              startByteToDisk, numBytesToDisk);
+          // For testing. Normally no-op.
+          DataNodeFaultInjector.get().delayWriteToDisk();
           long duration = Time.monotonicNow() - begin;
-          if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
-            LOG.warn("Slow BlockReceiver write data to disk cost:" + duration
-                + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), "
-                + "volume=" + getVolumeBaseUri()
-                + ", blockId=" + replicaInfo.getBlockId()
-                + ", seqno=" + seqno);
+          if (duration > datanodeSlowLogThresholdMs) {
+            datanode.metrics.incrPacketsSlowWriteToDisk();
+            if (LOG.isWarnEnabled()) {
+              LOG.warn("Slow BlockReceiver write data to disk cost: {}ms " +
+                  "(threshold={}ms), volume={}, blockId={}, seqno={}",
+                  duration, datanodeSlowLogThresholdMs, getVolumeBaseUri(),
+                  replicaInfo.getBlockId(), seqno);
+            }
           }
 
           if (duration > maxWriteToDiskMs) {
@@ -930,13 +938,17 @@
               POSIX_FADV_DONTNEED);
         }
         lastCacheManagementOffset = offsetInBlock;
+        // For testing. Normally no-op.
+        DataNodeFaultInjector.get().delayWriteToOsCache();
         long duration = Time.monotonicNow() - begin;
-        if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
-          LOG.warn("Slow manageWriterOsCache took " + duration
-              + "ms (threshold=" + datanodeSlowLogThresholdMs
-              + "ms), volume=" + getVolumeBaseUri()
-              + ", blockId=" + replicaInfo.getBlockId()
-              + ", seqno=" + seqno);
+        if (duration > datanodeSlowLogThresholdMs) {
+          datanode.metrics.incrPacketsSlowWriteToOsCache();
+          if (LOG.isWarnEnabled()) {
+            LOG.warn("Slow manageWriterOsCache took {}ms " +
+                "(threshold={}ms), volume={}, blockId={}, seqno={}",
+                duration, datanodeSlowLogThresholdMs, getVolumeBaseUri(),
+                replicaInfo.getBlockId(), seqno);
+          }
         }
       }
     } catch (Throwable t) {
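
Since these counters only ever increment inside the DataNode process, one way to observe them is through the DataNodeActivity MBean that metrics2 registers. A minimal in-process sketch (not part of this commit; the bean name carries a host/port suffix that varies by deployment, so it is matched with a pattern):

    import java.lang.management.ManagementFactory;
    import javax.management.MBeanServer;
    import javax.management.ObjectName;

    public class PacketCounterProbe {
      public static void main(String[] args) throws Exception {
        // Works only inside (or attached to) the DataNode JVM, e.g. from a test.
        MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
        ObjectName pattern =
            new ObjectName("Hadoop:service=DataNode,name=DataNodeActivity*");
        for (ObjectName name : mbs.queryNames(pattern, null)) {
          System.out.println(name
              + " PacketsReceived=" + mbs.getAttribute(name, "PacketsReceived")
              + " PacketsSlowWriteToMirror="
              + mbs.getAttribute(name, "PacketsSlowWriteToMirror"));
        }
      }
    }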

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java

@@ -67,6 +67,16 @@ public class DataNodeFaultInjector {
       throws IOException {
   }
 
+  /**
+   * Used as a hook to delay writing a packet to disk.
+   */
+  public void delayWriteToDisk() {}
+
+  /**
+   * Used as a hook to delay writing a packet to os cache.
+   */
+  public void delayWriteToOsCache() {}
+
   /**
    * Used as a hook to intercept the latency of sending ack.
    */
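
Outside of Mockito (which the new test below uses), these hooks are typically exercised by installing a throwaway subclass through the injector's existing get/set accessors. A minimal sketch under that assumption:

    // Install a delaying injector, run the I/O under test, then restore.
    final DataNodeFaultInjector previous = DataNodeFaultInjector.get();
    DataNodeFaultInjector.set(new DataNodeFaultInjector() {
      @Override
      public void delayWriteToDisk() {
        try {
          Thread.sleep(1000); // push every disk write past the 300ms threshold
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
      }
    });
    try {
      // ... write a file through a MiniDFSCluster here ...
    } finally {
      DataNodeFaultInjector.set(previous); // never leak the injector across tests
    }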

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java

@@ -183,6 +183,11 @@ public class DataNodeMetrics {
   @Metric private MutableRate checkAndUpdateOp;
   @Metric private MutableRate updateReplicaUnderRecoveryOp;
 
+  @Metric MutableCounterLong packetsReceived;
+  @Metric MutableCounterLong packetsSlowWriteToMirror;
+  @Metric MutableCounterLong packetsSlowWriteToDisk;
+  @Metric MutableCounterLong packetsSlowWriteToOsCache;
+
   final MetricsRegistry registry = new MetricsRegistry("datanode");
   @Metric("Milliseconds spent on calling NN rpc")
   private MutableRatesWithAggregation
@@ -690,4 +695,20 @@
   public void addUpdateReplicaUnderRecoveryOp(long latency) {
     updateReplicaUnderRecoveryOp.add(latency);
   }
+
+  public void incrPacketsReceived() {
+    packetsReceived.incr();
+  }
+
+  public void incrPacketsSlowWriteToMirror() {
+    packetsSlowWriteToMirror.incr();
+  }
+
+  public void incrPacketsSlowWriteToDisk() {
+    packetsSlowWriteToDisk.incr();
+  }
+
+  public void incrPacketsSlowWriteToOsCache() {
+    packetsSlowWriteToOsCache.incr();
+  }
 }
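
One naming detail worth noting: a bare @Metric field is exported under its capitalized field name, so packetsReceived surfaces as the PacketsReceived counter documented in Metrics.md above. A short sketch of asserting on these counters with the MetricsAsserts test helper (the same pattern the new test below follows):

    import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
    import static org.apache.hadoop.test.MetricsAsserts.getMetrics;

    import org.apache.hadoop.metrics2.MetricsRecordBuilder;

    // Inside a test with a running DataNode `dn`:
    MetricsRecordBuilder rb = getMetrics(dn.getMetrics().name());
    long received = getLongCounter("PacketsReceived", rb);
    long slowToDisk = getLongCounter("PacketsSlowWriteToDisk", rb);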

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java

@@ -64,6 +64,8 @@ import org.apache.hadoop.test.MetricsAsserts;
 import org.apache.hadoop.util.Time;
 import org.junit.Test;
 import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
 
 import javax.management.MBeanServer;
 import javax.management.ObjectName;
@@ -161,6 +163,53 @@ public class TestDataNodeMetrics {
     }
   }
 
+  @Test
+  public void testReceivePacketSlowMetrics() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    final int interval = 1;
+    conf.setInt(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, interval);
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(3).build();
+    try {
+      cluster.waitActive();
+      DistributedFileSystem fs = cluster.getFileSystem();
+      final DataNodeFaultInjector injector =
+          Mockito.mock(DataNodeFaultInjector.class);
+      Answer<Object> answer = new Answer<Object>() {
+        @Override
+        public Object answer(InvocationOnMock invocationOnMock)
+            throws Throwable {
+          // Make the operation take longer than the slow-I/O threshold.
+          Thread.sleep(1000);
+          return null;
+        }
+      };
+      Mockito.doAnswer(answer).when(injector)
+          .stopSendingPacketDownstream(Mockito.anyString());
+      Mockito.doAnswer(answer).when(injector).delayWriteToOsCache();
+      Mockito.doAnswer(answer).when(injector).delayWriteToDisk();
+      DataNodeFaultInjector.set(injector);
+      Path testFile = new Path("/testFlushNanosMetric.txt");
+      FSDataOutputStream fout = fs.create(testFile);
+      fout.write(new byte[1]);
+      fout.hsync();
+      fout.close();
+      List<DataNode> datanodes = cluster.getDataNodes();
+      DataNode datanode = datanodes.get(0);
+      MetricsRecordBuilder dnMetrics = getMetrics(datanode.getMetrics().name());
+      assertTrue("More than 1 packet received",
+          getLongCounter("PacketsReceived", dnMetrics) > 1L);
+      assertTrue("More than 1 slow packet to mirror",
+          getLongCounter("PacketsSlowWriteToMirror", dnMetrics) > 1L);
+      assertCounter("PacketsSlowWriteToDisk", 1L, dnMetrics);
+      assertCounter("PacketsSlowWriteToOsCache", 0L, dnMetrics);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
   /**
    * HDFS-15242: This function ensures that writing causes some metrics
    * of FSDatasetImpl to increment.