HDFS-11603. Improve slow mirror/disk warnings in BlockReceiver.
This commit is contained in:
parent
318bfb01bc
commit
5485d93bda
|
@ -55,6 +55,7 @@ public class DatanodeInfo extends DatanodeID implements Node {
|
||||||
private String softwareVersion;
|
private String softwareVersion;
|
||||||
private List<String> dependentHostNames = new LinkedList<>();
|
private List<String> dependentHostNames = new LinkedList<>();
|
||||||
private String upgradeDomain;
|
private String upgradeDomain;
|
||||||
|
public static final DatanodeInfo[] EMPTY_ARRAY = {};
|
||||||
|
|
||||||
// Datanode administrative states
|
// Datanode administrative states
|
||||||
public enum AdminStates {
|
public enum AdminStates {
|
||||||
|
|
|
@ -100,6 +100,7 @@ class BlockReceiver implements Closeable {
|
||||||
private DataTransferThrottler throttler;
|
private DataTransferThrottler throttler;
|
||||||
private ReplicaOutputStreams streams;
|
private ReplicaOutputStreams streams;
|
||||||
private DatanodeInfo srcDataNode = null;
|
private DatanodeInfo srcDataNode = null;
|
||||||
|
private DatanodeInfo[] downstreamDNs = DatanodeInfo.EMPTY_ARRAY;
|
||||||
private final DataNode datanode;
|
private final DataNode datanode;
|
||||||
volatile private boolean mirrorError;
|
volatile private boolean mirrorError;
|
||||||
|
|
||||||
|
@ -424,10 +425,10 @@ class BlockReceiver implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
long duration = Time.monotonicNow() - begin;
|
long duration = Time.monotonicNow() - begin;
|
||||||
if (duration > datanodeSlowLogThresholdMs) {
|
if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
|
||||||
LOG.warn("Slow flushOrSync took " + duration + "ms (threshold="
|
LOG.warn("Slow flushOrSync took " + duration + "ms (threshold="
|
||||||
+ datanodeSlowLogThresholdMs + "ms), isSync:" + isSync + ", flushTotalNanos="
|
+ datanodeSlowLogThresholdMs + "ms), isSync:" + isSync + ", flushTotalNanos="
|
||||||
+ flushTotalNanos + "ns");
|
+ flushTotalNanos + "ns, volume=" + getVolumeBaseUri());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,9 +579,10 @@ class BlockReceiver implements Closeable {
|
||||||
mirrorAddr,
|
mirrorAddr,
|
||||||
duration);
|
duration);
|
||||||
trackSendPacketToLastNodeInPipeline(duration);
|
trackSendPacketToLastNodeInPipeline(duration);
|
||||||
if (duration > datanodeSlowLogThresholdMs) {
|
if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
|
||||||
LOG.warn("Slow BlockReceiver write packet to mirror took " + duration
|
LOG.warn("Slow BlockReceiver write packet to mirror took " + duration
|
||||||
+ "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
|
+ "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), "
|
||||||
|
+ "downstream DNs=" + Arrays.toString(downstreamDNs));
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
handleMirrorOutError(e);
|
handleMirrorOutError(e);
|
||||||
|
@ -711,9 +713,10 @@ class BlockReceiver implements Closeable {
|
||||||
streams.writeDataToDisk(dataBuf.array(),
|
streams.writeDataToDisk(dataBuf.array(),
|
||||||
startByteToDisk, numBytesToDisk);
|
startByteToDisk, numBytesToDisk);
|
||||||
long duration = Time.monotonicNow() - begin;
|
long duration = Time.monotonicNow() - begin;
|
||||||
if (duration > datanodeSlowLogThresholdMs) {
|
if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
|
||||||
LOG.warn("Slow BlockReceiver write data to disk cost:" + duration
|
LOG.warn("Slow BlockReceiver write data to disk cost:" + duration
|
||||||
+ "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
|
+ "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), "
|
||||||
|
+ "volume=" + getVolumeBaseUri());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (duration > maxWriteToDiskMs) {
|
if (duration > maxWriteToDiskMs) {
|
||||||
|
@ -902,9 +905,10 @@ class BlockReceiver implements Closeable {
|
||||||
}
|
}
|
||||||
lastCacheManagementOffset = offsetInBlock;
|
lastCacheManagementOffset = offsetInBlock;
|
||||||
long duration = Time.monotonicNow() - begin;
|
long duration = Time.monotonicNow() - begin;
|
||||||
if (duration > datanodeSlowLogThresholdMs) {
|
if (duration > datanodeSlowLogThresholdMs && LOG.isWarnEnabled()) {
|
||||||
LOG.warn("Slow manageWriterOsCache took " + duration
|
LOG.warn("Slow manageWriterOsCache took " + duration
|
||||||
+ "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
|
+ "ms (threshold=" + datanodeSlowLogThresholdMs
|
||||||
|
+ "ms), volume=" + getVolumeBaseUri());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
|
@ -932,13 +936,7 @@ class BlockReceiver implements Closeable {
|
||||||
boolean responderClosed = false;
|
boolean responderClosed = false;
|
||||||
mirrorOut = mirrOut;
|
mirrorOut = mirrOut;
|
||||||
mirrorAddr = mirrAddr;
|
mirrorAddr = mirrAddr;
|
||||||
isPenultimateNode = ((downstreams != null) && (downstreams.length == 1));
|
initPerfMonitoring(downstreams);
|
||||||
if (isPenultimateNode) {
|
|
||||||
mirrorNameForMetrics = (downstreams[0].getInfoSecurePort() != 0 ?
|
|
||||||
downstreams[0].getInfoSecureAddr() : downstreams[0].getInfoAddr());
|
|
||||||
LOG.debug("Will collect peer metrics for downstream node {}",
|
|
||||||
mirrorNameForMetrics);
|
|
||||||
}
|
|
||||||
throttler = throttlerArg;
|
throttler = throttlerArg;
|
||||||
|
|
||||||
this.replyOut = replyOut;
|
this.replyOut = replyOut;
|
||||||
|
@ -1058,6 +1056,39 @@ class BlockReceiver implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If we have downstream DNs and peerMetrics are enabled, then initialize
|
||||||
|
* some state for monitoring the performance of downstream DNs.
|
||||||
|
*
|
||||||
|
* @param downstreams downstream DNs, or null if there are none.
|
||||||
|
*/
|
||||||
|
private void initPerfMonitoring(DatanodeInfo[] downstreams) {
|
||||||
|
if (downstreams != null && downstreams.length > 0) {
|
||||||
|
downstreamDNs = downstreams;
|
||||||
|
isPenultimateNode = (downstreams.length == 1);
|
||||||
|
if (isPenultimateNode && datanode.getPeerMetrics() != null) {
|
||||||
|
mirrorNameForMetrics = (downstreams[0].getInfoSecurePort() != 0 ?
|
||||||
|
downstreams[0].getInfoSecureAddr() : downstreams[0].getInfoAddr());
|
||||||
|
LOG.debug("Will collect peer metrics for downstream node {}",
|
||||||
|
mirrorNameForMetrics);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch the base URI of the volume on which this replica resides.
|
||||||
|
*
|
||||||
|
* @returns Volume base URI as string if available. Else returns the
|
||||||
|
* the string "unavailable".
|
||||||
|
*/
|
||||||
|
private String getVolumeBaseUri() {
|
||||||
|
final ReplicaInfo ri = replicaInfo.getReplicaInfo();
|
||||||
|
if (ri != null && ri.getVolume() != null) {
|
||||||
|
return ri.getVolume().getBaseURI().toString();
|
||||||
|
}
|
||||||
|
return "unavailable";
|
||||||
|
}
|
||||||
|
|
||||||
/** Cleanup a partial block
|
/** Cleanup a partial block
|
||||||
* if this write is for a replication request (and not from a client)
|
* if this write is for a replication request (and not from a client)
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue