HDFS-16086. Add volume information to datanode log for tracing (#3136)

Author: litao
Date: 2021-07-01 13:06:56 +08:00 (committed by GitHub)
parent 7c999e2d9a
commit 56c7ada7a5
10 changed files with 47 additions and 8 deletions

org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs.BlockReportReplica;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
 import org.apache.hadoop.hdfs.server.datanode.Replica;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
 import org.apache.hadoop.thirdparty.protobuf.ByteString;
@@ -523,6 +524,7 @@ public abstract class BlockListAsLongs implements Iterable<BlockReportReplica> {
   @InterfaceAudience.Private
   public static class BlockReportReplica extends Block implements Replica {
     private ReplicaState state;
+
     private BlockReportReplica() {
     }
     public BlockReportReplica(Block block) {
@@ -557,6 +559,10 @@ public abstract class BlockListAsLongs implements Iterable<BlockReportReplica> {
       throw new UnsupportedOperationException();
     }
     @Override
+    public FsVolumeSpi getVolume() {
+      throw new UnsupportedOperationException();
+    }
+    @Override
     public boolean equals(Object o) {
       return super.equals(o);
     }

org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -1551,11 +1551,12 @@ class BlockReceiver implements Closeable {
         DatanodeRegistration dnR = datanode.getDNRegistrationForBP(block
             .getBlockPoolId());
         ClientTraceLog.info(String.format(DN_CLIENTTRACE_FORMAT, inAddr,
-            myAddr, block.getNumBytes(), "HDFS_WRITE", clientname, offset,
-            dnR.getDatanodeUuid(), block, endTime - startTime));
+            myAddr, replicaInfo.getVolume(), block.getNumBytes(),
+            "HDFS_WRITE", clientname, offset, dnR.getDatanodeUuid(),
+            block, endTime - startTime));
       } else {
-        LOG.info("Received " + block + " size " + block.getNumBytes()
-            + " from " + inAddr);
+        LOG.info("Received " + block + " on volume " + replicaInfo.getVolume()
+            + " size " + block.getNumBytes() + " from " + inAddr);
       }
     }

org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -268,6 +268,7 @@ public class DataNode extends ReconfigurableBase
   public static final String DN_CLIENTTRACE_FORMAT =
         "src: %s" +      // src IP
         ", dest: %s" +   // dst IP
+        ", volume: %s" + // volume
         ", bytes: %s" +  // byte count
         ", op: %s" +     // operation
         ", cliID: %s" +  // DFSClient id
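For intuition, here is a minimal, self-contained sketch of how the extended clienttrace format renders once the volume field is wired in. The class name and all sample values are hypothetical, and the format string is abbreviated to just the fields visible in this hunk.

// Hypothetical sketch: renders a clienttrace-style line including the new
// "volume" field. Field order follows the hunk above; values are made up.
public class ClientTraceFormatSketch {
  // Abbreviated stand-in for DN_CLIENTTRACE_FORMAT (only the fields
  // shown in the hunk above).
  static final String FORMAT =
      "src: %s, dest: %s, volume: %s, bytes: %s, op: %s, cliID: %s";

  public static void main(String[] args) {
    System.out.println(String.format(FORMAT,
        "/10.0.0.5:41234",      // src IP (hypothetical)
        "/10.0.0.9:9866",       // dst IP (hypothetical)
        "/data/1/dfs/dn",       // volume (the new field, hypothetical)
        134217728,              // byte count
        "HDFS_WRITE",           // operation
        "DFSClient_example"));  // DFSClient id (hypothetical)
  }
}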

org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

@@ -587,7 +587,7 @@ class DataXceiver extends Receiver implements Runnable {
       final String clientTraceFmt =
           clientName.length() > 0 && ClientTraceLog.isInfoEnabled()
             ? String.format(DN_CLIENTTRACE_FORMAT, localAddress, remoteAddress,
-                "%d", "HDFS_READ", clientName, "%d",
+                "", "%d", "HDFS_READ", clientName, "%d",
                 dnR.getDatanodeUuid(), block, "%d")
             : dnR + " Served block " + block + " to " +
                 remoteAddress;
@@ -929,8 +929,9 @@
       if (isDatanode ||
           stage == BlockConstructionStage.PIPELINE_CLOSE_RECOVERY) {
         datanode.closeBlock(block, null, storageUuid, isOnTransientStorage);
-        LOG.info("Received {} src: {} dest: {} of size {}",
-            block, remoteAddress, localAddress, block.getNumBytes());
+        LOG.info("Received {} src: {} dest: {} volume: {} of size {}",
+            block, remoteAddress, localAddress, replica.getVolume(),
+            block.getNumBytes());
       }

       if(isClient) {

org/apache/hadoop/hdfs/server/datanode/Replica.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;

 /**
  * This represents block replicas which are stored in DataNode.
@@ -64,4 +65,10 @@ public interface Replica {
    * Return true if the target volume is backed by RAM.
    */
   public boolean isOnTransientStorage();
+
+  /**
+   * Get the volume of replica.
+   * @return the volume of replica
+   */
+  public FsVolumeSpi getVolume();
 }
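To make the new contract concrete, here is a hedged, illustrative sketch of an implementation that remembers the volume it was created on and exposes it through getVolume(). The class and field names are invented for illustration; the patch's real implementations appear in the files below (for example, the test double ExternalReplica simply returns null, while SimulatedFSDataset resolves the volume from its storage).

import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;

// Illustrative only (not from the patch): a replica-like holder that
// carries its FsVolumeSpi and hands it out for log statements.
public class VolumeAwareReplicaSketch {
  private final FsVolumeSpi volume; // volume this replica resides on

  public VolumeAwareReplicaSketch(FsVolumeSpi volume) {
    this.volume = volume;
  }

  // Mirrors the new Replica#getVolume() accessor.
  public FsVolumeSpi getVolume() {
    return volume;
  }
}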

org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetAsyncDiskService.java

@@ -232,7 +232,8 @@ class FsDatasetAsyncDiskService {
   void deleteAsync(FsVolumeReference volumeRef, ReplicaInfo replicaToDelete,
       ExtendedBlock block, String trashDirectory) {
     LOG.info("Scheduling " + block.getLocalBlock()
-        + " replica " + replicaToDelete + " for deletion");
+        + " replica " + replicaToDelete + " on volume " +
+        replicaToDelete.getVolume() + " for deletion");
     ReplicaFileDeleteTask deletionTask = new ReplicaFileDeleteTask(
         volumeRef, replicaToDelete, block, trashDirectory);
     execute(((FsVolumeImpl) volumeRef.getVolume()), deletionTask);

org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -1129,6 +1129,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
         }
       }
       try {
+        LOG.debug("moving block {} from {} to {}", block,
+            replicaInfo.getVolume(), volumeRef.getVolume());
         moveBlock(block, replicaInfo, volumeRef, useVolumeOnSameMount);
         datanode.getMetrics().incrReplaceBlockOpOnSameHost();
         if (useVolumeOnSameMount) {
@@ -1631,6 +1633,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     if (ref == null) {
       ref = volumes.getNextVolume(storageType, storageId, b.getNumBytes());
     }
+    LOG.debug("Creating Rbw, block: {} on volume: {}", b, ref.getVolume());
     FsVolumeImpl v = (FsVolumeImpl) ref.getVolume();

     // create an rbw file to hold block in the designated volume
@@ -1904,6 +1907,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     ReplicaInPipeline newReplicaInfo;
     try {
       newReplicaInfo = v.createTemporary(b);
+      LOG.debug("creating temporary for block: {} on volume: {}",
+          b, ref.getVolume());
     } catch (IOException e) {
       IOUtils.cleanupWithLogger(null, ref);
       throw e;

org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java (test)

@@ -416,6 +416,11 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
       } while (deadLine > System.currentTimeMillis());
       throw new IOException("Minimum length was not achieved within timeout");
     }
+
+    @Override
+    public FsVolumeSpi getVolume() {
+      return getStorage(theBlock).getVolume();
+    }
   }

   /**

org/apache/hadoop/hdfs/server/datanode/extdataset/ExternalReplica.java (test)

@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.datanode.extdataset;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
 import org.apache.hadoop.hdfs.server.datanode.Replica;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;

 public class ExternalReplica implements Replica {
@@ -62,4 +63,9 @@ public class ExternalReplica implements Replica {
   public boolean isOnTransientStorage() {
     return false;
   }
+
+  @Override
+  public FsVolumeSpi getVolume() {
+    return null;
+  }
 }

org/apache/hadoop/hdfs/server/datanode/extdataset/ExternalReplicaInPipeline.java (test)

@@ -26,6 +26,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
 import org.apache.hadoop.hdfs.server.datanode.ChunkChecksum;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipeline;
 import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaOutputStreams;
 import org.apache.hadoop.util.DataChecksum;
@@ -135,4 +136,9 @@ public class ExternalReplicaInPipeline implements ReplicaInPipeline {
   public void waitForMinLength(long minLength, long time, TimeUnit unit)
       throws IOException {
   }
+
+  @Override
+  public FsVolumeSpi getVolume() {
+    return null;
+  }
 }