HDFS-10453. ReplicationMonitor thread could get stuck for a long time due to a race between replication and deletion of the same file in a large cluster. Contributed by He Xiaoqiao.

This commit is contained in:
Arpit Agarwal 2018-02-12 07:02:42 -08:00
parent 050aa531ba
commit 41d434d08e

View File

@ -26,6 +26,7 @@
class ReplicationWork {
private final BlockInfo block;
private final String srcPath;
private final long blockSize;
private final byte storagePolicyID;
private final DatanodeDescriptor srcNode;
private final int additionalReplRequired;
@ -40,6 +41,7 @@ public ReplicationWork(BlockInfo block, BlockCollection bc,
int priority) {
this.block = block;
this.srcPath = bc.getName();
this.blockSize = block.getNumBytes();
this.storagePolicyID = bc.getStoragePolicyID();
this.srcNode = srcNode;
this.srcNode.incrementPendingReplicationWithoutTargets();
@ -56,7 +58,7 @@ void chooseTargets(BlockPlacementPolicy blockplacement,
try {
targets = blockplacement.chooseTarget(getSrcPath(),
additionalReplRequired, srcNode, liveReplicaStorages, false,
excludedNodes, block.getNumBytes(),
excludedNodes, blockSize,
storagePolicySuite.getPolicy(getStoragePolicyID()), null);
} finally {
srcNode.decrementPendingReplicationWithoutTargets();