HDFS-10453. ReplicationMonitor thread could get stuck for a long time due to the race between replication and deletion of the same file in a large cluster. Contributed by He Xiaoqiao.

2018-02-12 07:02:42 -08:00 · 2018-02-12 07:02:42 -08:00 · 41d434d08e
commit 41d434d08e
parent 050aa531ba
1 changed files with 3 additions and 1 deletions
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ReplicationWork.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ReplicationWork.java
@ -26,6 +26,7 @@
 class ReplicationWork {
  private final BlockInfo block;
  private final String srcPath;
+  private final long blockSize;
  private final byte storagePolicyID;
  private final DatanodeDescriptor srcNode;
  private final int additionalReplRequired;
@ -40,6 +41,7 @@ public ReplicationWork(BlockInfo block, BlockCollection bc,
      int priority) {
    this.block = block;
    this.srcPath = bc.getName();
+    this.blockSize = block.getNumBytes();
    this.storagePolicyID = bc.getStoragePolicyID();
    this.srcNode = srcNode;
    this.srcNode.incrementPendingReplicationWithoutTargets();
@ -56,7 +58,7 @@ void chooseTargets(BlockPlacementPolicy blockplacement,
    try {
      targets = blockplacement.chooseTarget(getSrcPath(),
          additionalReplRequired, srcNode, liveReplicaStorages, false,
-          excludedNodes, block.getNumBytes(),
+          excludedNodes, blockSize,
          storagePolicySuite.getPolicy(getStoragePolicyID()), null);
    } finally {
      srcNode.decrementPendingReplicationWithoutTargets();