diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 6337b50ef46..8be1734a362 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -4119,21 +4119,41 @@ public class BlockManager implements BlockStatsMXBean { if (!isPopulatingReplQueues()) { return; } - final Iterator it = srcNode.getBlockIterator(); + int numExtraRedundancy = 0; - while(it.hasNext()) { - final BlockInfo block = it.next(); - if (block.isDeleted()) { - //Orphan block, will be handled eventually, skip + for (DatanodeStorageInfo datanodeStorageInfo : srcNode.getStorageInfos()) { + // the namesystem lock is released between iterations. Make sure the + // storage is not removed before continuing. + if (srcNode.getStorageInfo(datanodeStorageInfo.getStorageID()) == null) { continue; } - int expectedReplication = this.getExpectedRedundancyNum(block); - NumberReplicas num = countNodes(block); - if (shouldProcessExtraRedundancy(num, expectedReplication)) { - // extra redundancy block - processExtraRedundancyBlock(block, (short) expectedReplication, null, - null); - numExtraRedundancy++; + final Iterator it = datanodeStorageInfo.getBlockIterator(); + while(it.hasNext()) { + final BlockInfo block = it.next(); + if (block.isDeleted()) { + //Orphan block, will be handled eventually, skip + continue; + } + int expectedReplication = this.getExpectedRedundancyNum(block); + NumberReplicas num = countNodes(block); + if (shouldProcessExtraRedundancy(num, expectedReplication)) { + // extra redundancy block + processExtraRedundancyBlock(block, (short) expectedReplication, null, + null); + numExtraRedundancy++; + } + } + // When called by tests like TestDefaultBlockPlacementPolicy. + // testPlacementWithLocalRackNodesDecommissioned, it is not protected by + // lock, only when called by DatanodeManager.refreshNodes have writeLock + if (namesystem.hasWriteLock()) { + namesystem.writeUnlock(); + try { + Thread.sleep(1); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + namesystem.writeLock(); } } LOG.info("Invalidated {} extra redundancy blocks on {} after "