HDFS-10477. Stopping the decommission of a rack of DataNodes caused the NameNode to fail over to standby. Contributed by yunjiong zhao and Wei-Chiu Chuang.

(cherry picked from commit be488b6070)
(cherry picked from commit c8703dda07)
(cherry picked from commit 2a94603ae66d9000c0bb07df0d592279339af103)
This commit is contained in:
Wei-Chiu Chuang 2019-04-03 11:00:12 -07:00
parent 6f0896e087
commit cbac102566
1 changed file with 32 additions and 12 deletions

View File

@@ -4119,21 +4119,41 @@ public class BlockManager implements BlockStatsMXBean {
if (!isPopulatingReplQueues()) { if (!isPopulatingReplQueues()) {
return; return;
} }
final Iterator<BlockInfo> it = srcNode.getBlockIterator();
int numExtraRedundancy = 0; int numExtraRedundancy = 0;
while(it.hasNext()) { for (DatanodeStorageInfo datanodeStorageInfo : srcNode.getStorageInfos()) {
final BlockInfo block = it.next(); // the namesystem lock is released between iterations. Make sure the
if (block.isDeleted()) { // storage is not removed before continuing.
//Orphan block, will be handled eventually, skip if (srcNode.getStorageInfo(datanodeStorageInfo.getStorageID()) == null) {
continue; continue;
} }
int expectedReplication = this.getExpectedRedundancyNum(block); final Iterator<BlockInfo> it = datanodeStorageInfo.getBlockIterator();
NumberReplicas num = countNodes(block); while(it.hasNext()) {
if (shouldProcessExtraRedundancy(num, expectedReplication)) { final BlockInfo block = it.next();
// extra redundancy block if (block.isDeleted()) {
processExtraRedundancyBlock(block, (short) expectedReplication, null, //Orphan block, will be handled eventually, skip
null); continue;
numExtraRedundancy++; }
int expectedReplication = this.getExpectedRedundancyNum(block);
NumberReplicas num = countNodes(block);
if (shouldProcessExtraRedundancy(num, expectedReplication)) {
// extra redundancy block
processExtraRedundancyBlock(block, (short) expectedReplication, null,
null);
numExtraRedundancy++;
}
}
// When called by tests like TestDefaultBlockPlacementPolicy.
// testPlacementWithLocalRackNodesDecommissioned, it is not protected by
// lock, only when called by DatanodeManager.refreshNodes have writeLock
if (namesystem.hasWriteLock()) {
namesystem.writeUnlock();
try {
Thread.sleep(1);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
namesystem.writeLock();
} }
} }
LOG.info("Invalidated {} extra redundancy blocks on {} after " LOG.info("Invalidated {} extra redundancy blocks on {} after "