From c36d69a7b30927eaea16335e06cfcc247accde35 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Wed, 29 Aug 2018 22:40:13 +0530 Subject: [PATCH] HDFS-13027. Handle possible NPEs due to deleted blocks in race condition. Contributed by Vinayakumar B. (cherry picked from commit 65977e5d8124be2bc208af25beed934933f170b3) --- .../apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java | 2 +- .../hadoop/hdfs/server/blockmanagement/BlockManager.java | 4 ++++ .../org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 2 +- .../org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java | 5 ++++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java index 43f4f476bbc..d160f61fc8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java @@ -52,7 +52,7 @@ public abstract class BlockInfo extends Block /** * Block collection ID. */ - private long bcId; + private volatile long bcId; /** For implementing {@link LightWeightGSet.LinkedElement} interface. */ private LightWeightGSet.LinkedElement nextLinkedElement; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 17f6f6e53d7..675221a1ec5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -4171,6 +4171,10 @@ public class BlockManager implements BlockStatsMXBean { int numExtraRedundancy = 0; while(it.hasNext()) { final BlockInfo block = it.next(); + if (block.isDeleted()) { + //Orphan block, will be handled eventually, skip + continue; + } int expectedReplication = this.getExpectedRedundancyNum(block); NumberReplicas num = countNodes(block); if (shouldProcessExtraRedundancy(num, expectedReplication)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 6ba0e0b2ffc..74c9f104826 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4128,7 +4128,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, while (it.hasNext()) { Block b = it.next(); BlockInfo blockInfo = blockManager.getStoredBlock(b); - if (blockInfo == null) { + if (blockInfo == null || blockInfo.isDeleted()) { LOG.info("Cannot find block info for block " + b); } else { BlockCollection bc = getBlockCollection(blockInfo); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 5e7bab5dfa8..0201ca11610 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -264,12 +264,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { return; } + namenode.getNamesystem().readLock(); try { //get blockInfo Block block = new Block(Block.getBlockId(blockId)); //find which file this block belongs to BlockInfo blockInfo = blockManager.getStoredBlock(block); - if(blockInfo == null) { + if (blockInfo == null || blockInfo.isDeleted()) { out.println("Block "+ blockId +" " + NONEXISTENT_STATUS); LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS); return; @@ -329,6 +330,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { out.println(e.getMessage()); out.print("\n\n" + errMsg); LOG.warn("Error in looking up block", e); + } finally { + namenode.getNamesystem().readUnlock("fsck"); } }