From 74634eb002717a9684d00f0e9dc263ab7eb49246 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Fri, 23 Oct 2020 12:05:53 -0700 Subject: [PATCH] HDFS-15644. Failed volumes can cause DNs to stop block reporting. Contributed by Ahmed Hussein. --- .../fsdataset/impl/FsDatasetImpl.java | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index e65e37fe13d..ed779923235 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -1957,28 +1957,32 @@ class FsDatasetImpl implements FsDatasetSpi { continue; } String volStorageID = b.getVolume().getStorageID(); - if (!builders.containsKey(volStorageID)) { + switch(b.getState()) { + case FINALIZED: + case RBW: + case RWR: + break; + case RUR: + // use the original replica. + b = b.getOriginalReplica(); + break; + case TEMPORARY: + continue; + default: + assert false : "Illegal ReplicaInfo state."; + continue; + } + BlockListAsLongs.Builder storageBuilder = builders.get(volStorageID); + // a storage in the process of failing will not be in the volumes list + // but will be in the replica map. + if (storageBuilder != null) { + storageBuilder.add(b); + } else { if (!missingVolumesReported.contains(volStorageID)) { LOG.warn("Storage volume: " + volStorageID + " missing for the" + " replica block: " + b + ". Probably being removed!"); missingVolumesReported.add(volStorageID); } - continue; - } - switch(b.getState()) { - case FINALIZED: - case RBW: - case RWR: - builders.get(volStorageID).add(b); - break; - case RUR: - ReplicaInfo orig = b.getOriginalReplica(); - builders.get(volStorageID).add(orig); - break; - case TEMPORARY: - break; - default: - assert false : "Illegal ReplicaInfo state."; } } }