From 1293199532f64fd0d93666bedd57adb99e8ac6e4 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 28 Jan 2020 22:59:26 +0530 Subject: [PATCH] HDFS-14993. checkDiskError doesn't work during datanode startup. Contributed by Yang Yun. --- .../hadoop/hdfs/server/datanode/DataNode.java | 6 ++-- .../datanode/TestDataNodeVolumeFailure.java | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 05527e323dd..6955c01d269 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1678,14 +1678,14 @@ public class DataNode extends ReconfigurableBase // the dataset, block scanners, etc. initStorage(nsInfo); - // Exclude failed disks before initializing the block pools to avoid startup - // failures. - checkDiskError(); try { data.addBlockPool(nsInfo.getBlockPoolID(), getConf()); } catch (AddBlockPoolException e) { handleAddBlockPoolError(e); } + // HDFS-14993: check the disks after adding the block pool info. 
+ checkDiskError(); + blockScanner.enableBlockPoolId(bpos.getBlockPoolId()); initDirectoryScanner(getConf()); initDiskBalancer(data, getConf()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java index 4c691b4ee15..d9ec2cd0739 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java @@ -868,4 +868,35 @@ public class TestDataNodeVolumeFailure { } } } + + /* + * Verify the failed volume can be checked during dn startup + */ + @Test(timeout = 120000) + public void testVolumeFailureDuringStartup() throws Exception { + LOG.debug("Data dir: is " + dataDir.getPath()); + + // fail the volume + data_fail = cluster.getInstanceStorageDir(1, 0); + failedDir = MiniDFSCluster.getFinalizedDir(data_fail, + cluster.getNamesystem().getBlockPoolId()); + failedDir.setReadOnly(); + + // restart the dn + cluster.restartDataNode(1); + final DataNode dn = cluster.getDataNodes().get(1); + + // should get the failed volume during startup + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + return dn.getFSDataset() !=null && + dn.getFSDataset().getVolumeFailureSummary() != null && + dn.getFSDataset().getVolumeFailureSummary(). + getFailedStorageLocations()!= null && + dn.getFSDataset().getVolumeFailureSummary(). + getFailedStorageLocations().length == 1; + } + }, 10, 30 * 1000); + } }