HDFS-14993. checkDiskError doesn't work during datanode startup. Contributed by Yang Yun.

Ayush Saxena 2020-01-28 22:59:26 +05:30
parent f3ff61865a
commit 1293199532
2 changed files with 34 additions and 3 deletions

DataNode.java

@@ -1678,14 +1678,14 @@ public class DataNode extends ReconfigurableBase
     // the dataset, block scanners, etc.
     initStorage(nsInfo);
 
-    // Exclude failed disks before initializing the block pools to avoid startup
-    // failures.
-    checkDiskError();
     try {
       data.addBlockPool(nsInfo.getBlockPoolID(), getConf());
     } catch (AddBlockPoolException e) {
       handleAddBlockPoolError(e);
     }
+    // HDFS-14993: check disk after adding the block pool info.
+    checkDiskError();
+
     blockScanner.enableBlockPoolId(bpos.getBlockPoolId());
     initDirectoryScanner(getConf());
     initDiskBalancer(data, getConf());
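
Why the order matters: the patch and the new test suggest that the disk check covers per-block-pool directories (the test fails a block pool's finalized directory), so a checkDiskError() that runs before addBlockPool() has nothing to inspect and a bad disk slips through at startup. The snippet below is a minimal, self-contained sketch of that ordering dependency, not the real DataNode/FsVolumeImpl code; the Volume class, the directory layout, and the /tmp path are invented for illustration.

import java.io.File;
import java.util.ArrayList;
import java.util.List;

// Toy model of the ordering dependency fixed above: a volume can only
// verify directories of block pools it has been told about, so a disk
// check that runs before addBlockPool() trivially passes.
public class DiskCheckOrderingSketch {

  // Stand-in for a data volume; the real implementation lives in FsVolumeImpl.
  static class Volume {
    private final File baseDir;
    private final List<File> blockPoolDirs = new ArrayList<>();

    Volume(File baseDir) {
      this.baseDir = baseDir;
    }

    // Register a block pool, loosely mirroring FsDatasetSpi#addBlockPool.
    void addBlockPool(String bpid) {
      blockPoolDirs.add(new File(baseDir, bpid + "/current/finalized"));
    }

    // Check only the directories of registered block pools; with no pools
    // registered there is nothing that can fail.
    boolean isHealthy() {
      for (File dir : blockPoolDirs) {
        if (!dir.isDirectory() || !dir.canWrite()) {
          return false; // missing or read-only finalized dir => failed volume
        }
      }
      return true;
    }
  }

  public static void main(String[] args) {
    Volume volume = new Volume(new File("/tmp/dn-data")); // hypothetical path

    // Before addBlockPool: no directories to inspect, so the failure is
    // missed (the behaviour reported in HDFS-14993).
    System.out.println("before addBlockPool: healthy=" + volume.isHealthy());

    // After addBlockPool: the same check now covers the pool's directories
    // and can flag the bad volume, which is why the call was moved.
    volume.addBlockPool("BP-1");
    System.out.println("after addBlockPool: healthy=" + volume.isHealthy());
  }
}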

TestDataNodeVolumeFailure.java

@@ -868,4 +868,35 @@ public class TestDataNodeVolumeFailure {
       }
     }
   }
+
+  /*
+   * Verify the failed volume can be detected during dn startup.
+   */
+  @Test(timeout = 120000)
+  public void testVolumeFailureDuringStartup() throws Exception {
+    LOG.debug("Data dir: is " + dataDir.getPath());
+
+    // fail the volume
+    data_fail = cluster.getInstanceStorageDir(1, 0);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+        cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();
+
+    // restart the dn
+    cluster.restartDataNode(1);
+    final DataNode dn = cluster.getDataNodes().get(1);
+
+    // should get the failed volume during startup
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return dn.getFSDataset() != null &&
+            dn.getFSDataset().getVolumeFailureSummary() != null &&
+            dn.getFSDataset().getVolumeFailureSummary()
+                .getFailedStorageLocations() != null &&
+            dn.getFSDataset().getVolumeFailureSummary()
+                .getFailedStorageLocations().length == 1;
+      }
+    }, 10, 30 * 1000);
+  }
 }
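
As a side note, GenericTestUtils.waitFor only needs a Supplier<Boolean>, so the anonymous class in the new test could equivalently be written as a lambda that caches the dataset reference. This is just an alternative sketch, not part of the committed patch, and it assumes FsDatasetSpi is importable in the test class:

    // Equivalent wait condition as a lambda (sketch, not in the patch);
    // caching dn.getFSDataset() avoids repeating the call in every clause.
    GenericTestUtils.waitFor(() -> {
      final FsDatasetSpi<?> ds = dn.getFSDataset();
      return ds != null
          && ds.getVolumeFailureSummary() != null
          && ds.getVolumeFailureSummary().getFailedStorageLocations() != null
          && ds.getVolumeFailureSummary().getFailedStorageLocations().length == 1;
    }, 10, 30 * 1000);

If needed, the new test can be run on its own from the hadoop-hdfs module with the usual surefire filter, e.g. mvn test -Dtest=TestDataNodeVolumeFailure#testVolumeFailureDuringStartup.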