HDFS-10512. VolumeScanner may terminate due to NPE in DataNode.reportBadBlocks. Contributed by Wei-Chiu Chuang and Yiqun Lin.
(cherry picked from commit da6f1b88dd47e22b24d44f6fc8bbee73e85746f7)
This commit is contained in:
parent
845daee463
commit
96e68e722f
@ -1127,8 +1127,25 @@ public void notifyNamenodeDeletedBlock(ExtendedBlock block, String storageUuid)
|
||||
* Report a bad block which is hosted on the local DN.
|
||||
*/
|
||||
public void reportBadBlocks(ExtendedBlock block) throws IOException{
|
||||
BPOfferService bpos = getBPOSForBlock(block);
|
||||
FsVolumeSpi volume = getFSDataset().getVolume(block);
|
||||
if (volume == null) {
|
||||
LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block);
|
||||
return;
|
||||
}
|
||||
reportBadBlocks(block, volume);
|
||||
}
|
||||
|
||||
/**
|
||||
* Report a bad block which is hosted on the local DN.
|
||||
*
|
||||
* @param block the bad block which is hosted on the local DN
|
||||
* @param volume the volume that block is stored in and the volume
|
||||
* must not be null
|
||||
* @throws IOException
|
||||
*/
|
||||
public void reportBadBlocks(ExtendedBlock block, FsVolumeSpi volume)
|
||||
throws IOException {
|
||||
BPOfferService bpos = getBPOSForBlock(block);
|
||||
bpos.reportBadBlocks(
|
||||
block, volume.getStorageID(), volume.getStorageType());
|
||||
}
|
||||
@ -2043,6 +2060,10 @@ int getXmitsInProgress() {
|
||||
private void reportBadBlock(final BPOfferService bpos,
|
||||
final ExtendedBlock block, final String msg) {
|
||||
FsVolumeSpi volume = getFSDataset().getVolume(block);
|
||||
if (volume == null) {
|
||||
LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block);
|
||||
return;
|
||||
}
|
||||
bpos.reportBadBlocks(
|
||||
block, volume.getStorageID(), volume.getStorageType());
|
||||
LOG.warn(msg);
|
||||
|
@ -283,7 +283,7 @@ public void handle(ExtendedBlock block, IOException e) {
|
||||
}
|
||||
LOG.warn("Reporting bad {} on {}", block, volume.getBasePath());
|
||||
try {
|
||||
scanner.datanode.reportBadBlocks(block);
|
||||
scanner.datanode.reportBadBlocks(block, volume);
|
||||
} catch (IOException ie) {
|
||||
// This is bad, but not bad enough to shut down the scanner.
|
||||
LOG.warn("Cannot report bad " + block.getBlockId(), e);
|
||||
|
@ -2371,7 +2371,8 @@ public void checkAndUpdate(String bpid, long blockId, File diskFile,
|
||||
LOG.warn("Reporting the block " + corruptBlock
|
||||
+ " as corrupt due to length mismatch");
|
||||
try {
|
||||
datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock));
|
||||
datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock),
|
||||
memBlockInfo.getVolume());
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed to repot bad block " + corruptBlock, e);
|
||||
}
|
||||
|
@ -35,6 +35,7 @@
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||
@ -92,6 +93,7 @@
|
||||
import static org.mockito.Mockito.spy;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -594,4 +596,45 @@ public void testCleanShutdownOfVolume() throws Exception {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 30000)
|
||||
public void testReportBadBlocks() throws Exception {
|
||||
boolean threwException = false;
|
||||
MiniDFSCluster cluster = null;
|
||||
try {
|
||||
Configuration config = new HdfsConfiguration();
|
||||
cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
|
||||
cluster.waitActive();
|
||||
|
||||
Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks());
|
||||
DataNode dataNode = cluster.getDataNodes().get(0);
|
||||
ExtendedBlock block =
|
||||
new ExtendedBlock(cluster.getNamesystem().getBlockPoolId(), 0);
|
||||
try {
|
||||
// Test the reportBadBlocks when the volume is null
|
||||
dataNode.reportBadBlocks(block);
|
||||
} catch (NullPointerException npe) {
|
||||
threwException = true;
|
||||
}
|
||||
Thread.sleep(3000);
|
||||
Assert.assertFalse(threwException);
|
||||
Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks());
|
||||
|
||||
FileSystem fs = cluster.getFileSystem();
|
||||
Path filePath = new Path("testData");
|
||||
DFSTestUtil.createFile(fs, filePath, 1, (short) 1, 0);
|
||||
|
||||
block = DFSTestUtil.getFirstBlock(fs, filePath);
|
||||
// Test for the overloaded method reportBadBlocks
|
||||
dataNode.reportBadBlocks(block, dataNode.getFSDataset()
|
||||
.getFsVolumeReferences().get(0));
|
||||
Thread.sleep(3000);
|
||||
BlockManagerTestUtil.updateState(cluster.getNamesystem()
|
||||
.getBlockManager());
|
||||
// Verify the bad block has been reported to namenode
|
||||
Assert.assertEquals(1, cluster.getNamesystem().getCorruptReplicaBlocks());
|
||||
} finally {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user