HDFS-10512. VolumeScanner may terminate due to NPE in DataNode.reportBadBlocks. Contributed by Wei-Chiu Chuang and Yiqun Lin.

(cherry picked from commit da6f1b88dd)
(cherry picked from commit 96e68e722f)
Authored by Yongjun Zhang on 2016-07-08 19:40:44 -07:00; committed by Wei-Chiu Chuang
parent 86f291f057
commit b2dfab4326
5 changed files with 70 additions and 3 deletions
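
In short: DataNode#reportBadBlocks dereferenced the result of getFSDataset().getVolume(block) without a null check. When that lookup fails (for instance because the volume holding the block has since been removed), it returns null, and the resulting NullPointerException is unchecked, so it escaped the IOException-only handler in VolumeScanner and terminated the scanner thread. A sketch of the pre-patch shape of the method, reconstructed from the hunks below:

    // Pre-patch DataNode#reportBadBlocks, reconstructed from the diff below.
    // getVolume() may return null; nothing guards the dereference.
    public void reportBadBlocks(ExtendedBlock block) throws IOException {
      BPOfferService bpos = getBPOSForBlock(block);
      FsVolumeSpi volume = getFSDataset().getVolume(block); // null if not found
      bpos.reportBadBlocks(
          block, volume.getStorageID(), volume.getStorageType()); // NPE when null
    }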

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -154,6 +154,9 @@ Release 2.7.4 - UNRELEASED
     HDFS-11002. Fix broken attr/getfattr/setfattr links in
     ExtendedAttributes.md. (Mingliang Liu via aajisaka)
 
+    HDFS-10512. VolumeScanner may terminate due to NPE in
+    DataNode.reportBadBlocks. Contributed by Wei-Chiu Chuang and Yiqun Lin.
+
 Release 2.7.3 - 2016-08-25
 
   INCOMPATIBLE CHANGES

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -990,8 +990,25 @@ public class DataNode extends ReconfigurableBase
    * Report a bad block which is hosted on the local DN.
    */
   public void reportBadBlocks(ExtendedBlock block) throws IOException{
-    BPOfferService bpos = getBPOSForBlock(block);
     FsVolumeSpi volume = getFSDataset().getVolume(block);
+    if (volume == null) {
+      LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block);
+      return;
+    }
+    reportBadBlocks(block, volume);
+  }
+
+  /**
+   * Report a bad block which is hosted on the local DN.
+   *
+   * @param block the bad block which is hosted on the local DN
+   * @param volume the volume that block is stored in and the volume
+   *        must not be null
+   * @throws IOException
+   */
+  public void reportBadBlocks(ExtendedBlock block, FsVolumeSpi volume)
+      throws IOException {
+    BPOfferService bpos = getBPOSForBlock(block);
     bpos.reportBadBlocks(
         block, volume.getStorageID(), volume.getStorageType());
   }
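
The fix above splits the method in two: the single-argument overload now checks the volume lookup and degrades to a logged warning instead of throwing, while the new two-argument overload serves callers that already hold a non-null FsVolumeSpi reference and lets them skip the lookup entirely (the javadoc makes the non-null requirement explicit).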
@@ -1860,6 +1877,10 @@ public class DataNode extends ReconfigurableBase
   private void reportBadBlock(final BPOfferService bpos,
       final ExtendedBlock block, final String msg) {
     FsVolumeSpi volume = getFSDataset().getVolume(block);
+    if (volume == null) {
+      LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block);
+      return;
+    }
     bpos.reportBadBlocks(
         block, volume.getStorageID(), volume.getStorageType());
     LOG.warn(msg);

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java

@ -285,7 +285,7 @@ public class VolumeScanner extends Thread {
LOG.warn("Reporting bad " + block + " with volume " LOG.warn("Reporting bad " + block + " with volume "
+ volume.getBasePath(), e); + volume.getBasePath(), e);
try { try {
scanner.datanode.reportBadBlocks(block); scanner.datanode.reportBadBlocks(block, volume);
} catch (IOException ie) { } catch (IOException ie) {
// This is bad, but not bad enough to shut down the scanner. // This is bad, but not bad enough to shut down the scanner.
LOG.warn("Cannot report bad block " + block, ie); LOG.warn("Cannot report bad block " + block, ie);

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -2254,7 +2254,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
         LOG.warn("Reporting the block " + corruptBlock
             + " as corrupt due to length mismatch");
         try {
-          datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock));
+          datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock),
+              memBlockInfo.getVolume());
         } catch (IOException e) {
           LOG.warn("Failed to report bad block " + corruptBlock, e);
         }

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
@@ -503,4 +504,45 @@ public class TestFsDatasetImpl {
       cluster.shutdown();
     }
   }
+
+  @Test(timeout = 30000)
+  public void testReportBadBlocks() throws Exception {
+    boolean threwException = false;
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration config = new HdfsConfiguration();
+      cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
+      cluster.waitActive();
+
+      Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks());
+      DataNode dataNode = cluster.getDataNodes().get(0);
+      ExtendedBlock block =
+          new ExtendedBlock(cluster.getNamesystem().getBlockPoolId(), 0);
+      try {
+        // Test the reportBadBlocks when the volume is null
+        dataNode.reportBadBlocks(block);
+      } catch (NullPointerException npe) {
+        threwException = true;
+      }
+      Thread.sleep(3000);
+      Assert.assertFalse(threwException);
+      Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks());
+
+      FileSystem fs = cluster.getFileSystem();
+      Path filePath = new Path("testData");
+      DFSTestUtil.createFile(fs, filePath, 1, (short) 1, 0);
+
+      block = DFSTestUtil.getFirstBlock(fs, filePath);
+      // Test for the overloaded method reportBadBlocks
+      dataNode.reportBadBlocks(block, dataNode.getFSDataset()
+          .getVolumes().get(0));
+      Thread.sleep(3000);
+      BlockManagerTestUtil.updateState(cluster.getNamesystem()
+          .getBlockManager());
+      // Verify the bad block has been reported to namenode
+      Assert.assertEquals(1, cluster.getNamesystem().getCorruptReplicaBlocks());
+    } finally {
+      cluster.shutdown();
+    }
+  }
 }
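
The new test covers both paths: reporting a block that no volume hosts (block ID 0 in a fresh cluster) exercises the null-volume branch, which must neither throw nor mark anything corrupt, while reporting a real block through the two-argument overload must raise the NameNode's corruptReplicaBlocks count to 1. The Thread.sleep(3000) calls give the asynchronous report time to reach the NameNode before the assertions run. To run just this test from a Hadoop source tree, something like `mvn test -Dtest=TestFsDatasetImpl#testReportBadBlocks` in hadoop-hdfs-project/hadoop-hdfs should work.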