HDFS-10512. VolumeScanner may terminate due to NPE in DataNode.reportBadBlocks. Contributed by Wei-Chiu Chuang and Yiqun Lin.
(cherry picked from commit da6f1b88dd)
(cherry picked from commit 96e68e722f)
This commit is contained in:
parent
86f291f057
commit
b2dfab4326
|
@ -154,6 +154,9 @@ Release 2.7.4 - UNRELEASED
|
||||||
HDFS-11002. Fix broken attr/getfattr/setfattr links in
|
HDFS-11002. Fix broken attr/getfattr/setfattr links in
|
||||||
ExtendedAttributes.md. (Mingliang Liu via aajisaka)
|
ExtendedAttributes.md. (Mingliang Liu via aajisaka)
|
||||||
|
|
||||||
|
HDFS-10512. VolumeScanner may terminate due to NPE in
|
||||||
|
DataNode.reportBadBlocks. Contributed by Wei-Chiu Chuang and Yiqun Lin.
|
||||||
|
|
||||||
Release 2.7.3 - 2016-08-25
|
Release 2.7.3 - 2016-08-25
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -990,8 +990,25 @@ public class DataNode extends ReconfigurableBase
|
||||||
* Report a bad block which is hosted on the local DN.
|
* Report a bad block which is hosted on the local DN.
|
||||||
*/
|
*/
|
||||||
public void reportBadBlocks(ExtendedBlock block) throws IOException{
|
public void reportBadBlocks(ExtendedBlock block) throws IOException{
|
||||||
BPOfferService bpos = getBPOSForBlock(block);
|
|
||||||
FsVolumeSpi volume = getFSDataset().getVolume(block);
|
FsVolumeSpi volume = getFSDataset().getVolume(block);
|
||||||
|
if (volume == null) {
|
||||||
|
LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
reportBadBlocks(block, volume);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Report a bad block which is hosted on the local DN.
|
||||||
|
*
|
||||||
|
* @param block the bad block which is hosted on the local DN
|
||||||
|
* @param volume the volume that block is stored in and the volume
|
||||||
|
* must not be null
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void reportBadBlocks(ExtendedBlock block, FsVolumeSpi volume)
|
||||||
|
throws IOException {
|
||||||
|
BPOfferService bpos = getBPOSForBlock(block);
|
||||||
bpos.reportBadBlocks(
|
bpos.reportBadBlocks(
|
||||||
block, volume.getStorageID(), volume.getStorageType());
|
block, volume.getStorageID(), volume.getStorageType());
|
||||||
}
|
}
|
||||||
|
@ -1860,6 +1877,10 @@ public class DataNode extends ReconfigurableBase
|
||||||
private void reportBadBlock(final BPOfferService bpos,
|
private void reportBadBlock(final BPOfferService bpos,
|
||||||
final ExtendedBlock block, final String msg) {
|
final ExtendedBlock block, final String msg) {
|
||||||
FsVolumeSpi volume = getFSDataset().getVolume(block);
|
FsVolumeSpi volume = getFSDataset().getVolume(block);
|
||||||
|
if (volume == null) {
|
||||||
|
LOG.warn("Cannot find FsVolumeSpi to report bad block: " + block);
|
||||||
|
return;
|
||||||
|
}
|
||||||
bpos.reportBadBlocks(
|
bpos.reportBadBlocks(
|
||||||
block, volume.getStorageID(), volume.getStorageType());
|
block, volume.getStorageID(), volume.getStorageType());
|
||||||
LOG.warn(msg);
|
LOG.warn(msg);
|
||||||
|
|
|
@ -285,7 +285,7 @@ public class VolumeScanner extends Thread {
|
||||||
LOG.warn("Reporting bad " + block + " with volume "
|
LOG.warn("Reporting bad " + block + " with volume "
|
||||||
+ volume.getBasePath(), e);
|
+ volume.getBasePath(), e);
|
||||||
try {
|
try {
|
||||||
scanner.datanode.reportBadBlocks(block);
|
scanner.datanode.reportBadBlocks(block, volume);
|
||||||
} catch (IOException ie) {
|
} catch (IOException ie) {
|
||||||
// This is bad, but not bad enough to shut down the scanner.
|
// This is bad, but not bad enough to shut down the scanner.
|
||||||
LOG.warn("Cannot report bad block " + block, ie);
|
LOG.warn("Cannot report bad block " + block, ie);
|
||||||
|
|
|
@ -2254,7 +2254,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
||||||
LOG.warn("Reporting the block " + corruptBlock
|
LOG.warn("Reporting the block " + corruptBlock
|
||||||
+ " as corrupt due to length mismatch");
|
+ " as corrupt due to length mismatch");
|
||||||
try {
|
try {
|
||||||
datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock));
|
datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock),
|
||||||
|
memBlockInfo.getVolume());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("Failed to repot bad block " + corruptBlock, e);
|
LOG.warn("Failed to repot bad block " + corruptBlock, e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.hdfs.protocol.Block;
|
||||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||||
|
@ -503,4 +504,45 @@ public class TestFsDatasetImpl {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testReportBadBlocks() throws Exception {
|
||||||
|
boolean threwException = false;
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
Configuration config = new HdfsConfiguration();
|
||||||
|
cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks());
|
||||||
|
DataNode dataNode = cluster.getDataNodes().get(0);
|
||||||
|
ExtendedBlock block =
|
||||||
|
new ExtendedBlock(cluster.getNamesystem().getBlockPoolId(), 0);
|
||||||
|
try {
|
||||||
|
// Test the reportBadBlocks when the volume is null
|
||||||
|
dataNode.reportBadBlocks(block);
|
||||||
|
} catch (NullPointerException npe) {
|
||||||
|
threwException = true;
|
||||||
|
}
|
||||||
|
Thread.sleep(3000);
|
||||||
|
Assert.assertFalse(threwException);
|
||||||
|
Assert.assertEquals(0, cluster.getNamesystem().getCorruptReplicaBlocks());
|
||||||
|
|
||||||
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
Path filePath = new Path("testData");
|
||||||
|
DFSTestUtil.createFile(fs, filePath, 1, (short) 1, 0);
|
||||||
|
|
||||||
|
block = DFSTestUtil.getFirstBlock(fs, filePath);
|
||||||
|
// Test for the overloaded method reportBadBlocks
|
||||||
|
dataNode.reportBadBlocks(block, dataNode.getFSDataset()
|
||||||
|
.getVolumes().get(0));
|
||||||
|
Thread.sleep(3000);
|
||||||
|
BlockManagerTestUtil.updateState(cluster.getNamesystem()
|
||||||
|
.getBlockManager());
|
||||||
|
// Verify the bad block has been reported to namenode
|
||||||
|
Assert.assertEquals(1, cluster.getNamesystem().getCorruptReplicaBlocks());
|
||||||
|
} finally {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue