HDFS-10827. When there are unrecoverable ec block groups, Namenode Web UI doesn't show the block names. Contributed by Takanobu Asanuma.
This commit is contained in:
parent 0007360c33
commit adb96e109f
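The Web UI symptom in the summary comes from JMX: the NameNode status page renders the CorruptFiles attribute of the NameNodeInfo MBean, which FSNamesystem populates from listCorruptFileBlocks(). A minimal in-process probe for that attribute is sketched below; the class name is illustrative, but the ObjectName and attribute are exactly the ones the new test queries.

import java.lang.management.ManagementFactory;
import javax.management.MBeanServer;
import javax.management.ObjectName;

// Hypothetical probe (not part of the patch): prints the JSON the NameNode
// Web UI renders as its corrupt-files list. Before this fix, unrecoverable
// EC block groups were absent from this output even though they were
// counted in NumberOfMissingBlocks.
public class CorruptFilesProbe {
  public static void main(String[] args) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName name =
        new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
    String corruptFiles = (String) mbs.getAttribute(name, "CorruptFiles");
    System.out.println(corruptFiles); // JSON array of corrupt file/block names
  }
}

Run this inside the NameNode JVM, for example from a MiniDFSCluster test, as testVerifyMissingBlockGroupsMetrics below does.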
@@ -4999,7 +4999,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       BlockInfo blk = blkIterator.next();
       final INodeFile inode = getBlockCollection(blk);
       skip++;
-      if (inode != null && blockManager.countNodes(blk).liveReplicas() == 0) {
+      if (inode != null) {
         String src = inode.getFullPathName();
         if (src.startsWith(path)){
           corruptFiles.add(new CorruptFileBlockInfo(src, blk));
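The one-line change above is the whole fix. The iterator in this method walks the BlockManager's corrupt-replica queue, so the extra liveReplicas() == 0 filter was redundant for replicated blocks and wrong for striped ones: an erasure-coded group becomes unrecoverable as soon as more internal blocks are lost than the policy has parity units, while its live-replica count is still positive, so the old condition silently dropped it from CorruptFiles. A standalone sketch of that arithmetic for the default RS-6-3 policy (illustrative names; not code from the patch):

// Why liveReplicas() == 0 never fires for a dead RS-6-3 block group.
public class EcRecoverabilityExample {
  public static void main(String[] args) {
    final int dataUnits = 6;    // data blocks per striped group (RS-6-3)
    final int parityUnits = 3;  // parity blocks per striped group
    // Corrupt one more internal block than the policy tolerates, which is
    // exactly what the new test does with its parityBlocks + 1 loop.
    final int lost = parityUnits + 1;
    final int live = dataUnits + parityUnits - lost;  // 5 internal blocks left
    boolean unrecoverable = live < dataUnits;         // true: cannot decode
    boolean oldFilterMatches = (live == 0);           // false: group stayed hidden
    System.out.println("live=" + live
        + ", unrecoverable=" + unrecoverable
        + ", visibleUnderOldCheck=" + oldFilterMatches);
  }
}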
@@ -17,35 +17,48 @@
 */
package org.apache.hadoop.hdfs.server.namenode;

import com.google.common.base.Supplier;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.StripedFileTestUtil;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.hdfs.server.namenode.top.TopConf;
import org.apache.hadoop.hdfs.util.HostsFileWriter;
import org.apache.hadoop.hdfs.util.StripedBlockUtil;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.io.nativeio.NativeIO.POSIX.NoMlockCacheManipulator;
import org.apache.hadoop.net.ServerSocketUtil;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.VersionInfo;
import org.codehaus.jackson.map.ObjectMapper;
import org.junit.Assert;
import org.junit.Test;
import org.mortbay.util.ajax.JSON;

import javax.management.MBeanServer;
import javax.management.ObjectName;
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.net.BindException;
import java.net.URI;
@@ -495,4 +508,96 @@ public class TestNameNodeMXBean {
        FileUtils.sizeOfDirectory(dir));
    }
  }

  @Test
  public void testVerifyMissingBlockGroupsMetrics() throws Exception {
    MiniDFSCluster cluster = null;
    DistributedFileSystem fs = null;
    try {
      Configuration conf = new HdfsConfiguration();
      int dataBlocks = ErasureCodingPolicyManager
          .getSystemDefaultPolicy().getNumDataUnits();
      int parityBlocks = ErasureCodingPolicyManager
          .getSystemDefaultPolicy().getNumParityUnits();
      int cellSize = ErasureCodingPolicyManager
          .getSystemDefaultPolicy().getCellSize();
      int totalSize = dataBlocks + parityBlocks;
      cluster = new MiniDFSCluster.Builder(conf)
          .numDataNodes(totalSize).build();
      fs = cluster.getFileSystem();

      // create file
      Path ecDirPath = new Path("/striped");
      fs.mkdir(ecDirPath, FsPermission.getDirDefault());
      fs.getClient().setErasureCodingPolicy(ecDirPath.toString(), null);
      Path file = new Path(ecDirPath, "corrupted");
      final int length = cellSize * dataBlocks;
      final byte[] bytes = StripedFileTestUtil.generateBytes(length);
      DFSTestUtil.writeFile(fs, file, bytes);

      LocatedStripedBlock lsb = (LocatedStripedBlock)fs.getClient()
          .getLocatedBlocks(file.toString(), 0, cellSize * dataBlocks).get(0);
      final LocatedBlock[] blks = StripedBlockUtil.parseStripedBlockGroup(lsb,
          cellSize, dataBlocks, parityBlocks);

      // make an unrecoverable ec file with corrupted blocks
      for(int i = 0; i < parityBlocks + 1; i++) {
        int ipcPort = blks[i].getLocations()[0].getIpcPort();
        cluster.corruptReplica(cluster.getDataNode(ipcPort),
            blks[i].getBlock());
      }

      // disable the heart beat from DN so that the corrupted block record is
      // kept in NameNode
      for (DataNode dn : cluster.getDataNodes()) {
        DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
      }

      // Read the file to trigger reportBadBlocks
      try {
        IOUtils.copyBytes(fs.open(file), new IOUtils.NullOutputStream(), conf,
            true);
      } catch (IOException ie) {
        assertTrue(ie.getMessage().contains(
            "missingChunksNum=" + (parityBlocks + 1)));
      }

      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
      ObjectName mxbeanName = new ObjectName(
          "Hadoop:service=NameNode,name=NameNodeInfo");

      // Wait for the metrics to discover the unrecoverable block group
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        @Override
        public Boolean get() {
          try {
            Long numMissingBlocks =
                (Long) mbs.getAttribute(mxbeanName, "NumberOfMissingBlocks");
            if (numMissingBlocks == 1L) {
              return true;
            }
          } catch (Exception e) {
            Assert.fail("Caught unexpected exception.");
          }
          return false;
        }
      }, 1000, 60000);

      String corruptFiles = (String) (mbs.getAttribute(mxbeanName,
          "CorruptFiles"));
      int numCorruptFiles = ((Object[]) JSON.parse(corruptFiles)).length;
      assertEquals(1, numCorruptFiles);
    } finally {
      if (fs != null) {
        try {
          fs.close();
        } catch (Exception e) {
          throw e;
        }
      }
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
}
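Outside JUnit, the same pair of attributes can be checked against a live NameNode through its /jmx servlet, which is the endpoint the Web UI polls. A minimal HTTP probe follows; the host and port are assumptions, so substitute your dfs.namenode.http-address (for example localhost:9870 on Hadoop 3, localhost:50070 on Hadoop 2).

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

// Hypothetical external check: dumps the NameNodeInfo bean as JSON,
// including CorruptFiles and NumberOfMissingBlocks. After this fix, an
// unrecoverable EC block group shows up in both fields.
public class JmxServletProbe {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:9870/jmx"
        + "?qry=Hadoop:service=NameNode,name=NameNodeInfo");
    try (BufferedReader in = new BufferedReader(
        new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = in.readLine()) != null) {
        System.out.println(line);
      }
    }
  }
}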