HDFS-7537. Add "UNDER MIN REPL'D BLOCKS" count to fsck. Contributed by GAO Rui

Author: Tsz-Wo Nicholas Sze
Date:   2015-02-26 11:45:56 +08:00
parent 12fe3afcd3
commit 22b13a0ddb
3 changed files with 111 additions and 9 deletions
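For orientation, an illustrative note (not part of the commit): the new counter is printed in the summary that fsck writes for the scanned path, so on a cluster running this change it shows up when fsck is invoked in the usual way, for example:

    hdfs fsck /

The diffs below add the counter to the Result/report code in NamenodeFsck and cover it with a new test in TestFsck.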


@@ -375,6 +375,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7495. Remove updatePosition argument from DFSInputStream#getBlockAt()
     (cmccabe)
 
+    HDFS-7537. Add "UNDER MIN REPL'D BLOCKS" count to fsck. (GAO Rui via
+    szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.


@@ -507,6 +507,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       res.totalReplicas += liveReplicas;
       short targetFileReplication = file.getReplication();
       res.numExpectedReplicas += targetFileReplication;
+      if (liveReplicas < minReplication) {
+        res.numUnderMinReplicatedBlocks++;
+      }
       if (liveReplicas > targetFileReplication) {
         res.excessiveReplicas += (liveReplicas - targetFileReplication);
         res.numOverReplicatedBlocks += 1;
@@ -853,6 +856,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
     long corruptBlocks = 0L;
     long excessiveReplicas = 0L;
     long missingReplicas = 0L;
+    long numUnderMinReplicatedBlocks = 0L;
     long numOverReplicatedBlocks = 0L;
     long numUnderReplicatedBlocks = 0L;
     long numMisReplicatedBlocks = 0L;  // blocks that do not satisfy block placement policy
@@ -869,10 +873,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
     long totalReplicas = 0L;
 
     final short replication;
+    final int minReplication;
 
     Result(Configuration conf) {
       this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
           DFSConfigKeys.DFS_REPLICATION_DEFAULT);
+      this.minReplication = (short)conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,
+          DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
     }
 
     /**
@@ -920,15 +927,28 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
         res.append(" (Total open file blocks (not validated): ").append(
             totalOpenFilesBlocks).append(")");
       }
-      if (corruptFiles > 0) {
-        res.append("\n ********************************").append(
-            "\n CORRUPT FILES:\t").append(corruptFiles);
-        if (missingSize > 0) {
-          res.append("\n MISSING BLOCKS:\t").append(missingIds.size()).append(
-              "\n MISSING SIZE:\t\t").append(missingSize).append(" B");
-        }
-        if (corruptBlocks > 0) {
-          res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
+      if (corruptFiles > 0 || numUnderMinReplicatedBlocks > 0) {
+        res.append("\n ********************************");
+        if (numUnderMinReplicatedBlocks > 0) {
+          res.append("\n UNDER MIN REPL'D BLOCKS:\t").append(numUnderMinReplicatedBlocks);
+          if (totalBlocks > 0) {
+            res.append(" (").append(
+                ((float) (numUnderMinReplicatedBlocks * 100) / (float) totalBlocks))
+                .append(" %)");
+          }
+          res.append("\n ").append("DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:\t")
+              .append(minReplication);
+        }
+        if (corruptFiles > 0) {
+          res.append(
+              "\n CORRUPT FILES:\t").append(corruptFiles);
+          if (missingSize > 0) {
+            res.append("\n MISSING BLOCKS:\t").append(missingIds.size()).append(
+                "\n MISSING SIZE:\t\t").append(missingSize).append(" B");
+          }
+          if (corruptBlocks > 0) {
+            res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
+          }
         }
         res.append("\n ********************************");
       }
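Based on the strings appended above and the values asserted in the new TestFsck case below (a single block with one live replica while dfs.namenode.replication.min, i.e. DFS_NAMENODE_REPLICATION_MIN_KEY, is set to 2), the new portion of the fsck summary would look roughly like the following. This is an illustrative sketch, not output captured from a real run, and the fields are tab-separated in the actual report:

     ********************************
     UNDER MIN REPL'D BLOCKS:      1 (100.0 %)
       DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:      2
     ********************************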


@@ -693,7 +693,86 @@ public class TestFsck {
       if (cluster != null) {cluster.shutdown();}
     }
   }
 
+  @Test
+  public void testUnderMinReplicatedBlock() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
+    // Set short retry timeouts so this test runs faster
+    conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
+    // Set minReplication to 2
+    short minReplication = 2;
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, minReplication);
+    FileSystem fs = null;
+    DFSClient dfsClient = null;
+    LocatedBlocks blocks = null;
+    int replicaCount = 0;
+    Random random = new Random();
+    String outStr = null;
+    short factor = 1;
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      Path file1 = new Path("/testUnderMinReplicatedBlock");
+      DFSTestUtil.createFile(fs, file1, 1024, minReplication, 0);
+      // Wait until file replication has completed
+      DFSTestUtil.waitReplication(fs, file1, minReplication);
+      ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
+
+      // Make sure filesystem is in healthy state
+      outStr = runFsck(conf, 0, true, "/");
+      System.out.println(outStr);
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+      // corrupt the first replica
+      File blockFile = cluster.getBlockFile(0, block);
+      if (blockFile != null && blockFile.exists()) {
+        RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
+        FileChannel channel = raFile.getChannel();
+        String badString = "BADBAD";
+        int rand = random.nextInt((int) channel.size() / 2);
+        raFile.seek(rand);
+        raFile.write(badString.getBytes());
+        raFile.close();
+      }
+
+      dfsClient = new DFSClient(new InetSocketAddress("localhost",
+          cluster.getNameNodePort()), conf);
+      blocks = dfsClient.getNamenode().
+          getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
+      replicaCount = blocks.get(0).getLocations().length;
+      while (replicaCount != factor) {
+        try {
+          Thread.sleep(100);
+          // Read the file to trigger reportBadBlocks
+          try {
+            IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf,
+                true);
+          } catch (IOException ie) {
+            // Ignore exception
+          }
+          System.out.println("sleep in try: replicaCount=" + replicaCount + " factor=" + factor);
+        } catch (InterruptedException ignore) {
+        }
+        blocks = dfsClient.getNamenode().
+            getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
+        replicaCount = blocks.get(0).getLocations().length;
+      }
+
+      // Check if fsck reports the same
+      outStr = runFsck(conf, 0, true, "/");
+      System.out.println(outStr);
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+      assertTrue(outStr.contains("UNDER MIN REPL'D BLOCKS:\t1 (100.0 %)"));
+      assertTrue(outStr.contains("DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:\t2"));
+    } finally {
+      if (cluster != null) {cluster.shutdown();}
+    }
+  }
+
   /** Test if fsck can return -1 in case of failure
    *
    * @throws Exception