HDFS-7537. Add "UNDER MIN REPL'D BLOCKS" count to fsck. Contributed by GAO Rui
This commit is contained in:
parent
12fe3afcd3
commit
22b13a0ddb
|
@ -375,6 +375,9 @@ Release 2.7.0 - UNRELEASED
|
||||||
HDFS-7495. Remove updatePosition argument from DFSInputStream#getBlockAt()
|
HDFS-7495. Remove updatePosition argument from DFSInputStream#getBlockAt()
|
||||||
(cmccabe)
|
(cmccabe)
|
||||||
|
|
||||||
|
HDFS-7537. Add "UNDER MIN REPL'D BLOCKS" count to fsck. (GAO Rui via
|
||||||
|
szetszwo)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
|
HDFS-7454. Reduce memory footprint for AclEntries in NameNode.
|
||||||
|
|
|
@ -507,6 +507,9 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
res.totalReplicas += liveReplicas;
|
res.totalReplicas += liveReplicas;
|
||||||
short targetFileReplication = file.getReplication();
|
short targetFileReplication = file.getReplication();
|
||||||
res.numExpectedReplicas += targetFileReplication;
|
res.numExpectedReplicas += targetFileReplication;
|
||||||
|
if(liveReplicas<minReplication){
|
||||||
|
res.numUnderMinReplicatedBlocks++;
|
||||||
|
}
|
||||||
if (liveReplicas > targetFileReplication) {
|
if (liveReplicas > targetFileReplication) {
|
||||||
res.excessiveReplicas += (liveReplicas - targetFileReplication);
|
res.excessiveReplicas += (liveReplicas - targetFileReplication);
|
||||||
res.numOverReplicatedBlocks += 1;
|
res.numOverReplicatedBlocks += 1;
|
||||||
|
@ -853,6 +856,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
long corruptBlocks = 0L;
|
long corruptBlocks = 0L;
|
||||||
long excessiveReplicas = 0L;
|
long excessiveReplicas = 0L;
|
||||||
long missingReplicas = 0L;
|
long missingReplicas = 0L;
|
||||||
|
long numUnderMinReplicatedBlocks=0L;
|
||||||
long numOverReplicatedBlocks = 0L;
|
long numOverReplicatedBlocks = 0L;
|
||||||
long numUnderReplicatedBlocks = 0L;
|
long numUnderReplicatedBlocks = 0L;
|
||||||
long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
|
long numMisReplicatedBlocks = 0L; // blocks that do not satisfy block placement policy
|
||||||
|
@ -869,10 +873,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
long totalReplicas = 0L;
|
long totalReplicas = 0L;
|
||||||
|
|
||||||
final short replication;
|
final short replication;
|
||||||
|
final int minReplication;
|
||||||
|
|
||||||
/**
 * Creates an empty fsck result, capturing the replication settings from
 * the supplied configuration.
 *
 * @param conf configuration from which the default replication factor
 *             ({@code DFS_REPLICATION_KEY}) and the minimum replication
 *             ({@code DFS_NAMENODE_REPLICATION_MIN_KEY}) are read
 */
Result(Configuration conf) {
  this.replication = (short) conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
      DFSConfigKeys.DFS_REPLICATION_DEFAULT);
  // minReplication is declared as int, so store the configured value as-is
  // instead of narrowing it through a short cast first.
  this.minReplication = conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -920,15 +927,28 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
res.append(" (Total open file blocks (not validated): ").append(
|
res.append(" (Total open file blocks (not validated): ").append(
|
||||||
totalOpenFilesBlocks).append(")");
|
totalOpenFilesBlocks).append(")");
|
||||||
}
|
}
|
||||||
if (corruptFiles > 0) {
|
if (corruptFiles > 0 || numUnderMinReplicatedBlocks>0) {
|
||||||
res.append("\n ********************************").append(
|
res.append("\n ********************************");
|
||||||
"\n CORRUPT FILES:\t").append(corruptFiles);
|
if(numUnderMinReplicatedBlocks>0){
|
||||||
if (missingSize > 0) {
|
res.append("\n UNDER MIN REPL'D BLOCKS:\t").append(numUnderMinReplicatedBlocks);
|
||||||
res.append("\n MISSING BLOCKS:\t").append(missingIds.size()).append(
|
if(totalBlocks>0){
|
||||||
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
|
res.append(" (").append(
|
||||||
|
((float) (numUnderMinReplicatedBlocks * 100) / (float) totalBlocks))
|
||||||
|
.append(" %)");
|
||||||
|
}
|
||||||
|
res.append("\n ").append("DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:\t")
|
||||||
|
.append(minReplication);
|
||||||
}
|
}
|
||||||
if (corruptBlocks > 0) {
|
if(corruptFiles>0) {
|
||||||
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
|
res.append(
|
||||||
|
"\n CORRUPT FILES:\t").append(corruptFiles);
|
||||||
|
if (missingSize > 0) {
|
||||||
|
res.append("\n MISSING BLOCKS:\t").append(missingIds.size()).append(
|
||||||
|
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
|
||||||
|
}
|
||||||
|
if (corruptBlocks > 0) {
|
||||||
|
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
res.append("\n ********************************");
|
res.append("\n ********************************");
|
||||||
}
|
}
|
||||||
|
|
|
@ -693,7 +693,86 @@ public class TestFsck {
|
||||||
if (cluster != null) {cluster.shutdown();}
|
if (cluster != null) {cluster.shutdown();}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnderMinReplicatedBlock() throws Exception {
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
|
||||||
|
// Set short retry timeouts so this test runs faster
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
|
||||||
|
// Set minReplication to 2
|
||||||
|
short minReplication=2;
|
||||||
|
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,minReplication);
|
||||||
|
FileSystem fs = null;
|
||||||
|
DFSClient dfsClient = null;
|
||||||
|
LocatedBlocks blocks = null;
|
||||||
|
int replicaCount = 0;
|
||||||
|
Random random = new Random();
|
||||||
|
String outStr = null;
|
||||||
|
short factor = 1;
|
||||||
|
MiniDFSCluster cluster = null;
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
fs = cluster.getFileSystem();
|
||||||
|
Path file1 = new Path("/testUnderMinReplicatedBlock");
|
||||||
|
DFSTestUtil.createFile(fs, file1, 1024, minReplication, 0);
|
||||||
|
// Wait until file replication has completed
|
||||||
|
DFSTestUtil.waitReplication(fs, file1, minReplication);
|
||||||
|
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, file1);
|
||||||
|
|
||||||
|
// Make sure filesystem is in healthy state
|
||||||
|
outStr = runFsck(conf, 0, true, "/");
|
||||||
|
System.out.println(outStr);
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
|
||||||
|
// corrupt the first replica
|
||||||
|
File blockFile = cluster.getBlockFile(0, block);
|
||||||
|
if (blockFile != null && blockFile.exists()) {
|
||||||
|
RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
|
||||||
|
FileChannel channel = raFile.getChannel();
|
||||||
|
String badString = "BADBAD";
|
||||||
|
int rand = random.nextInt((int) channel.size()/2);
|
||||||
|
raFile.seek(rand);
|
||||||
|
raFile.write(badString.getBytes());
|
||||||
|
raFile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
dfsClient = new DFSClient(new InetSocketAddress("localhost",
|
||||||
|
cluster.getNameNodePort()), conf);
|
||||||
|
blocks = dfsClient.getNamenode().
|
||||||
|
getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
|
||||||
|
replicaCount = blocks.get(0).getLocations().length;
|
||||||
|
while (replicaCount != factor) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(100);
|
||||||
|
// Read the file to trigger reportBadBlocks
|
||||||
|
try {
|
||||||
|
IOUtils.copyBytes(fs.open(file1), new IOUtils.NullOutputStream(), conf,
|
||||||
|
true);
|
||||||
|
} catch (IOException ie) {
|
||||||
|
// Ignore exception
|
||||||
|
}
|
||||||
|
System.out.println("sleep in try: replicaCount="+replicaCount+" factor="+factor);
|
||||||
|
} catch (InterruptedException ignore) {
|
||||||
|
}
|
||||||
|
blocks = dfsClient.getNamenode().
|
||||||
|
getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
|
||||||
|
replicaCount = blocks.get(0).getLocations().length;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if fsck reports the same
|
||||||
|
outStr = runFsck(conf, 0, true, "/");
|
||||||
|
System.out.println(outStr);
|
||||||
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
|
assertTrue(outStr.contains("UNDER MIN REPL'D BLOCKS:\t1 (100.0 %)"));
|
||||||
|
assertTrue(outStr.contains("DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY:\t2"));
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {cluster.shutdown();}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Test if fsck can return -1 in case of failure
|
/** Test if fsck can return -1 in case of failure
|
||||||
*
|
*
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
|
|
Loading…
Reference in New Issue