HDFS-7281. Missing block is marked as corrupted block (Ming Ma via Yongjun Zhang)
This commit is contained in:
parent
1b3b9e5c31
commit
279958b772
|
@ -20,6 +20,9 @@ Trunk (Unreleased)
|
||||||
|
|
||||||
HDFS-7985. WebHDFS should be always enabled. (Li Lu via wheat9)
|
HDFS-7985. WebHDFS should be always enabled. (Li Lu via wheat9)
|
||||||
|
|
||||||
|
HDFS-7281. Missing block is marked as corrupted block (Ming Ma via
|
||||||
|
Yongjun Zhang)
|
||||||
|
|
||||||
NEW FEATURES
|
NEW FEATURES
|
||||||
|
|
||||||
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
|
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
|
||||||
|
|
|
@ -849,7 +849,8 @@ public class BlockManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int numNodes = blocksMap.numNodes(blk);
|
final int numNodes = blocksMap.numNodes(blk);
|
||||||
final boolean isCorrupt = numCorruptNodes == numNodes;
|
final boolean isCorrupt = numCorruptNodes != 0 &&
|
||||||
|
numCorruptNodes == numNodes;
|
||||||
final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
|
final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes;
|
||||||
final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
|
final DatanodeStorageInfo[] machines = new DatanodeStorageInfo[numMachines];
|
||||||
int j = 0;
|
int j = 0;
|
||||||
|
|
|
@ -531,6 +531,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
int missing = 0;
|
int missing = 0;
|
||||||
int corrupt = 0;
|
int corrupt = 0;
|
||||||
long missize = 0;
|
long missize = 0;
|
||||||
|
long corruptSize = 0;
|
||||||
int underReplicatedPerFile = 0;
|
int underReplicatedPerFile = 0;
|
||||||
int misReplicatedPerFile = 0;
|
int misReplicatedPerFile = 0;
|
||||||
StringBuilder report = new StringBuilder();
|
StringBuilder report = new StringBuilder();
|
||||||
|
@ -570,10 +571,11 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
// count corrupt blocks
|
// count corrupt blocks
|
||||||
boolean isCorrupt = lBlk.isCorrupt();
|
boolean isCorrupt = lBlk.isCorrupt();
|
||||||
if (isCorrupt) {
|
if (isCorrupt) {
|
||||||
|
res.addCorrupt(block.getNumBytes());
|
||||||
corrupt++;
|
corrupt++;
|
||||||
res.corruptBlocks++;
|
corruptSize += block.getNumBytes();
|
||||||
out.print("\n" + path + ": CORRUPT blockpool " + block.getBlockPoolId() +
|
out.print("\n" + path + ": CORRUPT blockpool " +
|
||||||
" block " + block.getBlockName()+"\n");
|
block.getBlockPoolId() + " block " + block.getBlockName() + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// count minimally replicated blocks
|
// count minimally replicated blocks
|
||||||
|
@ -619,7 +621,11 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
// report
|
// report
|
||||||
String blkName = block.toString();
|
String blkName = block.toString();
|
||||||
report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
|
report.append(blockNumber + ". " + blkName + " len=" + block.getNumBytes());
|
||||||
if (totalReplicasPerBlock == 0) {
|
if (totalReplicasPerBlock == 0 && !isCorrupt) {
|
||||||
|
// If the block is corrupted, it means all its available replicas are
|
||||||
|
// corrupted. We don't mark it as missing given these available replicas
|
||||||
|
// might still be accessible as the block might be incorrectly marked as
|
||||||
|
// corrupted by client machines.
|
||||||
report.append(" MISSING!");
|
report.append(" MISSING!");
|
||||||
res.addMissing(block.toString(), block.getNumBytes());
|
res.addMissing(block.toString(), block.getNumBytes());
|
||||||
missing++;
|
missing++;
|
||||||
|
@ -674,10 +680,16 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
|
|
||||||
// count corrupt file & move or delete if necessary
|
// count corrupt file & move or delete if necessary
|
||||||
if ((missing > 0) || (corrupt > 0)) {
|
if ((missing > 0) || (corrupt > 0)) {
|
||||||
if (!showFiles && (missing > 0)) {
|
if (!showFiles) {
|
||||||
|
if (missing > 0) {
|
||||||
out.print("\n" + path + ": MISSING " + missing
|
out.print("\n" + path + ": MISSING " + missing
|
||||||
+ " blocks of total size " + missize + " B.");
|
+ " blocks of total size " + missize + " B.");
|
||||||
}
|
}
|
||||||
|
if (corrupt > 0) {
|
||||||
|
out.print("\n" + path + ": CORRUPT " + corrupt
|
||||||
|
+ " blocks of total size " + corruptSize + " B.");
|
||||||
|
}
|
||||||
|
}
|
||||||
res.corruptFiles++;
|
res.corruptFiles++;
|
||||||
if (isOpen) {
|
if (isOpen) {
|
||||||
LOG.info("Fsck: ignoring open file " + path);
|
LOG.info("Fsck: ignoring open file " + path);
|
||||||
|
@ -688,8 +700,15 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (showFiles) {
|
if (showFiles) {
|
||||||
|
if (missing > 0 || corrupt > 0) {
|
||||||
if (missing > 0) {
|
if (missing > 0) {
|
||||||
out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
|
out.print(" MISSING " + missing + " blocks of total size " +
|
||||||
|
missize + " B\n");
|
||||||
|
}
|
||||||
|
if (corrupt > 0) {
|
||||||
|
out.print(" CORRUPT " + corrupt + " blocks of total size " +
|
||||||
|
corruptSize + " B\n");
|
||||||
|
}
|
||||||
} else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
|
} else if (underReplicatedPerFile == 0 && misReplicatedPerFile == 0) {
|
||||||
out.print(" OK\n");
|
out.print(" OK\n");
|
||||||
}
|
}
|
||||||
|
@ -956,6 +975,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
long missingSize = 0L;
|
long missingSize = 0L;
|
||||||
long corruptFiles = 0L;
|
long corruptFiles = 0L;
|
||||||
long corruptBlocks = 0L;
|
long corruptBlocks = 0L;
|
||||||
|
long corruptSize = 0L;
|
||||||
long excessiveReplicas = 0L;
|
long excessiveReplicas = 0L;
|
||||||
long missingReplicas = 0L;
|
long missingReplicas = 0L;
|
||||||
long decommissionedReplicas = 0L;
|
long decommissionedReplicas = 0L;
|
||||||
|
@ -999,6 +1019,12 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
missingSize += size;
|
missingSize += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Add a corrupt block. */
|
||||||
|
void addCorrupt(long size) {
|
||||||
|
corruptBlocks++;
|
||||||
|
corruptSize += size;
|
||||||
|
}
|
||||||
|
|
||||||
/** Return the actual replication factor. */
|
/** Return the actual replication factor. */
|
||||||
float getReplicationFactor() {
|
float getReplicationFactor() {
|
||||||
if (totalBlocks == 0)
|
if (totalBlocks == 0)
|
||||||
|
@ -1051,7 +1077,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
|
"\n MISSING SIZE:\t\t").append(missingSize).append(" B");
|
||||||
}
|
}
|
||||||
if (corruptBlocks > 0) {
|
if (corruptBlocks > 0) {
|
||||||
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks);
|
res.append("\n CORRUPT BLOCKS: \t").append(corruptBlocks).append(
|
||||||
|
"\n CORRUPT SIZE:\t\t").append(corruptSize).append(" B");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
res.append("\n ********************************");
|
res.append("\n ********************************");
|
||||||
|
@ -1086,7 +1113,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
}
|
}
|
||||||
res.append("\n Default replication factor:\t").append(replication)
|
res.append("\n Default replication factor:\t").append(replication)
|
||||||
.append("\n Average block replication:\t").append(
|
.append("\n Average block replication:\t").append(
|
||||||
getReplicationFactor()).append("\n Corrupt blocks:\t\t").append(
|
getReplicationFactor()).append("\n Missing blocks:\t\t").append(
|
||||||
|
missingIds.size()).append("\n Corrupt blocks:\t\t").append(
|
||||||
corruptBlocks).append("\n Missing replicas:\t\t").append(
|
corruptBlocks).append("\n Missing replicas:\t\t").append(
|
||||||
missingReplicas);
|
missingReplicas);
|
||||||
if (totalReplicas > 0) {
|
if (totalReplicas > 0) {
|
||||||
|
|
|
@ -121,6 +121,9 @@ public class TestFsck {
|
||||||
"cmd=getfileinfo\\ssrc=\\/\\sdst=null\\s" +
|
"cmd=getfileinfo\\ssrc=\\/\\sdst=null\\s" +
|
||||||
"perm=null\\s" + "proto=.*");
|
"perm=null\\s" + "proto=.*");
|
||||||
|
|
||||||
|
static final Pattern numMissingBlocksPattern = Pattern.compile(
|
||||||
|
".*Missing blocks:\t\t([0123456789]*).*");
|
||||||
|
|
||||||
static final Pattern numCorruptBlocksPattern = Pattern.compile(
|
static final Pattern numCorruptBlocksPattern = Pattern.compile(
|
||||||
".*Corrupt blocks:\t\t([0123456789]*).*");
|
".*Corrupt blocks:\t\t([0123456789]*).*");
|
||||||
|
|
||||||
|
@ -360,19 +363,27 @@ public class TestFsck {
|
||||||
// Wait for fsck to discover all the missing blocks
|
// Wait for fsck to discover all the missing blocks
|
||||||
while (true) {
|
while (true) {
|
||||||
outStr = runFsck(conf, 1, false, "/");
|
outStr = runFsck(conf, 1, false, "/");
|
||||||
|
String numMissing = null;
|
||||||
String numCorrupt = null;
|
String numCorrupt = null;
|
||||||
for (String line : outStr.split(LINE_SEPARATOR)) {
|
for (String line : outStr.split(LINE_SEPARATOR)) {
|
||||||
Matcher m = numCorruptBlocksPattern.matcher(line);
|
Matcher m = numMissingBlocksPattern.matcher(line);
|
||||||
|
if (m.matches()) {
|
||||||
|
numMissing = m.group(1);
|
||||||
|
}
|
||||||
|
m = numCorruptBlocksPattern.matcher(line);
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
numCorrupt = m.group(1);
|
numCorrupt = m.group(1);
|
||||||
|
}
|
||||||
|
if (numMissing != null && numCorrupt != null) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (numCorrupt == null) {
|
if (numMissing == null || numCorrupt == null) {
|
||||||
throw new IOException("failed to find number of corrupt " +
|
throw new IOException("failed to find number of missing or corrupt" +
|
||||||
"blocks in fsck output.");
|
" blocks in fsck output.");
|
||||||
}
|
}
|
||||||
if (numCorrupt.equals(Integer.toString(totalMissingBlocks))) {
|
if (numMissing.equals(Integer.toString(totalMissingBlocks))) {
|
||||||
|
assertTrue(numCorrupt.equals(Integer.toString(0)));
|
||||||
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
|
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue