HDFS-8563. Erasure Coding: fsck handles file smaller than a full stripe. Contributed by Walter Su.
This commit is contained in:
parent
48f3830f21
commit
2470a7bf88
|
@@ -335,3 +335,6 @@
|
||||||
|
|
||||||
HDFS-8719. Erasure Coding: client generates too many small packets when
|
HDFS-8719. Erasure Coding: client generates too many small packets when
|
||||||
writing parity data. (Li Bo via waltersu4549)
|
writing parity data. (Li Bo via waltersu4549)
|
||||||
|
|
||||||
|
HDFS-8563. Erasure Coding: fsck handles file smaller than a full stripe.
|
||||||
|
(Walter Su via jing9)
|
||||||
|
|
|
@@ -65,6 +65,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
||||||
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoStriped;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicies;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicies;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
|
||||||
|
@@ -74,7 +75,6 @@ import org.apache.hadoop.hdfs.server.blockmanagement.NumberReplicas;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
|
||||||
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
|
||||||
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
|
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
|
||||||
import org.apache.hadoop.io.erasurecode.ECSchema;
|
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.net.NetworkTopology;
|
import org.apache.hadoop.net.NetworkTopology;
|
||||||
import org.apache.hadoop.net.NodeBase;
|
import org.apache.hadoop.net.NodeBase;
|
||||||
|
@@ -247,7 +247,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
//get blockInfo
|
//get blockInfo
|
||||||
Block block = new Block(Block.getBlockId(blockId));
|
Block block = new Block(Block.getBlockId(blockId));
|
||||||
//find which file this block belongs to
|
//find which file this block belongs to
|
||||||
BlockInfo blockInfo = namenode.getNamesystem().getStoredBlock(block);
|
BlockInfo blockInfo = bm.getStoredBlock(block);
|
||||||
if(blockInfo == null) {
|
if(blockInfo == null) {
|
||||||
out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
|
out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
|
||||||
LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
|
LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
|
||||||
|
@@ -556,6 +556,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
|
|
||||||
final BlockInfo storedBlock = bm.getStoredBlock(
|
final BlockInfo storedBlock = bm.getStoredBlock(
|
||||||
block.getLocalBlock());
|
block.getLocalBlock());
|
||||||
|
final int minReplication = bm.getMinStorageNum(storedBlock);
|
||||||
// count decommissionedReplicas / decommissioningReplicas
|
// count decommissionedReplicas / decommissioningReplicas
|
||||||
NumberReplicas numberReplicas = bm.countNodes(storedBlock);
|
NumberReplicas numberReplicas = bm.countNodes(storedBlock);
|
||||||
int decommissionedReplicas = numberReplicas.decommissioned();
|
int decommissionedReplicas = numberReplicas.decommissioned();
|
||||||
|
@@ -571,26 +572,17 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
|
|
||||||
// count expected replicas
|
// count expected replicas
|
||||||
short targetFileReplication;
|
short targetFileReplication;
|
||||||
if(file.getReplication() == 0) {
|
if (file.getECSchema() != null) {
|
||||||
final FSNamesystem fsn = namenode.getNamesystem();
|
assert storedBlock instanceof BlockInfoStriped;
|
||||||
final ECSchema ecSchema;
|
targetFileReplication = ((BlockInfoStriped) storedBlock)
|
||||||
fsn.readLock();
|
.getRealTotalBlockNum();
|
||||||
try {
|
|
||||||
INode inode = namenode.getNamesystem().getFSDirectory()
|
|
||||||
.getINode(path);
|
|
||||||
INodesInPath iip = INodesInPath.fromINode(inode);
|
|
||||||
ecSchema = FSDirErasureCodingOp.getErasureCodingSchema(fsn, iip);
|
|
||||||
} finally {
|
|
||||||
fsn.readUnlock();
|
|
||||||
}
|
|
||||||
targetFileReplication = (short) (ecSchema.getNumDataUnits() + ecSchema.getNumParityUnits());
|
|
||||||
} else {
|
} else {
|
||||||
targetFileReplication = file.getReplication();
|
targetFileReplication = file.getReplication();
|
||||||
}
|
}
|
||||||
res.numExpectedReplicas += targetFileReplication;
|
res.numExpectedReplicas += targetFileReplication;
|
||||||
|
|
||||||
// count under min repl'd blocks
|
// count under min repl'd blocks
|
||||||
if(totalReplicasPerBlock < res.minReplication){
|
if(totalReplicasPerBlock < minReplication){
|
||||||
res.numUnderMinReplicatedBlocks++;
|
res.numUnderMinReplicatedBlocks++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -611,7 +603,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
// count minimally replicated blocks
|
// count minimally replicated blocks
|
||||||
if (totalReplicasPerBlock >= res.minReplication)
|
if (totalReplicasPerBlock >= minReplication)
|
||||||
res.numMinReplicatedBlocks++;
|
res.numMinReplicatedBlocks++;
|
||||||
|
|
||||||
// count missing replicas / under replicated blocks
|
// count missing replicas / under replicated blocks
|
||||||
|
@@ -1026,12 +1018,6 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
long totalOpenFilesSize = 0L;
|
long totalOpenFilesSize = 0L;
|
||||||
long totalReplicas = 0L;
|
long totalReplicas = 0L;
|
||||||
|
|
||||||
final int minReplication;
|
|
||||||
|
|
||||||
Result(int minReplication) {
|
|
||||||
this.minReplication = minReplication;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DFS is considered healthy if there are no missing blocks.
|
* DFS is considered healthy if there are no missing blocks.
|
||||||
*/
|
*/
|
||||||
|
@@ -1062,12 +1048,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static class ReplicationResult extends Result {
|
static class ReplicationResult extends Result {
|
||||||
final short replication;
|
final short replication;
|
||||||
|
final short minReplication;
|
||||||
|
|
||||||
ReplicationResult(Configuration conf) {
|
ReplicationResult(Configuration conf) {
|
||||||
super(conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,
|
|
||||||
DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT));
|
|
||||||
this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
|
this.replication = (short)conf.getInt(DFSConfigKeys.DFS_REPLICATION_KEY,
|
||||||
DFSConfigKeys.DFS_REPLICATION_DEFAULT);
|
DFSConfigKeys.DFS_REPLICATION_DEFAULT);
|
||||||
|
this.minReplication = (short)conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY,
|
||||||
|
DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -1171,15 +1158,11 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static class ErasureCodingResult extends Result {
|
static class ErasureCodingResult extends Result {
|
||||||
final String ecSchema;
|
final String defaultSchema;
|
||||||
|
|
||||||
ErasureCodingResult(Configuration conf) {
|
ErasureCodingResult(Configuration conf) {
|
||||||
this(ErasureCodingSchemaManager.getSystemDefaultSchema());
|
defaultSchema = ErasureCodingSchemaManager.getSystemDefaultSchema()
|
||||||
}
|
.getSchemaName();
|
||||||
|
|
||||||
ErasureCodingResult(ECSchema ecSchema) {
|
|
||||||
super(ecSchema.getNumDataUnits());
|
|
||||||
this.ecSchema = ecSchema.getSchemaName();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -1213,8 +1196,6 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
((float) (numUnderMinReplicatedBlocks * 100) / (float) totalBlocks))
|
((float) (numUnderMinReplicatedBlocks * 100) / (float) totalBlocks))
|
||||||
.append(" %)");
|
.append(" %)");
|
||||||
}
|
}
|
||||||
res.append("\n ").append("MIN REQUIRED EC BLOCK:\t")
|
|
||||||
.append(minReplication);
|
|
||||||
}
|
}
|
||||||
if(corruptFiles>0) {
|
if(corruptFiles>0) {
|
||||||
res.append(
|
res.append(
|
||||||
|
@@ -1251,18 +1232,18 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
((float) (numUnderReplicatedBlocks * 100) / (float) totalBlocks))
|
((float) (numUnderReplicatedBlocks * 100) / (float) totalBlocks))
|
||||||
.append(" %)");
|
.append(" %)");
|
||||||
}
|
}
|
||||||
res.append("\n Unsatisfactory placement block groups:\t\t")
|
res.append("\n Unsatisfactory placement block groups:\t")
|
||||||
.append(numMisReplicatedBlocks);
|
.append(numMisReplicatedBlocks);
|
||||||
if (totalBlocks > 0) {
|
if (totalBlocks > 0) {
|
||||||
res.append(" (").append(
|
res.append(" (").append(
|
||||||
((float) (numMisReplicatedBlocks * 100) / (float) totalBlocks))
|
((float) (numMisReplicatedBlocks * 100) / (float) totalBlocks))
|
||||||
.append(" %)");
|
.append(" %)");
|
||||||
}
|
}
|
||||||
res.append("\n Default schema:\t").append(ecSchema)
|
res.append("\n Default schema:\t\t").append(defaultSchema)
|
||||||
.append("\n Average block group size:\t").append(
|
.append("\n Average block group size:\t").append(
|
||||||
getReplicationFactor()).append("\n Missing block groups:\t\t").append(
|
getReplicationFactor()).append("\n Missing block groups:\t\t").append(
|
||||||
missingIds.size()).append("\n Corrupt block groups:\t\t").append(
|
missingIds.size()).append("\n Corrupt block groups:\t\t").append(
|
||||||
corruptBlocks).append("\n Missing ec-blocks:\t\t").append(
|
corruptBlocks).append("\n Missing internal blocks:\t").append(
|
||||||
missingReplicas);
|
missingReplicas);
|
||||||
if (totalReplicas > 0) {
|
if (totalReplicas > 0) {
|
||||||
res.append(" (").append(
|
res.append(" (").append(
|
||||||
|
@@ -1270,11 +1251,11 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
|
||||||
" %)");
|
" %)");
|
||||||
}
|
}
|
||||||
if (decommissionedReplicas > 0) {
|
if (decommissionedReplicas > 0) {
|
||||||
res.append("\n Decommissioned ec-blocks:\t").append(
|
res.append("\n Decommissioned internal blocks:\t").append(
|
||||||
decommissionedReplicas);
|
decommissionedReplicas);
|
||||||
}
|
}
|
||||||
if (decommissioningReplicas > 0) {
|
if (decommissioningReplicas > 0) {
|
||||||
res.append("\n Decommissioning ec-blocks:\t").append(
|
res.append("\n Decommissioning internal blocks:\t").append(
|
||||||
decommissioningReplicas);
|
decommissioningReplicas);
|
||||||
}
|
}
|
||||||
return res.toString();
|
return res.toString();
|
||||||
|
|
|
@@ -1648,23 +1648,31 @@ public class TestFsck {
|
||||||
+ ErasureCodingSchemaManager.getSystemDefaultSchema().getNumParityUnits();
|
+ ErasureCodingSchemaManager.getSystemDefaultSchema().getNumParityUnits();
|
||||||
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(totalSize).build();
|
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(totalSize).build();
|
||||||
fs = cluster.getFileSystem();
|
fs = cluster.getFileSystem();
|
||||||
|
|
||||||
|
// create a contiguous file
|
||||||
Path replDirPath = new Path("/replicated");
|
Path replDirPath = new Path("/replicated");
|
||||||
Path replFilePath = new Path(replDirPath, "replfile");
|
Path replFilePath = new Path(replDirPath, "replfile");
|
||||||
final short factor = 3;
|
final short factor = 3;
|
||||||
DFSTestUtil.createFile(fs, replFilePath, 1024, factor, 0);
|
DFSTestUtil.createFile(fs, replFilePath, 1024, factor, 0);
|
||||||
DFSTestUtil.waitReplication(fs, replFilePath, factor);
|
DFSTestUtil.waitReplication(fs, replFilePath, factor);
|
||||||
|
|
||||||
|
// create a large striped file
|
||||||
Path ecDirPath = new Path("/striped");
|
Path ecDirPath = new Path("/striped");
|
||||||
Path ecFilePath = new Path(ecDirPath, "ecfile");
|
Path largeFilePath = new Path(ecDirPath, "largeFile");
|
||||||
final int numBlocks = 4;
|
DFSTestUtil.createStripedFile(cluster, largeFilePath, ecDirPath, 1, 2, true);
|
||||||
DFSTestUtil.createStripedFile(cluster, ecFilePath, ecDirPath, numBlocks, 2, true);
|
|
||||||
|
// create a small striped file
|
||||||
|
Path smallFilePath = new Path(ecDirPath, "smallFile");
|
||||||
|
DFSTestUtil.writeFile(fs, smallFilePath, "hello world!");
|
||||||
|
|
||||||
long replTime = fs.getFileStatus(replFilePath).getAccessTime();
|
long replTime = fs.getFileStatus(replFilePath).getAccessTime();
|
||||||
long ecTime = fs.getFileStatus(ecFilePath).getAccessTime();
|
long ecTime = fs.getFileStatus(largeFilePath).getAccessTime();
|
||||||
Thread.sleep(precision);
|
Thread.sleep(precision);
|
||||||
setupAuditLogs();
|
setupAuditLogs();
|
||||||
String outStr = runFsck(conf, 0, true, "/");
|
String outStr = runFsck(conf, 0, true, "/");
|
||||||
verifyAuditLogs();
|
verifyAuditLogs();
|
||||||
assertEquals(replTime, fs.getFileStatus(replFilePath).getAccessTime());
|
assertEquals(replTime, fs.getFileStatus(replFilePath).getAccessTime());
|
||||||
assertEquals(ecTime, fs.getFileStatus(ecFilePath).getAccessTime());
|
assertEquals(ecTime, fs.getFileStatus(largeFilePath).getAccessTime());
|
||||||
System.out.println(outStr);
|
System.out.println(outStr);
|
||||||
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
|
||||||
if (fs != null) {try{fs.close();} catch(Exception e){}}
|
if (fs != null) {try{fs.close();} catch(Exception e){}}
|
||||||
|
|
Loading…
Reference in New Issue