HDFS-8623. Refactor NameNode handling of invalid, corrupt, and under-recovery blocks. Contributed by Zhe Zhang.

(cherry picked from commit de480d6c89)
This commit is contained in:
Jing Zhao 2015-06-26 10:49:01 -07:00
parent 8552af91f4
commit 83d76151e2
13 changed files with 361 additions and 302 deletions

View File

@ -343,6 +343,9 @@ Release 2.8.0 - UNRELEASED
HDFS-8651. Make hadoop-hdfs-project Native code -Wall-clean (Alan Burlison HDFS-8651. Make hadoop-hdfs-project Native code -Wall-clean (Alan Burlison
via Colin P. McCabe) via Colin P. McCabe)
HDFS-8623. Refactor NameNode handling of invalid, corrupt, and under-recovery
blocks. (Zhe Zhang via jing9)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

View File

@ -175,19 +175,23 @@ public abstract class BlockInfo extends Block
public abstract int numNodes(); public abstract int numNodes();
/** /**
* Add a {@link DatanodeStorageInfo} location for a block. * Add a {@link DatanodeStorageInfo} location for a block
* @param storage The storage to add
* @param reportedBlock The block reported from the datanode. This is only
* used by erasure coded blocks, this block's id contains
* information indicating the index of the block in the
* corresponding block group.
*/ */
abstract boolean addStorage(DatanodeStorageInfo storage); abstract boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock);
/** /**
* Remove {@link DatanodeStorageInfo} location for a block * Remove {@link DatanodeStorageInfo} location for a block
*/ */
abstract boolean removeStorage(DatanodeStorageInfo storage); abstract boolean removeStorage(DatanodeStorageInfo storage);
/** /**
* Replace the current BlockInfo with the new one in corresponding * Replace the current BlockInfo with the new one in corresponding
* DatanodeStorageInfo's linked list * DatanodeStorageInfo's linked list.
*/ */
abstract void replaceBlock(BlockInfo newBlock); abstract void replaceBlock(BlockInfo newBlock);

View File

@ -45,7 +45,7 @@ public class BlockInfoContiguous extends BlockInfo {
} }
@Override @Override
boolean addStorage(DatanodeStorageInfo storage) { boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) {
return ContiguousBlockStorageOp.addStorage(this, storage); return ContiguousBlockStorageOp.addStorage(this, storage);
} }

View File

@ -69,7 +69,7 @@ public class BlockInfoUnderConstructionContiguous extends
} }
@Override @Override
boolean addStorage(DatanodeStorageInfo storage) { boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) {
return ContiguousBlockStorageOp.addStorage(this, storage); return ContiguousBlockStorageOp.addStorage(this, storage);
} }

View File

@ -24,6 +24,7 @@ import java.util.List;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage.State; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage.State;
@ -226,7 +227,7 @@ public class DatanodeStorageInfo {
return blockPoolUsed; return blockPoolUsed;
} }
public AddBlockResult addBlock(BlockInfo b) { public AddBlockResult addBlock(BlockInfo b, Block reportedBlock) {
// First check whether the block belongs to a different storage // First check whether the block belongs to a different storage
// on the same DN. // on the same DN.
AddBlockResult result = AddBlockResult.ADDED; AddBlockResult result = AddBlockResult.ADDED;
@ -245,10 +246,18 @@ public class DatanodeStorageInfo {
} }
// add to the head of the data-node list // add to the head of the data-node list
b.addStorage(this); b.addStorage(this, reportedBlock);
insertToList(b);
return result;
}
AddBlockResult addBlock(BlockInfo b) {
return addBlock(b, b);
}
public void insertToList(BlockInfo b) {
blockList = b.listInsert(blockList, this); blockList = b.listInsert(blockList, this);
numBlocks++; numBlocks++;
return result;
} }
public boolean removeBlock(BlockInfo b) { public boolean removeBlock(BlockInfo b) {

View File

@ -147,7 +147,6 @@ import org.apache.hadoop.fs.CacheFlag;
import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileEncryptionInfo;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.FsServerDefaults;
@ -3133,7 +3132,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
if (trackBlockCounts) { if (trackBlockCounts) {
if (b.isComplete()) { if (b.isComplete()) {
numRemovedComplete++; numRemovedComplete++;
if (blockManager.checkMinReplication(b)) { if (blockManager.hasMinStorage(b)) {
numRemovedSafe++; numRemovedSafe++;
} }
} }
@ -3365,7 +3364,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
curBlock = blocks[nrCompleteBlocks]; curBlock = blocks[nrCompleteBlocks];
if(!curBlock.isComplete()) if(!curBlock.isComplete())
break; break;
assert blockManager.checkMinReplication(curBlock) : assert blockManager.hasMinStorage(curBlock) :
"A COMPLETE block is not minimally replicated in " + src; "A COMPLETE block is not minimally replicated in " + src;
} }
@ -3401,7 +3400,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
// If penultimate block doesn't exist then its minReplication is met // If penultimate block doesn't exist then its minReplication is met
boolean penultimateBlockMinReplication = penultimateBlock == null ? true : boolean penultimateBlockMinReplication = penultimateBlock == null ? true :
blockManager.checkMinReplication(penultimateBlock); blockManager.hasMinStorage(penultimateBlock);
switch(lastBlockState) { switch(lastBlockState) {
case COMPLETE: case COMPLETE:
@ -3410,7 +3409,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
case COMMITTED: case COMMITTED:
// Close file if committed blocks are minimally replicated // Close file if committed blocks are minimally replicated
if(penultimateBlockMinReplication && if(penultimateBlockMinReplication &&
blockManager.checkMinReplication(lastBlock)) { blockManager.hasMinStorage(lastBlock)) {
finalizeINodeFileUnderConstruction(src, pendingFile, finalizeINodeFileUnderConstruction(src, pendingFile,
iip.getLatestSnapshotId()); iip.getLatestSnapshotId());
NameNode.stateChangeLog.warn("BLOCK*" NameNode.stateChangeLog.warn("BLOCK*"
@ -3702,9 +3701,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i)); trimmedTargets.get(i).getStorageInfo(trimmedStorages.get(i));
if (storageInfo != null) { if (storageInfo != null) {
if(copyTruncate) { if(copyTruncate) {
storageInfo.addBlock(truncatedBlock); storageInfo.addBlock(truncatedBlock, truncatedBlock);
} else { } else {
storageInfo.addBlock(storedBlock); storageInfo.addBlock(storedBlock, storedBlock);
} }
} }
} }
@ -3720,8 +3719,9 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
} else { } else {
iFile.setLastBlock(storedBlock, trimmedStorageInfos); iFile.setLastBlock(storedBlock, trimmedStorageInfos);
if (closeFile) { if (closeFile) {
blockManager.markBlockReplicasAsCorrupt(storedBlock, blockManager.markBlockReplicasAsCorrupt(oldBlock.getLocalBlock(),
oldGenerationStamp, oldNumBytes, trimmedStorageInfos); storedBlock, oldGenerationStamp, oldNumBytes,
trimmedStorageInfos);
} }
} }
} }

View File

@ -636,7 +636,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
.getStorageType())); .getStorageType()));
} }
if (showReplicaDetails) { if (showReplicaDetails) {
LightWeightLinkedSet<Block> blocksExcess = LightWeightLinkedSet<BlockInfo> blocksExcess =
bm.excessReplicateMap.get(dnDesc.getDatanodeUuid()); bm.excessReplicateMap.get(dnDesc.getDatanodeUuid());
Collection<DatanodeDescriptor> corruptReplicas = Collection<DatanodeDescriptor> corruptReplicas =
bm.getCorruptReplicas(block.getLocalBlock()); bm.getCorruptReplicas(block.getLocalBlock());

View File

@ -63,7 +63,7 @@ public class TestBlockInfo {
final DatanodeStorageInfo storage = DFSTestUtil.createDatanodeStorageInfo("storageID", "127.0.0.1"); final DatanodeStorageInfo storage = DFSTestUtil.createDatanodeStorageInfo("storageID", "127.0.0.1");
boolean added = blockInfo.addStorage(storage); boolean added = blockInfo.addStorage(storage, blockInfo);
Assert.assertTrue(added); Assert.assertTrue(added);
Assert.assertEquals(storage, blockInfo.getStorageInfo(0)); Assert.assertEquals(storage, blockInfo.getStorageInfo(0));

View File

@ -383,7 +383,7 @@ public class TestBlockManager {
for (int i = 1; i < pipeline.length; i++) { for (int i = 1; i < pipeline.length; i++) {
DatanodeStorageInfo storage = pipeline[i]; DatanodeStorageInfo storage = pipeline[i];
bm.addBlock(storage, blockInfo, null); bm.addBlock(storage, blockInfo, null);
blockInfo.addStorage(storage); blockInfo.addStorage(storage, blockInfo);
} }
} }
@ -393,7 +393,7 @@ public class TestBlockManager {
for (DatanodeDescriptor dn : nodes) { for (DatanodeDescriptor dn : nodes) {
for (DatanodeStorageInfo storage : dn.getStorageInfos()) { for (DatanodeStorageInfo storage : dn.getStorageInfos()) {
blockInfo.addStorage(storage); blockInfo.addStorage(storage, blockInfo);
} }
} }
return blockInfo; return blockInfo;

View File

@ -100,7 +100,7 @@ public class TestNodeCount {
DatanodeDescriptor nonExcessDN = null; DatanodeDescriptor nonExcessDN = null;
for(DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) { for(DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
final DatanodeDescriptor dn = storage.getDatanodeDescriptor(); final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
Collection<Block> blocks = bm.excessReplicateMap.get(dn.getDatanodeUuid()); Collection<BlockInfo> blocks = bm.excessReplicateMap.get(dn.getDatanodeUuid());
if (blocks == null || !blocks.contains(block.getLocalBlock()) ) { if (blocks == null || !blocks.contains(block.getLocalBlock()) ) {
nonExcessDN = dn; nonExcessDN = dn;
break; break;

View File

@ -34,7 +34,6 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNode;
@ -42,7 +41,6 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.util.Time;
import org.junit.Test; import org.junit.Test;
public class TestOverReplicatedBlocks { public class TestOverReplicatedBlocks {
@ -185,7 +183,7 @@ public class TestOverReplicatedBlocks {
// All replicas for deletion should be scheduled on lastDN. // All replicas for deletion should be scheduled on lastDN.
// And should not actually be deleted, because lastDN does not heartbeat. // And should not actually be deleted, because lastDN does not heartbeat.
namesystem.readLock(); namesystem.readLock();
Collection<Block> dnBlocks = Collection<BlockInfo> dnBlocks =
namesystem.getBlockManager().excessReplicateMap.get(lastDNid); namesystem.getBlockManager().excessReplicateMap.get(lastDNid);
assertEquals("Replicas on node " + lastDNid + " should have been deleted", assertEquals("Replicas on node " + lastDNid + " should have been deleted",
SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks.size()); SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks.size());

View File

@ -1250,7 +1250,7 @@ public class TestReplicationPolicy {
when(storage.removeBlock(any(BlockInfo.class))).thenReturn(true); when(storage.removeBlock(any(BlockInfo.class))).thenReturn(true);
when(storage.addBlock(any(BlockInfo.class))).thenReturn when(storage.addBlock(any(BlockInfo.class))).thenReturn
(DatanodeStorageInfo.AddBlockResult.ADDED); (DatanodeStorageInfo.AddBlockResult.ADDED);
ucBlock.addStorage(storage); ucBlock.addStorage(storage, ucBlock);
when(mbc.setLastBlock((BlockInfo) any(), (DatanodeStorageInfo[]) any())) when(mbc.setLastBlock((BlockInfo) any(), (DatanodeStorageInfo[]) any()))
.thenReturn(ucBlock); .thenReturn(ucBlock);