From 74bd9666d1860067940a7c8b8ff673acc01e78e1 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Thu, 7 May 2015 11:36:35 -0700 Subject: [PATCH] HDFS-7980. Incremental BlockReport will dramatically slow down namenode startup. Contributed by Walter Su --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/blockmanagement/BlockManager.java | 8 +- .../blockmanagement/TestBlockManager.java | 108 ++++++++++++++++++ 3 files changed, 115 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index eb027595b4a..eaade89bf5e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -65,6 +65,9 @@ Release 2.7.1 - UNRELEASED HDFS-8305: HDFS INotify: the destination field of RenameOp should always end with the file name (cmccabe) + HDFS-7980. Incremental BlockReport will dramatically slow down namenode + startup. (Walter Su via szetszwo) + Release 2.7.0 - 2015-04-20 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index e0f87c7abf4..d1c8d768ae8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1805,7 +1805,7 @@ public class BlockManager { return !node.hasStaleStorages(); } - if (storageInfo.numBlocks() == 0) { + if (storageInfo.getBlockReportCount() == 0) { // The first block report can be processed a lot more efficiently than // ordinary block reports. This shortens restart times. 
processFirstBlockReport(storageInfo, newReport); @@ -2059,7 +2059,7 @@ public class BlockManager { final BlockListAsLongs report) throws IOException { if (report == null) return; assert (namesystem.hasWriteLock()); - assert (storageInfo.numBlocks() == 0); + assert (storageInfo.getBlockReportCount() == 0); for (BlockReportReplica iblk : report) { ReplicaState reportedState = iblk.getState(); @@ -2471,14 +2471,14 @@ public class BlockManager { } // just add it - storageInfo.addBlock(storedBlock); + AddBlockResult result = storageInfo.addBlock(storedBlock); // Now check for completion of blocks and safe block count int numCurrentReplica = countLiveNodes(storedBlock); if (storedBlock.getBlockUCState() == BlockUCState.COMMITTED && numCurrentReplica >= minReplication) { completeBlock(storedBlock.getBlockCollection(), storedBlock, false); - } else if (storedBlock.isComplete()) { + } else if (storedBlock.isComplete() && result == AddBlockResult.ADDED) { // check whether safe replication is reached for the block // only complete blocks are counted towards that. 
// In the case that the block just became complete above, completeBlock() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java index 91abb2a5f5c..fba840ed8ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java @@ -47,10 +47,14 @@ import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTargetPair; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; +import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica; +import org.apache.hadoop.hdfs.server.datanode.ReplicaBeingWritten; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetworkTopology; import org.junit.Assert; @@ -645,6 +649,110 @@ public class TestBlockManager { BlockListAsLongs.EMPTY, null, false); assertEquals(1, ds.getBlockReportCount()); } + + /** + * test when NN starts and in safe mode, it receives an incremental blockReport + * firstly. Then receives first full block report.
+ */ + @Test + public void testSafeModeIBRBeforeFirstFullBR() throws Exception { + // pretend to be in safemode + doReturn(true).when(fsn).isInStartupSafeMode(); + + DatanodeDescriptor node = nodes.get(0); + DatanodeStorageInfo ds = node.getStorageInfos()[0]; + node.isAlive = true; + DatanodeRegistration nodeReg = new DatanodeRegistration(node, null, null, ""); + + // register new node + bm.getDatanodeManager().registerDatanode(nodeReg); + bm.getDatanodeManager().addDatanode(node); + assertEquals(node, bm.getDatanodeManager().getDatanode(node)); + assertEquals(0, ds.getBlockReportCount()); + // Build a incremental report + List rdbiList = new ArrayList<>(); + // Build a full report + BlockListAsLongs.Builder builder = BlockListAsLongs.builder(); + + // blk_42 is finalized. + long receivedBlockId = 42; // arbitrary + BlockInfoContiguous receivedBlock = addBlockToBM(receivedBlockId); + rdbiList.add(new ReceivedDeletedBlockInfo(new Block(receivedBlock), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null)); + builder.add(new FinalizedReplica(receivedBlock, null, null)); + + // blk_43 is under construction. + long receivingBlockId = 43; + BlockInfoContiguous receivingBlock = addUcBlockToBM(receivingBlockId); + rdbiList.add(new ReceivedDeletedBlockInfo(new Block(receivingBlock), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVING_BLOCK, null)); + builder.add(new ReplicaBeingWritten(receivingBlock, null, null, null)); + + // blk_44 has 2 records in IBR. It's finalized. So full BR has 1 record. 
+ long receivingReceivedBlockId = 44; + BlockInfoContiguous receivingReceivedBlock = addBlockToBM(receivingReceivedBlockId); + rdbiList.add(new ReceivedDeletedBlockInfo(new Block(receivingReceivedBlock), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVING_BLOCK, null)); + rdbiList.add(new ReceivedDeletedBlockInfo(new Block(receivingReceivedBlock), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null)); + builder.add(new FinalizedReplica(receivingReceivedBlock, null, null)); + + // blk_45 is not in full BR, because it's deleted. + long ReceivedDeletedBlockId = 45; + rdbiList.add(new ReceivedDeletedBlockInfo( + new Block(ReceivedDeletedBlockId), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null)); + rdbiList.add(new ReceivedDeletedBlockInfo( + new Block(ReceivedDeletedBlockId), + ReceivedDeletedBlockInfo.BlockStatus.DELETED_BLOCK, null)); + + // blk_46 exists in DN for a long time, so it's in full BR, but not in IBR. + long existedBlockId = 46; + BlockInfoContiguous existedBlock = addBlockToBM(existedBlockId); + builder.add(new FinalizedReplica(existedBlock, null, null)); + + // process IBR and full BR + StorageReceivedDeletedBlocks srdb = + new StorageReceivedDeletedBlocks(new DatanodeStorage(ds.getStorageID()), + rdbiList.toArray(new ReceivedDeletedBlockInfo[rdbiList.size()])); + bm.processIncrementalBlockReport(node, srdb); + // Make sure it's the first full report + assertEquals(0, ds.getBlockReportCount()); + bm.processReport(node, new DatanodeStorage(ds.getStorageID()), + builder.build(), null, false); + assertEquals(1, ds.getBlockReportCount()); + + // verify the storage info is correct + assertTrue(bm.getStoredBlock(new Block(receivedBlockId)).findStorageInfo + (ds) >= 0); + assertTrue(((BlockInfoContiguousUnderConstruction) bm. 
+ getStoredBlock(new Block(receivingBlockId))).getNumExpectedLocations() > 0); + assertTrue(bm.getStoredBlock(new Block(receivingReceivedBlockId)) + .findStorageInfo(ds) >= 0); + assertNull(bm.getStoredBlock(new Block(ReceivedDeletedBlockId))); + assertTrue(bm.getStoredBlock(new Block(existedBlock)).findStorageInfo + (ds) >= 0); + } + + private BlockInfoContiguous addBlockToBM(long blkId) { + Block block = new Block(blkId); + BlockInfoContiguous blockInfo = + new BlockInfoContiguous(block, (short) 3); + BlockCollection bc = Mockito.mock(BlockCollection.class); + Mockito.doReturn((short) 3).when(bc).getBlockReplication(); + bm.blocksMap.addBlockCollection(blockInfo, bc); + return blockInfo; + } + + private BlockInfoContiguous addUcBlockToBM(long blkId) { + Block block = new Block(blkId); + BlockInfoContiguousUnderConstruction blockInfo = + new BlockInfoContiguousUnderConstruction(block, (short) 3); + BlockCollection bc = Mockito.mock(BlockCollection.class); + Mockito.doReturn((short) 3).when(bc).getBlockReplication(); + bm.blocksMap.addBlockCollection(blockInfo, bc); + return blockInfo; + } /** * Tests that a namenode doesn't choose a datanode with full disks to