From 8c7f2ddc10f3f222a520d9037e7beb0cbcca8f47 Mon Sep 17 00:00:00 2001
From: Kidd5368 <57645247+Kidd53685368@users.noreply.github.com>
Date: Wed, 30 Nov 2022 09:43:15 +0800
Subject: [PATCH] HDFS-16839 It should consider EC reconstruction work when we determine if a node is busy (#5128)

Co-authored-by: Takanobu Asanuma
Reviewed-by: Tao Li
(cherry picked from commit 72749a4ff8a240867a822bb9ae1382d3235083ea)
---
 .../server/blockmanagement/BlockManager.java  |  6 ++-
 .../blockmanagement/TestBlockManager.java     | 52 +++++++++++++++++++
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 4d07910a68c..c340e9a00d7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -2497,7 +2497,8 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block,
 
       if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY
           && (!node.isDecommissionInProgress() && !node.isEnteringMaintenance())
-          && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) {
+          && node.getNumberOfBlocksToBeReplicated()
+          + node.getNumberOfBlocksToBeErasureCoded() >= maxReplicationStreams) {
         if (isStriped && (state == StoredReplicaState.LIVE
             || state == StoredReplicaState.DECOMMISSIONING)) {
           liveBusyBlockIndices.add(blockIndex);
@@ -2507,7 +2508,8 @@ DatanodeDescriptor[] chooseSourceDatanodes(BlockInfo block,
         continue; // already reached replication limit
       }
 
-      if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) {
+      if (node.getNumberOfBlocksToBeReplicated()
+          + node.getNumberOfBlocksToBeErasureCoded() >= replicationStreamsHardLimit) {
         if (isStriped && (state == StoredReplicaState.LIVE
             || state == StoredReplicaState.DECOMMISSIONING)) {
           liveBusyBlockIndices.add(blockIndex);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
index bf8af46cc85..cfa707f4b0d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@@ -957,6 +957,58 @@ public void testSkipReconstructionWithManyBusyNodes2() {
     assertNull(work);
   }
 
+  @Test
+  public void testSkipReconstructionWithManyBusyNodes3() {
+    NameNode.initMetrics(new Configuration(), HdfsServerConstants.NamenodeRole.NAMENODE);
+    long blockId = -9223372036854775776L; // Real EC block id
+    // RS-3-2 EC policy
+    ErasureCodingPolicy ecPolicy =
+        SystemErasureCodingPolicies.getPolicies().get(1);
+
+    // Create an EC block group: 3 data blocks + 2 parity blocks.
+    Block aBlockGroup = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
+    BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
+
+    // Create 4 storageInfos, which means 1 block is missing.
+    DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage1", "1.1.1.1", "rack1", "host1");
+    DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage2", "2.2.2.2", "rack2", "host2");
+    DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage3", "3.3.3.3", "rack3", "host3");
+    DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
+        "storage4", "4.4.4.4", "rack4", "host4");
+
+    // Link block with storage.
+    aBlockInfoStriped.addStorage(ds1, aBlockGroup);
+    aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
+    aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
+    aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
+
+    addEcBlockToBM(blockId, ecPolicy);
+    aBlockInfoStriped.setBlockCollectionId(mockINodeId);
+
+    // Reconstruction should be scheduled.
+    BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNotNull(work);
+
+    ExtendedBlock dummyBlock = new ExtendedBlock("bpid", 1, 1, 1);
+    DatanodeDescriptor dummyDD = ds1.getDatanodeDescriptor();
+    DatanodeDescriptor[] dummyDDArray = new DatanodeDescriptor[]{dummyDD};
+    DatanodeStorageInfo[] dummyDSArray = new DatanodeStorageInfo[]{ds1};
+    // Simulate that 2 nodes reach maxReplicationStreams.
+    for (int i = 0; i < bm.maxReplicationStreams; i++) { // Add some dummy EC reconstruction tasks.
+      ds3.getDatanodeDescriptor().addBlockToBeErasureCoded(dummyBlock, dummyDDArray,
+          dummyDSArray, new byte[0], new byte[0], ecPolicy);
+      ds4.getDatanodeDescriptor().addBlockToBeErasureCoded(dummyBlock, dummyDDArray,
+          dummyDSArray, new byte[0], new byte[0], ecPolicy);
+    }
+
+    // Reconstruction should be skipped since there are not enough non-busy nodes.
+    work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
+    assertNull(work);
+  }
+
   @Test
   public void testFavorDecomUntilHardLimit() throws Exception {
     bm.maxReplicationStreams = 0;
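
Note (illustration, not part of the patch): the change above makes the source-node
"busy" check count queued EC reconstruction work together with queued replication
work before comparing against maxReplicationStreams and replicationStreamsHardLimit.
A minimal before/after sketch of that check, where BusyNodeCheckSketch,
pendingReplication and pendingEcReconstruction are hypothetical stand-ins for the
DatanodeDescriptor counters getNumberOfBlocksToBeReplicated() and
getNumberOfBlocksToBeErasureCoded() used in the diff:

// Sketch only; simplified stand-ins for the DatanodeDescriptor counters above.
class BusyNodeCheckSketch {
  /** Before HDFS-16839: only queued replication work counted toward the limit. */
  static boolean isBusyBefore(int pendingReplication, int maxReplicationStreams) {
    return pendingReplication >= maxReplicationStreams;
  }

  /** After HDFS-16839: queued EC reconstruction work counts as well. */
  static boolean isBusyAfter(int pendingReplication, int pendingEcReconstruction,
      int maxReplicationStreams) {
    return pendingReplication + pendingEcReconstruction >= maxReplicationStreams;
  }

  public static void main(String[] args) {
    int maxReplicationStreams = 2; // default of dfs.namenode.replication.max-streams
    // A node with no replication work but 2 queued EC reconstruction tasks:
    System.out.println(isBusyBefore(0, maxReplicationStreams));   // false -> still chosen as a source
    System.out.println(isBusyAfter(0, 2, maxReplicationStreams)); // true  -> correctly skipped
  }
}

This mirrors what testSkipReconstructionWithManyBusyNodes3 asserts: once ds3 and
ds4 each have maxReplicationStreams dummy EC tasks queued, scheduleReconstruction()
returns null instead of scheduling more work on busy nodes.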