HDFS-16839 It should consider EC reconstruction work when we determine if a node is busy (#5128)

Co-authored-by: Takanobu Asanuma <tasanuma@apache.org>
Reviewed-by: Tao Li <tomscut@apache.org>
This commit is contained in:
Kidd5368 2022-11-30 09:43:15 +08:00 committed by GitHub
parent 03471a736c
commit 72749a4ff8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 2 deletions

View File

@ -2599,7 +2599,8 @@ public class BlockManager implements BlockStatsMXBean {
if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY if (priority != LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY
&& (!node.isDecommissionInProgress() && !node.isEnteringMaintenance()) && (!node.isDecommissionInProgress() && !node.isEnteringMaintenance())
&& node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) { && node.getNumberOfBlocksToBeReplicated() +
node.getNumberOfBlocksToBeErasureCoded() >= maxReplicationStreams) {
if (isStriped && (state == StoredReplicaState.LIVE if (isStriped && (state == StoredReplicaState.LIVE
|| state == StoredReplicaState.DECOMMISSIONING)) { || state == StoredReplicaState.DECOMMISSIONING)) {
liveBusyBlockIndices.add(blockIndex); liveBusyBlockIndices.add(blockIndex);
@ -2609,7 +2610,8 @@ public class BlockManager implements BlockStatsMXBean {
continue; // already reached replication limit continue; // already reached replication limit
} }
if (node.getNumberOfBlocksToBeReplicated() >= replicationStreamsHardLimit) { if (node.getNumberOfBlocksToBeReplicated() +
node.getNumberOfBlocksToBeErasureCoded() >= replicationStreamsHardLimit) {
if (isStriped && (state == StoredReplicaState.LIVE if (isStriped && (state == StoredReplicaState.LIVE
|| state == StoredReplicaState.DECOMMISSIONING)) { || state == StoredReplicaState.DECOMMISSIONING)) {
liveBusyBlockIndices.add(blockIndex); liveBusyBlockIndices.add(blockIndex);

View File

@ -957,6 +957,58 @@ public class TestBlockManager {
assertNull(work); assertNull(work);
} }
@Test
public void testSkipReconstructionWithManyBusyNodes3() {
NameNode.initMetrics(new Configuration(), HdfsServerConstants.NamenodeRole.NAMENODE);
long blockId = -9223372036854775776L; // Real ec block id
// RS-3-2 EC policy
ErasureCodingPolicy ecPolicy =
SystemErasureCodingPolicies.getPolicies().get(1);
// Create an EC block group: 3 data blocks + 2 parity blocks.
Block aBlockGroup = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
// Create 4 storageInfo, which means 1 block is missing.
DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
"storage1", "1.1.1.1", "rack1", "host1");
DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
"storage2", "2.2.2.2", "rack2", "host2");
DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
"storage3", "3.3.3.3", "rack3", "host3");
DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
"storage4", "4.4.4.4", "rack4", "host4");
// Link block with storage.
aBlockInfoStriped.addStorage(ds1, aBlockGroup);
aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
addEcBlockToBM(blockId, ecPolicy);
aBlockInfoStriped.setBlockCollectionId(mockINodeId);
// Reconstruction should be scheduled.
BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
assertNotNull(work);
ExtendedBlock dummyBlock = new ExtendedBlock("bpid", 1, 1, 1);
DatanodeDescriptor dummyDD = ds1.getDatanodeDescriptor();
DatanodeDescriptor[] dummyDDArray = new DatanodeDescriptor[]{dummyDD};
DatanodeStorageInfo[] dummyDSArray = new DatanodeStorageInfo[]{ds1};
// Simulate the 2 nodes reach maxReplicationStreams.
for(int i = 0; i < bm.maxReplicationStreams; i++){ //Add some dummy EC reconstruction task.
ds3.getDatanodeDescriptor().addBlockToBeErasureCoded(dummyBlock, dummyDDArray,
dummyDSArray, new byte[0], new byte[0], ecPolicy);
ds4.getDatanodeDescriptor().addBlockToBeErasureCoded(dummyBlock, dummyDDArray,
dummyDSArray, new byte[0], new byte[0], ecPolicy);
}
// Reconstruction should be skipped since the number of non-busy nodes are not enough.
work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
assertNull(work);
}
@Test @Test
public void testFavorDecomUntilHardLimit() throws Exception { public void testFavorDecomUntilHardLimit() throws Exception {
bm.maxReplicationStreams = 0; bm.maxReplicationStreams = 0;