HDFS-16479. EC: NameNode should not send a reconstruction work when the source datanodes are insufficient (#4138)
(cherry picked from commit 2efab92959)
This commit is contained in:
parent
cb14e8de81
commit
b8c6ba6046
|
@ -2060,6 +2060,16 @@ public class BlockManager implements BlockStatsMXBean {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// skip if source datanodes for reconstructing ec block are not enough
|
||||||
|
if (block.isStriped()) {
|
||||||
|
BlockInfoStriped stripedBlock = (BlockInfoStriped) block;
|
||||||
|
if (stripedBlock.getRealDataBlockNum() > srcNodes.length) {
|
||||||
|
LOG.debug("Block {} cannot be reconstructed due to shortage of source datanodes ", block);
|
||||||
|
NameNode.getNameNodeMetrics().incNumTimesReReplicationNotScheduled();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// liveReplicaNodes can include READ_ONLY_SHARED replicas which are
|
// liveReplicaNodes can include READ_ONLY_SHARED replicas which are
|
||||||
// not included in the numReplicas.liveReplicas() count
|
// not included in the numReplicas.liveReplicas() count
|
||||||
assert liveReplicaNodes.size() >= numReplicas.liveReplicas();
|
assert liveReplicaNodes.size() >= numReplicas.liveReplicas();
|
||||||
|
|
|
@ -842,6 +842,102 @@ public class TestBlockManager {
|
||||||
0, numReplicas.redundantInternalBlocks());
|
0, numReplicas.redundantInternalBlocks());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSkipReconstructionWithManyBusyNodes() {
|
||||||
|
long blockId = -9223372036854775776L; // real ec block id
|
||||||
|
// RS-3-2 EC policy
|
||||||
|
ErasureCodingPolicy ecPolicy =
|
||||||
|
SystemErasureCodingPolicies.getPolicies().get(1);
|
||||||
|
|
||||||
|
// create an EC block group: 3 data blocks + 2 parity blocks
|
||||||
|
Block aBlockGroup = new Block(blockId, ecPolicy.getCellSize() * ecPolicy.getNumDataUnits(), 0);
|
||||||
|
BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
|
||||||
|
|
||||||
|
// create 4 storageInfo, which means 1 block is missing
|
||||||
|
DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage1", "1.1.1.1", "rack1", "host1");
|
||||||
|
DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage2", "2.2.2.2", "rack2", "host2");
|
||||||
|
DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage3", "3.3.3.3", "rack3", "host3");
|
||||||
|
DatanodeStorageInfo ds4 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage4", "4.4.4.4", "rack4", "host4");
|
||||||
|
|
||||||
|
// link block with storage
|
||||||
|
aBlockInfoStriped.addStorage(ds1, aBlockGroup);
|
||||||
|
aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
|
||||||
|
aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
|
||||||
|
aBlockInfoStriped.addStorage(ds4, new Block(blockId + 3, 0, 0));
|
||||||
|
|
||||||
|
addEcBlockToBM(blockId, ecPolicy);
|
||||||
|
aBlockInfoStriped.setBlockCollectionId(mockINodeId);
|
||||||
|
|
||||||
|
// reconstruction should be scheduled
|
||||||
|
BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||||
|
assertNotNull(work);
|
||||||
|
|
||||||
|
// simulate the 2 nodes reach maxReplicationStreams
|
||||||
|
for(int i = 0; i < bm.maxReplicationStreams; i++){
|
||||||
|
ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
|
||||||
|
ds4.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconstruction should be skipped since the number of non-busy nodes are not enough
|
||||||
|
work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||||
|
assertNull(work);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSkipReconstructionWithManyBusyNodes2() {
|
||||||
|
long blockId = -9223372036854775776L; // real ec block id
|
||||||
|
// RS-3-2 EC policy
|
||||||
|
ErasureCodingPolicy ecPolicy =
|
||||||
|
SystemErasureCodingPolicies.getPolicies().get(1);
|
||||||
|
|
||||||
|
// create an EC block group: 2 data blocks + 2 parity blocks
|
||||||
|
Block aBlockGroup = new Block(blockId,
|
||||||
|
ecPolicy.getCellSize() * (ecPolicy.getNumDataUnits() - 1), 0);
|
||||||
|
BlockInfoStriped aBlockInfoStriped = new BlockInfoStriped(aBlockGroup, ecPolicy);
|
||||||
|
|
||||||
|
// create 3 storageInfo, which means 1 block is missing
|
||||||
|
DatanodeStorageInfo ds1 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage1", "1.1.1.1", "rack1", "host1");
|
||||||
|
DatanodeStorageInfo ds2 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage2", "2.2.2.2", "rack2", "host2");
|
||||||
|
DatanodeStorageInfo ds3 = DFSTestUtil.createDatanodeStorageInfo(
|
||||||
|
"storage3", "3.3.3.3", "rack3", "host3");
|
||||||
|
|
||||||
|
// link block with storage
|
||||||
|
aBlockInfoStriped.addStorage(ds1, aBlockGroup);
|
||||||
|
aBlockInfoStriped.addStorage(ds2, new Block(blockId + 1, 0, 0));
|
||||||
|
aBlockInfoStriped.addStorage(ds3, new Block(blockId + 2, 0, 0));
|
||||||
|
|
||||||
|
addEcBlockToBM(blockId, ecPolicy);
|
||||||
|
aBlockInfoStriped.setBlockCollectionId(mockINodeId);
|
||||||
|
|
||||||
|
// reconstruction should be scheduled
|
||||||
|
BlockReconstructionWork work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||||
|
assertNotNull(work);
|
||||||
|
|
||||||
|
// simulate the 1 node reaches maxReplicationStreams
|
||||||
|
for(int i = 0; i < bm.maxReplicationStreams; i++){
|
||||||
|
ds2.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconstruction should still be scheduled since there are 2 source nodes to create 2 blocks
|
||||||
|
work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||||
|
assertNotNull(work);
|
||||||
|
|
||||||
|
// simulate the 1 more node reaches maxReplicationStreams
|
||||||
|
for(int i = 0; i < bm.maxReplicationStreams; i++){
|
||||||
|
ds3.getDatanodeDescriptor().incrementPendingReplicationWithoutTargets();
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconstruction should be skipped since the number of non-busy nodes are not enough
|
||||||
|
work = bm.scheduleReconstruction(aBlockInfoStriped, 3);
|
||||||
|
assertNull(work);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFavorDecomUntilHardLimit() throws Exception {
|
public void testFavorDecomUntilHardLimit() throws Exception {
|
||||||
bm.maxReplicationStreams = 0;
|
bm.maxReplicationStreams = 0;
|
||||||
|
|
Loading…
Reference in New Issue