HDFS-12072. Provide fairness between EC and non-EC recovery tasks. Contributed by Eddy Xu.
This commit is contained in:
parent
ab1a8ae85f
commit
b298948897
|
@ -661,7 +661,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
|
||||||
return erasurecodeBlocks.size();
|
return erasurecodeBlocks.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<BlockTargetPair> getReplicationCommand(int maxTransfers) {
|
int getNumberOfReplicateBlocks() {
|
||||||
|
return replicateBlocks.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
List<BlockTargetPair> getReplicationCommand(int maxTransfers) {
|
||||||
return replicateBlocks.poll(maxTransfers);
|
return replicateBlocks.poll(maxTransfers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1663,21 +1663,38 @@ public class DatanodeManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<DatanodeCommand> cmds = new ArrayList<>();
|
final List<DatanodeCommand> cmds = new ArrayList<>();
|
||||||
// check pending replication
|
// Allocate _approximately_ maxTransfers pending tasks to DataNode.
|
||||||
|
// NN chooses pending tasks based on the ratio between the lengths of
|
||||||
|
// replication and erasure-coded block queues.
|
||||||
|
int totalReplicateBlocks = nodeinfo.getNumberOfReplicateBlocks();
|
||||||
|
int totalECBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
|
||||||
|
int totalBlocks = totalReplicateBlocks + totalECBlocks;
|
||||||
|
if (totalBlocks > 0) {
|
||||||
|
int numReplicationTasks = (int) Math.ceil(
|
||||||
|
(double) (totalReplicateBlocks * maxTransfers) / totalBlocks);
|
||||||
|
int numECTasks = (int) Math.ceil(
|
||||||
|
(double) (totalECBlocks * maxTransfers) / totalBlocks);
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Pending replication tasks: " + numReplicationTasks
|
||||||
|
+ " erasure-coded tasks: " + numECTasks);
|
||||||
|
}
|
||||||
|
// check pending replication tasks
|
||||||
List<BlockTargetPair> pendingList = nodeinfo.getReplicationCommand(
|
List<BlockTargetPair> pendingList = nodeinfo.getReplicationCommand(
|
||||||
maxTransfers);
|
numReplicationTasks);
|
||||||
if (pendingList != null) {
|
if (pendingList != null && !pendingList.isEmpty()) {
|
||||||
cmds.add(new BlockCommand(DatanodeProtocol.DNA_TRANSFER, blockPoolId,
|
cmds.add(new BlockCommand(DatanodeProtocol.DNA_TRANSFER, blockPoolId,
|
||||||
pendingList));
|
pendingList));
|
||||||
maxTransfers -= pendingList.size();
|
|
||||||
}
|
}
|
||||||
// check pending erasure coding tasks
|
// check pending erasure coding tasks
|
||||||
List<BlockECReconstructionInfo> pendingECList = nodeinfo
|
List<BlockECReconstructionInfo> pendingECList = nodeinfo
|
||||||
.getErasureCodeCommand(maxTransfers);
|
.getErasureCodeCommand(numECTasks);
|
||||||
if (pendingECList != null) {
|
if (pendingECList != null && !pendingECList.isEmpty()) {
|
||||||
cmds.add(new BlockECReconstructionCommand(
|
cmds.add(new BlockECReconstructionCommand(
|
||||||
DNA_ERASURE_CODING_RECONSTRUCTION, pendingECList));
|
DNA_ERASURE_CODING_RECONSTRUCTION, pendingECList));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// check block invalidation
|
// check block invalidation
|
||||||
Block[] blks = nodeinfo.getInvalidateBlocks(blockInvalidateLimit);
|
Block[] blks = nodeinfo.getInvalidateBlocks(blockInvalidateLimit);
|
||||||
if (blks != null) {
|
if (blks != null) {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.net.URL;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
@ -500,46 +501,93 @@ public class TestDatanodeManager {
|
||||||
"127.0.0.1:23456", bothAgain.get(1).getInfoAddr());
|
"127.0.0.1:23456", bothAgain.get(1).getInfoAddr());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
/**
|
||||||
public void testPendingRecoveryTasks() throws IOException {
|
* Verify the correctness of the pending recovery process.
|
||||||
|
*
|
||||||
|
* @param numReplicationBlocks the number of replication blocks in the queue.
|
||||||
|
* @param numECBlocks number of EC blocks in the queue.
|
||||||
|
* @param maxTransfers the maxTransfer value.
|
||||||
|
* @param numReplicationTasks the number of replication tasks polled from
|
||||||
|
* the queue.
|
||||||
|
* @param numECTasks the number of EC tasks polled from the queue.
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private void verifyPendingRecoveryTasks(
|
||||||
|
int numReplicationBlocks, int numECBlocks,
|
||||||
|
int maxTransfers, int numReplicationTasks, int numECTasks)
|
||||||
|
throws IOException {
|
||||||
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
|
FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
|
||||||
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
|
Mockito.when(fsn.hasWriteLock()).thenReturn(true);
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
DatanodeManager dm = Mockito.spy(mockDatanodeManager(fsn, conf));
|
DatanodeManager dm = Mockito.spy(mockDatanodeManager(fsn, conf));
|
||||||
|
|
||||||
int maxTransfers = 20;
|
|
||||||
int numPendingTasks = 7;
|
|
||||||
int numECTasks = maxTransfers - numPendingTasks;
|
|
||||||
|
|
||||||
DatanodeDescriptor nodeInfo = Mockito.mock(DatanodeDescriptor.class);
|
DatanodeDescriptor nodeInfo = Mockito.mock(DatanodeDescriptor.class);
|
||||||
Mockito.when(nodeInfo.isRegistered()).thenReturn(true);
|
Mockito.when(nodeInfo.isRegistered()).thenReturn(true);
|
||||||
Mockito.when(nodeInfo.getStorageInfos())
|
Mockito.when(nodeInfo.getStorageInfos())
|
||||||
.thenReturn(new DatanodeStorageInfo[0]);
|
.thenReturn(new DatanodeStorageInfo[0]);
|
||||||
|
|
||||||
List<BlockTargetPair> pendingList =
|
if (numReplicationBlocks > 0) {
|
||||||
Collections.nCopies(numPendingTasks, new BlockTargetPair(null, null));
|
Mockito.when(nodeInfo.getNumberOfReplicateBlocks())
|
||||||
Mockito.when(nodeInfo.getReplicationCommand(maxTransfers))
|
.thenReturn(numReplicationBlocks);
|
||||||
.thenReturn(pendingList);
|
|
||||||
List<BlockECReconstructionInfo> ecPendingList =
|
|
||||||
Collections.nCopies(numECTasks, null);
|
|
||||||
|
|
||||||
|
List<BlockTargetPair> tasks =
|
||||||
|
Collections.nCopies(
|
||||||
|
Math.min(numReplicationTasks, numReplicationBlocks),
|
||||||
|
new BlockTargetPair(null, null));
|
||||||
|
Mockito.when(nodeInfo.getReplicationCommand(numReplicationTasks))
|
||||||
|
.thenReturn(tasks);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numECBlocks > 0) {
|
||||||
|
Mockito.when(nodeInfo.getNumberOfBlocksToBeErasureCoded())
|
||||||
|
.thenReturn(numECBlocks);
|
||||||
|
|
||||||
|
List<BlockECReconstructionInfo> tasks =
|
||||||
|
Collections.nCopies(numECTasks, null);
|
||||||
Mockito.when(nodeInfo.getErasureCodeCommand(numECTasks))
|
Mockito.when(nodeInfo.getErasureCodeCommand(numECTasks))
|
||||||
.thenReturn(ecPendingList);
|
.thenReturn(tasks);
|
||||||
|
}
|
||||||
|
|
||||||
DatanodeRegistration dnReg = Mockito.mock(DatanodeRegistration.class);
|
DatanodeRegistration dnReg = Mockito.mock(DatanodeRegistration.class);
|
||||||
Mockito.when(dm.getDatanode(dnReg)).thenReturn(nodeInfo);
|
Mockito.when(dm.getDatanode(dnReg)).thenReturn(nodeInfo);
|
||||||
|
|
||||||
DatanodeCommand[] cmds = dm.handleHeartbeat(
|
DatanodeCommand[] cmds = dm.handleHeartbeat(
|
||||||
dnReg, new StorageReport[1], "bp-123", 0, 0, 10, maxTransfers, 0, null,
|
dnReg, new StorageReport[1], "bp-123", 0, 0, 10, maxTransfers, 0, null,
|
||||||
SlowPeerReports.EMPTY_REPORT, SlowDiskReports.EMPTY_REPORT);
|
SlowPeerReports.EMPTY_REPORT, SlowDiskReports.EMPTY_REPORT);
|
||||||
|
|
||||||
assertEquals(2, cmds.length);
|
long expectedNumCmds = Arrays.stream(
|
||||||
assertTrue(cmds[0] instanceof BlockCommand);
|
new int[]{numReplicationTasks, numECTasks})
|
||||||
BlockCommand replicaCmd = (BlockCommand) cmds[0];
|
.filter(x -> x > 0)
|
||||||
assertEquals(numPendingTasks, replicaCmd.getBlocks().length);
|
.count();
|
||||||
assertEquals(numPendingTasks, replicaCmd.getTargets().length);
|
assertEquals(expectedNumCmds, cmds.length);
|
||||||
assertTrue(cmds[1] instanceof BlockECReconstructionCommand);
|
|
||||||
BlockECReconstructionCommand ecRecoveryCmd =
|
int idx = 0;
|
||||||
(BlockECReconstructionCommand) cmds[1];
|
if (numReplicationTasks > 0) {
|
||||||
assertEquals(numECTasks, ecRecoveryCmd.getECTasks().size());
|
assertTrue(cmds[idx] instanceof BlockCommand);
|
||||||
|
BlockCommand cmd = (BlockCommand) cmds[0];
|
||||||
|
assertEquals(numReplicationTasks, cmd.getBlocks().length);
|
||||||
|
assertEquals(numReplicationTasks, cmd.getTargets().length);
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numECTasks > 0) {
|
||||||
|
assertTrue(cmds[idx] instanceof BlockECReconstructionCommand);
|
||||||
|
BlockECReconstructionCommand cmd =
|
||||||
|
(BlockECReconstructionCommand) cmds[idx];
|
||||||
|
assertEquals(numECTasks, cmd.getECTasks().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
Mockito.verify(nodeInfo).getReplicationCommand(numReplicationTasks);
|
||||||
|
Mockito.verify(nodeInfo).getErasureCodeCommand(numECTasks);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPendingRecoveryTasks() throws IOException {
|
||||||
|
// Tasks are split according to the ratio between queue lengths.
|
||||||
|
verifyPendingRecoveryTasks(20, 20, 20, 10, 10);
|
||||||
|
verifyPendingRecoveryTasks(40, 10, 20, 16, 4);
|
||||||
|
|
||||||
|
// Approximately load tasks if the ratio between queue lengths is large.
|
||||||
|
verifyPendingRecoveryTasks(400, 1, 20, 20, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue