HDFS-7742. Favoring decommissioning node for replication can cause a block to stay
underreplicated for long periods. Contributed by Nathan Roberts.
(cherry picked from commit 04ee18ed48)
(cherry picked from commit c4cedfc1d6)
(cherry picked from commit c6b68a82adea8de488b255594d35db8e01f5fc8f)
This commit is contained in:
parent
0b1e66f01d
commit
bc8728cd27
|
@ -109,6 +109,9 @@ Release 2.6.1 - UNRELEASED
|
||||||
HDFS-7960. The full block report should prune zombie storages even if
|
HDFS-7960. The full block report should prune zombie storages even if
|
||||||
they're not empty. (cmccabe and Eddy Xu via wang)
|
they're not empty. (cmccabe and Eddy Xu via wang)
|
||||||
|
|
||||||
|
HDFS-7742. Favoring decommissioning node for replication can cause a block
|
||||||
|
to stay underreplicated for long periods. (Nathan Roberts via kihwal)
|
||||||
|
|
||||||
Release 2.6.0 - 2014-11-18
|
Release 2.6.0 - 2014-11-18
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -1653,6 +1653,7 @@ public class BlockManager {
|
||||||
if ((nodesCorrupt != null) && nodesCorrupt.contains(node))
|
if ((nodesCorrupt != null) && nodesCorrupt.contains(node))
|
||||||
continue;
|
continue;
|
||||||
if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY
|
if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY
|
||||||
|
&& !node.isDecommissionInProgress()
|
||||||
&& node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams)
|
&& node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams)
|
||||||
{
|
{
|
||||||
continue; // already reached replication limit
|
continue; // already reached replication limit
|
||||||
|
@ -1667,13 +1668,12 @@ public class BlockManager {
|
||||||
// never use already decommissioned nodes
|
// never use already decommissioned nodes
|
||||||
if(node.isDecommissioned())
|
if(node.isDecommissioned())
|
||||||
continue;
|
continue;
|
||||||
// we prefer nodes that are in DECOMMISSION_INPROGRESS state
|
|
||||||
if(node.isDecommissionInProgress() || srcNode == null) {
|
// We got this far, current node is a reasonable choice
|
||||||
|
if (srcNode == null) {
|
||||||
srcNode = node;
|
srcNode = node;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if(srcNode.isDecommissionInProgress())
|
|
||||||
continue;
|
|
||||||
// switch to a different node randomly
|
// switch to a different node randomly
|
||||||
// this is to prevent deterministically selecting the same node even
|
// this is to prevent deterministically selecting the same node even
|
||||||
// if the node failed to replicate the block on previous iterations
|
// if the node failed to replicate the block on previous iterations
|
||||||
|
|
|
@ -535,6 +535,48 @@ public class TestBlockManager {
|
||||||
UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY));
|
UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFavorDecomUntilHardLimit() throws Exception {
|
||||||
|
bm.maxReplicationStreams = 0;
|
||||||
|
bm.replicationStreamsHardLimit = 1;
|
||||||
|
|
||||||
|
long blockId = 42; // arbitrary
|
||||||
|
Block aBlock = new Block(blockId, 0, 0);
|
||||||
|
List<DatanodeDescriptor> origNodes = getNodes(0, 1);
|
||||||
|
// Add the block to the first node.
|
||||||
|
addBlockOnNodes(blockId,origNodes.subList(0,1));
|
||||||
|
origNodes.get(0).startDecommission();
|
||||||
|
|
||||||
|
List<DatanodeDescriptor> cntNodes = new LinkedList<DatanodeDescriptor>();
|
||||||
|
List<DatanodeStorageInfo> liveNodes = new LinkedList<DatanodeStorageInfo>();
|
||||||
|
|
||||||
|
assertNotNull("Chooses decommissioning source node for a normal replication"
|
||||||
|
+ " if all available source nodes have reached their replication"
|
||||||
|
+ " limits below the hard limit.",
|
||||||
|
bm.chooseSourceDatanode(
|
||||||
|
aBlock,
|
||||||
|
cntNodes,
|
||||||
|
liveNodes,
|
||||||
|
new NumberReplicas(),
|
||||||
|
UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED));
|
||||||
|
|
||||||
|
|
||||||
|
// Increase the replication count to test replication count > hard limit
|
||||||
|
DatanodeStorageInfo targets[] = { origNodes.get(1).getStorageInfos()[0] };
|
||||||
|
origNodes.get(0).addBlockToBeReplicated(aBlock, targets);
|
||||||
|
|
||||||
|
assertNull("Does not choose a source decommissioning node for a normal"
|
||||||
|
+ " replication when all available nodes exceed the hard limit.",
|
||||||
|
bm.chooseSourceDatanode(
|
||||||
|
aBlock,
|
||||||
|
cntNodes,
|
||||||
|
liveNodes,
|
||||||
|
new NumberReplicas(),
|
||||||
|
UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSafeModeIBR() throws Exception {
|
public void testSafeModeIBR() throws Exception {
|
||||||
DatanodeDescriptor node = spy(nodes.get(0));
|
DatanodeDescriptor node = spy(nodes.get(0));
|
||||||
|
|
Loading…
Reference in New Issue