HDFS-7742. Favoring decommissioning node for replication can cause a block to stay underreplicated for long periods. Contributed by Nathan Roberts.
This commit is contained in:
Kihwal Lee 2015-03-30 10:10:11 -05:00
parent ae3e8c61ff
commit 04ee18ed48
3 changed files with 50 additions and 5 deletions

View File

@ -829,6 +829,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7410. Support CreateFlags with append() to support hsync() for HDFS-7410. Support CreateFlags with append() to support hsync() for
appending streams (Vinayakumar B via Colin P. McCabe) appending streams (Vinayakumar B via Colin P. McCabe)
HDFS-7742. Favoring decommissioning node for replication can cause a block
to stay underreplicated for long periods (Nathan Roberts via kihwal)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-7454. Reduce memory footprint for AclEntries in NameNode. HDFS-7454. Reduce memory footprint for AclEntries in NameNode.

View File

@ -1637,7 +1637,8 @@ public class BlockManager {
// If so, do not select the node as src node // If so, do not select the node as src node
if ((nodesCorrupt != null) && nodesCorrupt.contains(node)) if ((nodesCorrupt != null) && nodesCorrupt.contains(node))
continue; continue;
if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY if(priority != UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY
&& !node.isDecommissionInProgress()
&& node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams) && node.getNumberOfBlocksToBeReplicated() >= maxReplicationStreams)
{ {
continue; // already reached replication limit continue; // already reached replication limit
@ -1652,13 +1653,12 @@ public class BlockManager {
// never use already decommissioned nodes // never use already decommissioned nodes
if(node.isDecommissioned()) if(node.isDecommissioned())
continue; continue;
// we prefer nodes that are in DECOMMISSION_INPROGRESS state
if(node.isDecommissionInProgress() || srcNode == null) { // We got this far, current node is a reasonable choice
if (srcNode == null) {
srcNode = node; srcNode = node;
continue; continue;
} }
if(srcNode.isDecommissionInProgress())
continue;
// switch to a different node randomly // switch to a different node randomly
// this to prevent from deterministically selecting the same node even // this to prevent from deterministically selecting the same node even
// if the node failed to replicate the block on previous iterations // if the node failed to replicate the block on previous iterations

View File

@ -534,6 +534,48 @@ public class TestBlockManager {
UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY)); UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY));
} }
/**
 * Exercises source-node selection when the only replica of a block lives on
 * a datanode that has started decommissioning.
 *
 * With {@code maxReplicationStreams} set to 0 and
 * {@code replicationStreamsHardLimit} set to 1, the test expects
 * {@code chooseSourceDatanode} to return a node for a normal-priority
 * replication at first, and to return {@code null} once a block has been
 * queued for replication on that node.
 */
@Test
public void testFavorDecomUntilHardLimit() throws Exception {
  bm.maxReplicationStreams = 0;
  bm.replicationStreamsHardLimit = 1;

  final long id = 42; // arbitrary
  final Block block = new Block(id, 0, 0);
  final List<DatanodeDescriptor> datanodes = getNodes(0, 1);
  final DatanodeDescriptor decomNode = datanodes.get(0);

  // Only the first node holds the block, and that node is put into
  // decommissioning before any source selection happens.
  addBlockOnNodes(id, datanodes.subList(0, 1));
  decomNode.startDecommission();

  // Output collections handed to chooseSourceDatanode; the same instances
  // are reused for both calls below.
  final List<DatanodeDescriptor> containingNodes =
      new LinkedList<DatanodeDescriptor>();
  final List<DatanodeStorageInfo> liveReplicaStorages =
      new LinkedList<DatanodeStorageInfo>();

  // Phase 1: nothing is queued on the decommissioning node yet.
  assertNotNull("Chooses decommissioning source node for a normal replication"
      + " if all available source nodes have reached their replication"
      + " limits below the hard limit.",
      bm.chooseSourceDatanode(block, containingNodes, liveReplicaStorages,
          new NumberReplicas(),
          UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED));

  // Phase 2: queue one replication on the decommissioning node, targeting
  // the second node's first storage, to raise its replication count.
  // NOTE(review): presumably this brings the count up to the hard limit of 1
  // configured above - confirm against chooseSourceDatanode.
  final DatanodeStorageInfo[] replicationTargets = {
      datanodes.get(1).getStorageInfos()[0]
  };
  decomNode.addBlockToBeReplicated(block, replicationTargets);

  assertNull("Does not choose a source decommissioning node for a normal"
      + " replication when all available nodes exceed the hard limit.",
      bm.chooseSourceDatanode(block, containingNodes, liveReplicaStorages,
          new NumberReplicas(),
          UnderReplicatedBlocks.QUEUE_UNDER_REPLICATED));
}
@Test @Test
public void testSafeModeIBR() throws Exception { public void testSafeModeIBR() throws Exception {
DatanodeDescriptor node = spy(nodes.get(0)); DatanodeDescriptor node = spy(nodes.get(0));