diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f8f74527412..aa26088d86a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2187,6 +2187,9 @@ Release 2.8.0 - UNRELEASED HDFS-9297. Decomissioned capacity should not be considered for configured/used capacity (Contributed by Kuhu Shukla) + HDFS-9044. Give Priority to FavouredNodes , before selecting + nodes from FavouredNode's Node Group (J.Andreina via vinayakumarb) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index ad1a739eb56..d9b8d60b36a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -138,20 +138,9 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { numOfReplicas = maxNodesAndReplicas[0]; int maxNodesPerRack = maxNodesAndReplicas[1]; - for (int i = 0; i < favoredNodes.size() && results.size() < numOfReplicas; i++) { - DatanodeDescriptor favoredNode = favoredNodes.get(i); - // Choose a single node which is local to favoredNode. - // 'results' is updated within chooseLocalNode - final DatanodeStorageInfo target = chooseLocalStorage(favoredNode, - favoriteAndExcludedNodes, blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageTypes, false); - if (target == null) { - LOG.warn("Could not find a target for file " + src - + " with favored node " + favoredNode); - continue; - } - favoriteAndExcludedNodes.add(target.getDatanodeDescriptor()); - } + chooseFavouredNodes(src, numOfReplicas, favoredNodes, + favoriteAndExcludedNodes, blocksize, maxNodesPerRack, results, + avoidStaleNodes, storageTypes); if (results.size() < numOfReplicas) { // Not enough favored nodes, choose other nodes. @@ -177,6 +166,29 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { } } + protected void chooseFavouredNodes(String src, int numOfReplicas, + List favoredNodes, + Set favoriteAndExcludedNodes, long blocksize, int maxNodesPerRack, + List results, boolean avoidStaleNodes, + EnumMap storageTypes) + throws NotEnoughReplicasException { + for (int i = 0; i < favoredNodes.size() && results.size() < numOfReplicas; + i++) { + DatanodeDescriptor favoredNode = favoredNodes.get(i); + // Choose a single node which is local to favoredNode. + // 'results' is updated within chooseLocalNode + final DatanodeStorageInfo target = + chooseLocalStorage(favoredNode, favoriteAndExcludedNodes, blocksize, + maxNodesPerRack, results, avoidStaleNodes, storageTypes, false); + if (target == null) { + LOG.warn("Could not find a target for file " + src + + " with favored node " + favoredNode); + continue; + } + favoriteAndExcludedNodes.add(target.getDatanodeDescriptor()); + } + } + /** This is the implementation. */ private DatanodeStorageInfo[] chooseTarget(int numOfReplicas, Node writer, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java index 89f47ad9d64..187d8d6c2b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java @@ -54,16 +54,79 @@ public class BlockPlacementPolicyWithNodeGroup extends BlockPlacementPolicyDefau super.initialize(conf, stats, clusterMap, host2datanodeMap); } - /** choose local node of localMachine as the target. - * if localMachine is not available, choose a node on the same nodegroup or - * rack instead. + /** + * choose all good favored nodes as target. + * If no enough targets, then choose one replica from + * each bad favored node's node group. + * @throws NotEnoughReplicasException + */ + @Override + protected void chooseFavouredNodes(String src, int numOfReplicas, + List favoredNodes, + Set favoriteAndExcludedNodes, long blocksize, + int maxNodesPerRack, List results, + boolean avoidStaleNodes, EnumMap storageTypes) + throws NotEnoughReplicasException { + super.chooseFavouredNodes(src, numOfReplicas, favoredNodes, + favoriteAndExcludedNodes, blocksize, maxNodesPerRack, results, + avoidStaleNodes, storageTypes); + if (results.size() < numOfReplicas) { + // Not enough replicas, choose from unselected Favorednode's Nodegroup + for (int i = 0; + i < favoredNodes.size() && results.size() < numOfReplicas; i++) { + DatanodeDescriptor favoredNode = favoredNodes.get(i); + boolean chosenNode = + isNodeChosen(results, favoredNode); + if (chosenNode) { + continue; + } + NetworkTopologyWithNodeGroup clusterMapNodeGroup = + (NetworkTopologyWithNodeGroup) clusterMap; + // try a node on FavouredNode's node group + DatanodeStorageInfo target = null; + String scope = + clusterMapNodeGroup.getNodeGroup(favoredNode.getNetworkLocation()); + try { + target = + chooseRandom(scope, favoriteAndExcludedNodes, blocksize, + maxNodesPerRack, results, avoidStaleNodes, storageTypes); + } catch (NotEnoughReplicasException e) { + // catch Exception and continue with other favored nodes + continue; + } + if (target == null) { + LOG.warn("Could not find a target for file " + + src + " within nodegroup of favored node " + favoredNode); + continue; + } + favoriteAndExcludedNodes.add(target.getDatanodeDescriptor()); + } + } + } + + private boolean isNodeChosen( + List results, DatanodeDescriptor favoredNode) { + boolean chosenNode = false; + for (int j = 0; j < results.size(); j++) { + if (results.get(j).getDatanodeDescriptor().equals(favoredNode)) { + chosenNode = true; + break; + } + } + return chosenNode; + } + + /** choose local node of localMachine as the target. + * If localMachine is not available, will fallback to nodegroup/rack + * when flag fallbackToNodeGroupAndLocalRack is set. * @return the chosen node */ @Override protected DatanodeStorageInfo chooseLocalStorage(Node localMachine, Set excludedNodes, long blocksize, int maxNodesPerRack, List results, boolean avoidStaleNodes, - EnumMap storageTypes, boolean fallbackToLocalRack) + EnumMap storageTypes, + boolean fallbackToNodeGroupAndLocalRack) throws NotEnoughReplicasException { DatanodeStorageInfo localStorage = chooseLocalStorage(localMachine, excludedNodes, blocksize, maxNodesPerRack, results, @@ -72,6 +135,9 @@ public class BlockPlacementPolicyWithNodeGroup extends BlockPlacementPolicyDefau return localStorage; } + if (!fallbackToNodeGroupAndLocalRack) { + return null; + } // try a node on local node group DatanodeStorageInfo chosenStorage = chooseLocalNodeGroup( (NetworkTopologyWithNodeGroup)clusterMap, localMachine, excludedNodes, @@ -79,10 +145,6 @@ public class BlockPlacementPolicyWithNodeGroup extends BlockPlacementPolicyDefau if (chosenStorage != null) { return chosenStorage; } - - if (!fallbackToLocalRack) { - return null; - } // try a node on local rack return chooseLocalRack(localMachine, excludedNodes, blocksize, maxNodesPerRack, results, avoidStaleNodes, storageTypes); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java index 528021d156a..0ff77702522 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java @@ -162,6 +162,16 @@ public class TestReplicationPolicyWithNodeGroup extends BaseReplicationPolicyTes return cluster.isOnSameNodeGroup(left, right.getDatanodeDescriptor()); } + private DatanodeStorageInfo[] chooseTarget( + int numOfReplicas, + DatanodeDescriptor writer, + Set excludedNodes, + List favoredNodes) { + return replicator.chooseTarget(filename, numOfReplicas, writer, + excludedNodes, BLOCK_SIZE, favoredNodes, + TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); + } + /** * In this testcase, client is dataNodes[0]. So the 1st replica should be * placed on dataNodes[0], the 2nd replica should be placed on @@ -723,4 +733,52 @@ public class TestReplicationPolicyWithNodeGroup extends BaseReplicationPolicyTes assertTrue(excludedNodes.contains(dataNodesForDependencies[i])); } } + + /** + * In this testcase, favored node is dataNodes[6]. + * 1st replica should be placed on favored node. + * @throws Exception + */ + @Test + public void testChooseTargetAsFavouredNodes() throws Exception { + DatanodeStorageInfo[] targets; + List favoredNodes = + new ArrayList(); + favoredNodes.add(dataNodes[6]); + favoredNodes.add(dataNodes[0]); + favoredNodes.add(dataNodes[1]); + targets = chooseTarget(1, dataNodes[7], null, favoredNodes); + assertEquals(targets.length, 1); + assertTrue(favoredNodes.contains(targets[0].getDatanodeDescriptor())); + } + + /** + * In this testcase, passed 2 favored nodes + * dataNodes[0](Good Node), dataNodes[3](Bad node). + * 1st replica should be placed on good favored node dataNodes[0]. + * 2nd replica should be on bad favored node's nodegroup dataNodes[4]. + * @throws Exception + */ + @Test + public void testChooseFavoredNodesNodeGroup() throws Exception { + updateHeartbeatWithUsage(dataNodes[3], + 2* HdfsServerConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, + (HdfsServerConstants.MIN_BLOCKS_FOR_WRITE-1)*BLOCK_SIZE, 0L, + 0L, 0L, 0, 0); // no space + + DatanodeStorageInfo[] targets; + List expectedTargets = + new ArrayList(); + expectedTargets.add(dataNodes[0]); + expectedTargets.add(dataNodes[4]); + List favouredNodes = + new ArrayList(); + favouredNodes.add(dataNodes[3]); + favouredNodes.add(dataNodes[0]); + targets = chooseTarget(2, dataNodes[7], null, favouredNodes); + assertTrue("1st Replica is incorrect", + expectedTargets.contains(targets[0].getDatanodeDescriptor())); + assertTrue("2nd Replica is incorrect", + expectedTargets.contains(targets[1].getDatanodeDescriptor())); + } }