HDFS-6701. Make seed optional in NetworkTopology#sortByDistance. Contributed by Ashwin Shankar.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1612625 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2014-07-22 17:47:49 +00:00
parent 0c72ca9e7a
commit c83c5b868e
9 changed files with 87 additions and 27 deletions

View File

@ -883,8 +883,8 @@ public class NetworkTopology {
* @param seed Used to seed the pseudo-random generator that randomizes the
* set of nodes at each network distance.
*/
public void sortByDistance(Node reader, Node[] nodes,
int activeLen, long seed) {
public void sortByDistance(Node reader, Node[] nodes, int activeLen,
long seed, boolean randomizeBlockLocationsPerBlock) {
/** Sort weights for the nodes array */
int[] weights = new int[activeLen];
for (int i=0; i<activeLen; i++) {
@ -906,8 +906,11 @@ public class NetworkTopology {
// Seed is normally the block id
// This means we use the same pseudo-random order for each block, for
// potentially better page cache usage.
// Seed is not used if we want to randomize block location for every block
Random rand = getRandom();
if (!randomizeBlockLocationsPerBlock) {
rand.setSeed(seed);
}
int idx = 0;
for (List<Node> list: tree.values()) {
if (list != null) {

View File

@ -279,8 +279,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
* set of nodes at each network distance.
*/
@Override
public void sortByDistance( Node reader, Node[] nodes,
int activeLen, long seed) {
public void sortByDistance(Node reader, Node[] nodes, int activeLen,
long seed, boolean randomizeBlockLocationsPerBlock) {
// If reader is not a datanode (not in NetworkTopology tree), we need to
// replace this reader with a sibling leaf node in tree.
if (reader != null && !this.contains(reader)) {
@ -293,7 +293,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
return;
}
}
super.sortByDistance(reader, nodes, nodes.length, seed);
super.sortByDistance(reader, nodes, nodes.length, seed,
randomizeBlockLocationsPerBlock);
}
/** InnerNodeWithNodeGroup represents a switch/router of a data center, rack

View File

@ -105,7 +105,7 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[2] = dataNodes[3];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[2]);
@ -117,7 +117,7 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[2] = dataNodes[1];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
@ -127,7 +127,7 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[2] = dataNodes[2];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[2]);
@ -137,7 +137,7 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[2] = dataNodes[2];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(computeNode, testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[2]);
}

View File

@ -311,6 +311,9 @@ Release 2.6.0 - UNRELEASED
datanodes and change datanode to write block replicas using the specified
storage type. (szetszwo)
HDFS-6701. Make seed optional in NetworkTopology#sortByDistance.
(Ashwin Shankar via wang)
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -214,6 +214,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_KEY = "dfs.namenode.min.supported.datanode.version";
public static final String DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_DEFAULT = "3.0.0-SNAPSHOT";
public static final String DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK = "dfs.namenode.randomize-block-locations-per-block";
public static final boolean DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT = false;
public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;

View File

@ -345,7 +345,8 @@ public class DatanodeManager {
/** Sort the located blocks by the distance to the target host. */
public void sortLocatedBlocks(final String targethost,
final List<LocatedBlock> locatedblocks) {
final List<LocatedBlock> locatedblocks,
boolean randomizeBlockLocationsPerBlock) {
//sort the blocks
// As it is possible for the separation of node manager and datanode,
// here we should get node but not datanode only .
@ -372,8 +373,8 @@ public class DatanodeManager {
--lastActiveIndex;
}
int activeLen = lastActiveIndex + 1;
networktopology.sortByDistance(client, b.getLocations(), activeLen,
b.getBlock().getBlockId());
networktopology.sortByDistance(client, b.getLocations(), activeLen, b
.getBlock().getBlockId(), randomizeBlockLocationsPerBlock);
}
}

View File

@ -83,6 +83,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROU
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT;
import static org.apache.hadoop.util.Time.now;
import java.io.BufferedWriter;
@ -528,6 +531,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private final FSImage fsImage;
private boolean randomizeBlockLocationsPerBlock;
/**
* Notify that loading of this FSDirectory is complete, and
* it is imageLoaded for use
@ -838,6 +843,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
this.randomizeBlockLocationsPerBlock = conf.getBoolean(
DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK,
DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT);
this.dtSecretManager = createDelegationTokenSecretManager(conf);
this.dir = new FSDirectory(this, conf);
this.snapshotManager = new SnapshotManager(dir);
@ -1700,8 +1709,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
true);
if (blocks != null) {
blockManager.getDatanodeManager().sortLocatedBlocks(
clientMachine, blocks.getLocatedBlocks());
blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
blocks.getLocatedBlocks(), randomizeBlockLocationsPerBlock);
// lastBlock is not part of getLocatedBlocks(), might need to sort it too
LocatedBlock lastBlock = blocks.getLastLocatedBlock();
@ -1709,8 +1718,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
ArrayList<LocatedBlock> lastBlockList =
Lists.newArrayListWithCapacity(1);
lastBlockList.add(lastBlock);
blockManager.getDatanodeManager().sortLocatedBlocks(
clientMachine, lastBlockList);
blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
lastBlockList, randomizeBlockLocationsPerBlock);
}
}
return blocks;

View File

@ -2039,4 +2039,17 @@
</description>
</property>
<property>
<name>dfs.namenode.randomize-block-locations-per-block</name>
<value>false</value>
<description>When fetching replica locations of a block, the replicas
are sorted based on network distance. This configuration parameter
determines whether the replicas at the same network distance are randomly
shuffled. By default, this is false, such that repeated requests for a block's
replicas always result in the same order. This potentially improves page cache
behavior. However, for some network topologies, it is desirable to shuffle this
order for better load balancing.
</description>
</property>
</configuration>

View File

@ -60,7 +60,14 @@ public class TestNetworkTopology {
DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"),
DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2")
DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2"),
DFSTestUtil.getDatanodeDescriptor("14.14.14.14", "/d4/r1"),
DFSTestUtil.getDatanodeDescriptor("15.15.15.15", "/d4/r1"),
DFSTestUtil.getDatanodeDescriptor("16.16.16.16", "/d4/r1"),
DFSTestUtil.getDatanodeDescriptor("17.17.17.17", "/d4/r1"),
DFSTestUtil.getDatanodeDescriptor("18.18.18.18", "/d4/r1"),
DFSTestUtil.getDatanodeDescriptor("19.19.19.19", "/d4/r1"),
DFSTestUtil.getDatanodeDescriptor("20.20.20.20", "/d4/r1"),
};
for (int i = 0; i < dataNodes.length; i++) {
cluster.add(dataNodes[i]);
@ -107,7 +114,7 @@ public class TestNetworkTopology {
@Test
public void testRacks() throws Exception {
assertEquals(cluster.getNumOfRacks(), 5);
assertEquals(cluster.getNumOfRacks(), 6);
assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
@ -133,7 +140,7 @@ public class TestNetworkTopology {
testNodes[1] = dataNodes[2];
testNodes[2] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[2]);
@ -146,7 +153,7 @@ public class TestNetworkTopology {
dtestNodes[3] = dataNodes[9];
dtestNodes[4] = dataNodes[10];
cluster.sortByDistance(dataNodes[8], dtestNodes,
dtestNodes.length - 2, 0xDEADBEEF);
dtestNodes.length - 2, 0xDEADBEEF, false);
assertTrue(dtestNodes[0] == dataNodes[8]);
assertTrue(dtestNodes[1] == dataNodes[11]);
assertTrue(dtestNodes[2] == dataNodes[12]);
@ -158,7 +165,7 @@ public class TestNetworkTopology {
testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[3]);
@ -168,7 +175,7 @@ public class TestNetworkTopology {
testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[1];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[1]);
assertTrue(testNodes[1] == dataNodes[3]);
assertTrue(testNodes[2] == dataNodes[5]);
@ -178,7 +185,7 @@ public class TestNetworkTopology {
testNodes[1] = dataNodes[5];
testNodes[2] = dataNodes[3];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
testNodes.length, 0xDEADBEEF, false);
assertTrue(testNodes[0] == dataNodes[1]);
assertTrue(testNodes[1] == dataNodes[3]);
assertTrue(testNodes[2] == dataNodes[5]);
@ -188,7 +195,7 @@ public class TestNetworkTopology {
testNodes[1] = dataNodes[5];
testNodes[2] = dataNodes[3];
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEAD);
testNodes.length, 0xDEAD, false);
// sortByDistance does not take the "data center" layer into consideration
// and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
assertTrue(testNodes[0] == dataNodes[1]);
@ -204,7 +211,27 @@ public class TestNetworkTopology {
testNodes[1] = dataNodes[6];
testNodes[2] = dataNodes[7];
cluster.sortByDistance(dataNodes[i], testNodes,
testNodes.length, 0xBEADED+i);
testNodes.length, 0xBEADED+i, false);
if (first == null) {
first = testNodes[0];
} else {
if (first != testNodes[0]) {
foundRandom = true;
break;
}
}
}
assertTrue("Expected to find a different first location", foundRandom);
// Array of rack local nodes with randomizeBlockLocationsPerBlock set to
// true
// Expect random order of block locations for same block
first = null;
for (int i = 1; i <= 4; i++) {
testNodes[0] = dataNodes[13];
testNodes[1] = dataNodes[14];
testNodes[2] = dataNodes[15];
cluster.sortByDistance(dataNodes[15 + i], testNodes, testNodes.length,
0xBEADED, true);
if (first == null) {
first = testNodes[0];
} else {