HDFS-6460. Ignore stale and decommissioned nodes in NetworkTopology#sortByDistance. Contributed by Yongjun Zhang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1601535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2014-06-10 00:39:20 +00:00
parent c94f2cec3a
commit 123c563fe6
7 changed files with 78 additions and 23 deletions

View File

@ -883,15 +883,16 @@ public class NetworkTopology {
* @param seed Used to seed the pseudo-random generator that randomizes the
* set of nodes at each network distance.
*/
public void sortByDistance(Node reader, Node[] nodes, long seed) {
public void sortByDistance(Node reader, Node[] nodes,
int activeLen, long seed) {
/** Sort weights for the nodes array */
int[] weights = new int[nodes.length];
for (int i=0; i<nodes.length; i++) {
int[] weights = new int[activeLen];
for (int i=0; i<activeLen; i++) {
weights[i] = getWeight(reader, nodes[i]);
}
// Add weight/node pairs to a TreeMap to sort
TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
for (int i=0; i<nodes.length; i++) {
for (int i=0; i<activeLen; i++) {
int weight = weights[i];
Node node = nodes[i];
List<Node> list = tree.get(weight);
@ -917,7 +918,7 @@ public class NetworkTopology {
}
}
}
Preconditions.checkState(idx == nodes.length,
Preconditions.checkState(idx == activeLen,
"Sorted the wrong number of nodes!");
}
}

View File

@ -279,7 +279,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
* set of nodes at each network distance.
*/
@Override
public void sortByDistance( Node reader, Node[] nodes, long seed) {
public void sortByDistance( Node reader, Node[] nodes,
int activeLen, long seed) {
// If reader is not a datanode (not in NetworkTopology tree), we need to
// replace this reader with a sibling leaf node in tree.
if (reader != null && !this.contains(reader)) {
@ -292,7 +293,7 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
return;
}
}
super.sortByDistance(reader, nodes, seed);
super.sortByDistance(reader, nodes, nodes.length, seed);
}
/** InnerNodeWithNodeGroup represents a switch/router of a data center, rack

View File

@ -104,7 +104,8 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[1] = dataNodes[2];
testNodes[2] = dataNodes[3];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[2]);
@ -115,7 +116,8 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[1] = dataNodes[4];
testNodes[2] = dataNodes[1];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
@ -124,7 +126,8 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[2];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[2]);
@ -133,7 +136,8 @@ public class TestNetworkTopologyWithNodeGroup {
testNodes[1] = dataNodes[7];
testNodes[2] = dataNodes[2];
testNodes[3] = dataNodes[0];
cluster.sortByDistance(computeNode, testNodes, 0xDEADBEEF);
cluster.sortByDistance(computeNode, testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[2]);
}

View File

@ -489,6 +489,9 @@ Release 2.5.0 - UNRELEASED
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
HDFS-6460. Ignore stale and decommissioned nodes in
NetworkTopology#sortByDistance. (Yongjun Zhang via wang)
BUG FIXES
HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration.

View File

@ -26,7 +26,6 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.security.token.Token;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
/**

View File

@ -331,6 +331,18 @@ public class DatanodeManager {
return heartbeatManager;
}
private boolean isInactive(DatanodeInfo datanode) {
if (datanode.isDecommissioned()) {
return true;
}
if (avoidStaleDataNodesForRead) {
return datanode.isStale(staleInterval);
}
return false;
}
/** Sort the located blocks by the distance to the target host. */
public void sortLocatedBlocks(final String targethost,
final List<LocatedBlock> locatedblocks) {
@ -351,10 +363,17 @@ public class DatanodeManager {
DFSUtil.DECOM_COMPARATOR;
for (LocatedBlock b : locatedblocks) {
networktopology.sortByDistance(client, b.getLocations(), b
.getBlock().getBlockId());
DatanodeInfo[] di = b.getLocations();
// Move decommissioned/stale datanodes to the bottom
Arrays.sort(b.getLocations(), comparator);
Arrays.sort(di, comparator);
int lastActiveIndex = di.length - 1;
while (lastActiveIndex > 0 && isInactive(di[lastActiveIndex])) {
--lastActiveIndex;
}
int activeLen = lastActiveIndex + 1;
networktopology.sortByDistance(client, b.getLocations(), activeLen,
b.getBlock().getBlockId());
}
}

View File

@ -55,11 +55,18 @@ public class TestNetworkTopology {
DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"),
DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"),
DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"),
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3")
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3"),
DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"),
DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"),
DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2")
};
for (int i = 0; i < dataNodes.length; i++) {
cluster.add(dataNodes[i]);
}
dataNodes[9].setDecommissioned();
dataNodes[10].setDecommissioned();
}
@Test
@ -100,7 +107,7 @@ public class TestNetworkTopology {
@Test
public void testRacks() throws Exception {
assertEquals(cluster.getNumOfRacks(), 3);
assertEquals(cluster.getNumOfRacks(), 5);
assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
@ -125,16 +132,33 @@ public class TestNetworkTopology {
testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[2];
testNodes[2] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[2]);
// array contains both local node & local rack node & decommissioned node
DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
dtestNodes[0] = dataNodes[8];
dtestNodes[1] = dataNodes[12];
dtestNodes[2] = dataNodes[11];
dtestNodes[3] = dataNodes[9];
dtestNodes[4] = dataNodes[10];
cluster.sortByDistance(dataNodes[8], dtestNodes,
dtestNodes.length - 2, 0xDEADBEEF);
assertTrue(dtestNodes[0] == dataNodes[8]);
assertTrue(dtestNodes[1] == dataNodes[11]);
assertTrue(dtestNodes[2] == dataNodes[12]);
assertTrue(dtestNodes[3] == dataNodes[9]);
assertTrue(dtestNodes[4] == dataNodes[10]);
// array contains local node
testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[0];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[0]);
assertTrue(testNodes[1] == dataNodes[1]);
assertTrue(testNodes[2] == dataNodes[3]);
@ -143,7 +167,8 @@ public class TestNetworkTopology {
testNodes[0] = dataNodes[5];
testNodes[1] = dataNodes[3];
testNodes[2] = dataNodes[1];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[1]);
assertTrue(testNodes[1] == dataNodes[3]);
assertTrue(testNodes[2] == dataNodes[5]);
@ -152,7 +177,8 @@ public class TestNetworkTopology {
testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[5];
testNodes[2] = dataNodes[3];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEADBEEF);
assertTrue(testNodes[0] == dataNodes[1]);
assertTrue(testNodes[1] == dataNodes[3]);
assertTrue(testNodes[2] == dataNodes[5]);
@ -161,7 +187,8 @@ public class TestNetworkTopology {
testNodes[0] = dataNodes[1];
testNodes[1] = dataNodes[5];
testNodes[2] = dataNodes[3];
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEAD);
cluster.sortByDistance(dataNodes[0], testNodes,
testNodes.length, 0xDEAD);
// sortByDistance does not take the "data center" layer into consideration
// and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
assertTrue(testNodes[0] == dataNodes[1]);
@ -176,7 +203,8 @@ public class TestNetworkTopology {
testNodes[0] = dataNodes[5];
testNodes[1] = dataNodes[6];
testNodes[2] = dataNodes[7];
cluster.sortByDistance(dataNodes[i], testNodes, 0xBEADED+i);
cluster.sortByDistance(dataNodes[i], testNodes,
testNodes.length, 0xBEADED+i);
if (first == null) {
first = testNodes[0];
} else {