HDFS-6460. Ignore stale and decommissioned nodes in NetworkTopology#sortByDistance. Contributed by Yongjun Zhang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1601535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c94f2cec3a
commit
123c563fe6
|
@ -883,15 +883,16 @@ public class NetworkTopology {
|
|||
* @param seed Used to seed the pseudo-random generator that randomizes the
|
||||
* set of nodes at each network distance.
|
||||
*/
|
||||
public void sortByDistance(Node reader, Node[] nodes, long seed) {
|
||||
public void sortByDistance(Node reader, Node[] nodes,
|
||||
int activeLen, long seed) {
|
||||
/** Sort weights for the nodes array */
|
||||
int[] weights = new int[nodes.length];
|
||||
for (int i=0; i<nodes.length; i++) {
|
||||
int[] weights = new int[activeLen];
|
||||
for (int i=0; i<activeLen; i++) {
|
||||
weights[i] = getWeight(reader, nodes[i]);
|
||||
}
|
||||
// Add weight/node pairs to a TreeMap to sort
|
||||
TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
|
||||
for (int i=0; i<nodes.length; i++) {
|
||||
for (int i=0; i<activeLen; i++) {
|
||||
int weight = weights[i];
|
||||
Node node = nodes[i];
|
||||
List<Node> list = tree.get(weight);
|
||||
|
@ -917,7 +918,7 @@ public class NetworkTopology {
|
|||
}
|
||||
}
|
||||
}
|
||||
Preconditions.checkState(idx == nodes.length,
|
||||
Preconditions.checkState(idx == activeLen,
|
||||
"Sorted the wrong number of nodes!");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -279,7 +279,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
|
|||
* set of nodes at each network distance.
|
||||
*/
|
||||
@Override
|
||||
public void sortByDistance( Node reader, Node[] nodes, long seed) {
|
||||
public void sortByDistance( Node reader, Node[] nodes,
|
||||
int activeLen, long seed) {
|
||||
// If reader is not a datanode (not in NetworkTopology tree), we need to
|
||||
// replace this reader with a sibling leaf node in tree.
|
||||
if (reader != null && !this.contains(reader)) {
|
||||
|
@ -292,7 +293,7 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
|
|||
return;
|
||||
}
|
||||
}
|
||||
super.sortByDistance(reader, nodes, seed);
|
||||
super.sortByDistance(reader, nodes, nodes.length, seed);
|
||||
}
|
||||
|
||||
/** InnerNodeWithNodeGroup represents a switch/router of a data center, rack
|
||||
|
|
|
@ -104,7 +104,8 @@ public class TestNetworkTopologyWithNodeGroup {
|
|||
testNodes[1] = dataNodes[2];
|
||||
testNodes[2] = dataNodes[3];
|
||||
testNodes[3] = dataNodes[0];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[0]);
|
||||
assertTrue(testNodes[1] == dataNodes[1]);
|
||||
assertTrue(testNodes[2] == dataNodes[2]);
|
||||
|
@ -115,7 +116,8 @@ public class TestNetworkTopologyWithNodeGroup {
|
|||
testNodes[1] = dataNodes[4];
|
||||
testNodes[2] = dataNodes[1];
|
||||
testNodes[3] = dataNodes[0];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[0]);
|
||||
assertTrue(testNodes[1] == dataNodes[1]);
|
||||
|
||||
|
@ -124,7 +126,8 @@ public class TestNetworkTopologyWithNodeGroup {
|
|||
testNodes[1] = dataNodes[3];
|
||||
testNodes[2] = dataNodes[2];
|
||||
testNodes[3] = dataNodes[0];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[0]);
|
||||
assertTrue(testNodes[1] == dataNodes[2]);
|
||||
|
||||
|
@ -133,7 +136,8 @@ public class TestNetworkTopologyWithNodeGroup {
|
|||
testNodes[1] = dataNodes[7];
|
||||
testNodes[2] = dataNodes[2];
|
||||
testNodes[3] = dataNodes[0];
|
||||
cluster.sortByDistance(computeNode, testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(computeNode, testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[0]);
|
||||
assertTrue(testNodes[1] == dataNodes[2]);
|
||||
}
|
||||
|
|
|
@ -489,6 +489,9 @@ Release 2.5.0 - UNRELEASED
|
|||
|
||||
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
|
||||
|
||||
HDFS-6460. Ignore stale and decommissioned nodes in
|
||||
NetworkTopology#sortByDistance. (Yongjun Zhang via wang)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration.
|
||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
|
|||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||
import org.apache.hadoop.security.token.Token;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
|
|
|
@ -331,6 +331,18 @@ public class DatanodeManager {
|
|||
return heartbeatManager;
|
||||
}
|
||||
|
||||
private boolean isInactive(DatanodeInfo datanode) {
|
||||
if (datanode.isDecommissioned()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (avoidStaleDataNodesForRead) {
|
||||
return datanode.isStale(staleInterval);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Sort the located blocks by the distance to the target host. */
|
||||
public void sortLocatedBlocks(final String targethost,
|
||||
final List<LocatedBlock> locatedblocks) {
|
||||
|
@ -351,10 +363,17 @@ public class DatanodeManager {
|
|||
DFSUtil.DECOM_COMPARATOR;
|
||||
|
||||
for (LocatedBlock b : locatedblocks) {
|
||||
networktopology.sortByDistance(client, b.getLocations(), b
|
||||
.getBlock().getBlockId());
|
||||
DatanodeInfo[] di = b.getLocations();
|
||||
// Move decommissioned/stale datanodes to the bottom
|
||||
Arrays.sort(b.getLocations(), comparator);
|
||||
Arrays.sort(di, comparator);
|
||||
|
||||
int lastActiveIndex = di.length - 1;
|
||||
while (lastActiveIndex > 0 && isInactive(di[lastActiveIndex])) {
|
||||
--lastActiveIndex;
|
||||
}
|
||||
int activeLen = lastActiveIndex + 1;
|
||||
networktopology.sortByDistance(client, b.getLocations(), activeLen,
|
||||
b.getBlock().getBlockId());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,11 +55,18 @@ public class TestNetworkTopology {
|
|||
DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"),
|
||||
DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"),
|
||||
DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"),
|
||||
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3")
|
||||
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3"),
|
||||
DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d3/r1"),
|
||||
DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"),
|
||||
DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"),
|
||||
DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"),
|
||||
DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2")
|
||||
};
|
||||
for (int i = 0; i < dataNodes.length; i++) {
|
||||
cluster.add(dataNodes[i]);
|
||||
}
|
||||
dataNodes[9].setDecommissioned();
|
||||
dataNodes[10].setDecommissioned();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -100,7 +107,7 @@ public class TestNetworkTopology {
|
|||
|
||||
@Test
|
||||
public void testRacks() throws Exception {
|
||||
assertEquals(cluster.getNumOfRacks(), 3);
|
||||
assertEquals(cluster.getNumOfRacks(), 5);
|
||||
assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
|
||||
assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
|
||||
assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
|
||||
|
@ -125,16 +132,33 @@ public class TestNetworkTopology {
|
|||
testNodes[0] = dataNodes[1];
|
||||
testNodes[1] = dataNodes[2];
|
||||
testNodes[2] = dataNodes[0];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[0]);
|
||||
assertTrue(testNodes[1] == dataNodes[1]);
|
||||
assertTrue(testNodes[2] == dataNodes[2]);
|
||||
|
||||
// array contains both local node & local rack node & decommissioned node
|
||||
DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
|
||||
dtestNodes[0] = dataNodes[8];
|
||||
dtestNodes[1] = dataNodes[12];
|
||||
dtestNodes[2] = dataNodes[11];
|
||||
dtestNodes[3] = dataNodes[9];
|
||||
dtestNodes[4] = dataNodes[10];
|
||||
cluster.sortByDistance(dataNodes[8], dtestNodes,
|
||||
dtestNodes.length - 2, 0xDEADBEEF);
|
||||
assertTrue(dtestNodes[0] == dataNodes[8]);
|
||||
assertTrue(dtestNodes[1] == dataNodes[11]);
|
||||
assertTrue(dtestNodes[2] == dataNodes[12]);
|
||||
assertTrue(dtestNodes[3] == dataNodes[9]);
|
||||
assertTrue(dtestNodes[4] == dataNodes[10]);
|
||||
|
||||
// array contains local node
|
||||
testNodes[0] = dataNodes[1];
|
||||
testNodes[1] = dataNodes[3];
|
||||
testNodes[2] = dataNodes[0];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[0]);
|
||||
assertTrue(testNodes[1] == dataNodes[1]);
|
||||
assertTrue(testNodes[2] == dataNodes[3]);
|
||||
|
@ -143,7 +167,8 @@ public class TestNetworkTopology {
|
|||
testNodes[0] = dataNodes[5];
|
||||
testNodes[1] = dataNodes[3];
|
||||
testNodes[2] = dataNodes[1];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[1]);
|
||||
assertTrue(testNodes[1] == dataNodes[3]);
|
||||
assertTrue(testNodes[2] == dataNodes[5]);
|
||||
|
@ -152,7 +177,8 @@ public class TestNetworkTopology {
|
|||
testNodes[0] = dataNodes[1];
|
||||
testNodes[1] = dataNodes[5];
|
||||
testNodes[2] = dataNodes[3];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEADBEEF);
|
||||
assertTrue(testNodes[0] == dataNodes[1]);
|
||||
assertTrue(testNodes[1] == dataNodes[3]);
|
||||
assertTrue(testNodes[2] == dataNodes[5]);
|
||||
|
@ -161,7 +187,8 @@ public class TestNetworkTopology {
|
|||
testNodes[0] = dataNodes[1];
|
||||
testNodes[1] = dataNodes[5];
|
||||
testNodes[2] = dataNodes[3];
|
||||
cluster.sortByDistance(dataNodes[0], testNodes, 0xDEAD);
|
||||
cluster.sortByDistance(dataNodes[0], testNodes,
|
||||
testNodes.length, 0xDEAD);
|
||||
// sortByDistance does not take the "data center" layer into consideration
|
||||
// and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
|
||||
assertTrue(testNodes[0] == dataNodes[1]);
|
||||
|
@ -176,7 +203,8 @@ public class TestNetworkTopology {
|
|||
testNodes[0] = dataNodes[5];
|
||||
testNodes[1] = dataNodes[6];
|
||||
testNodes[2] = dataNodes[7];
|
||||
cluster.sortByDistance(dataNodes[i], testNodes, 0xBEADED+i);
|
||||
cluster.sortByDistance(dataNodes[i], testNodes,
|
||||
testNodes.length, 0xBEADED+i);
|
||||
if (first == null) {
|
||||
first = testNodes[0];
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue