HDFS-3990. NN's health report has severe performance problems (daryn)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1407333 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Daryn Sharp 2012-11-09 00:53:11 +00:00
parent 0f7a4337ac
commit be94bf6b57
4 changed files with 117 additions and 42 deletions

View File

@ -1960,6 +1960,8 @@ Release 0.23.5 - UNRELEASED
HDFS-4075. Reduce recommissioning overhead (Kihwal Lee via daryn) HDFS-4075. Reduce recommissioning overhead (Kihwal Lee via daryn)
HDFS-3990. NN's health report has severe performance problems (daryn)
BUG FIXES BUG FIXES
HDFS-3829. TestHftpURLTimeouts fails intermittently with JDK7 (Trevor HDFS-3829. TestHftpURLTimeouts fails intermittently with JDK7 (Trevor

View File

@ -38,7 +38,8 @@ public class DatanodeID implements Comparable<DatanodeID> {
public static final DatanodeID[] EMPTY_ARRAY = {}; public static final DatanodeID[] EMPTY_ARRAY = {};
private String ipAddr; // IP address private String ipAddr; // IP address
private String hostName; // hostname private String hostName; // hostname claimed by datanode
private String peerHostName; // hostname from the actual connection
private String storageID; // unique per cluster storageID private String storageID; // unique per cluster storageID
private int xferPort; // data streaming port private int xferPort; // data streaming port
private int infoPort; // info server port private int infoPort; // info server port
@ -51,6 +52,7 @@ public DatanodeID(DatanodeID from) {
from.getXferPort(), from.getXferPort(),
from.getInfoPort(), from.getInfoPort(),
from.getIpcPort()); from.getIpcPort());
this.peerHostName = from.getPeerHostName();
} }
/** /**
@ -76,6 +78,10 @@ public void setIpAddr(String ipAddr) {
this.ipAddr = ipAddr; this.ipAddr = ipAddr;
} }
/**
 * Set the hostname from the actual connection, as opposed to the
 * hostname claimed by the datanode itself.
 *
 * @param peerHostName hostname from the actual connection
 */
public void setPeerHostName(String peerHostName) {
this.peerHostName = peerHostName;
}
public void setStorageID(String storageID) { public void setStorageID(String storageID) {
this.storageID = storageID; this.storageID = storageID;
} }
@ -94,6 +100,13 @@ public String getHostName() {
return hostName; return hostName;
} }
/**
 * Returns the hostname from the actual connection, which is tracked
 * separately from the hostname claimed by the datanode.
 *
 * @return hostname from the actual connection; may be null when it was
 *         never set (the host-filtering helper null-checks this value)
 */
public String getPeerHostName() {
return peerHostName;
}
/** /**
* @return IP:xferPort string * @return IP:xferPort string
*/ */
@ -202,6 +215,7 @@ public String toString() {
public void updateRegInfo(DatanodeID nodeReg) { public void updateRegInfo(DatanodeID nodeReg) {
ipAddr = nodeReg.getIpAddr(); ipAddr = nodeReg.getIpAddr();
hostName = nodeReg.getHostName(); hostName = nodeReg.getHostName();
peerHostName = nodeReg.getPeerHostName();
xferPort = nodeReg.getXferPort(); xferPort = nodeReg.getXferPort();
infoPort = nodeReg.getInfoPort(); infoPort = nodeReg.getInfoPort();
ipcPort = nodeReg.getIpcPort(); ipcPort = nodeReg.getIpcPort();

View File

@ -540,28 +540,16 @@ private void removeDecomNodeFromList(final List<DatanodeDescriptor> nodeList) {
private static boolean checkInList(final DatanodeID node, private static boolean checkInList(final DatanodeID node,
final Set<String> hostsList, final Set<String> hostsList,
final boolean isExcludeList) { final boolean isExcludeList) {
final InetAddress iaddr;
try {
iaddr = InetAddress.getByName(node.getIpAddr());
} catch (UnknownHostException e) {
LOG.warn("Unknown IP: " + node.getIpAddr(), e);
return isExcludeList;
}
// if include list is empty, host is in include list // if include list is empty, host is in include list
if ( (!isExcludeList) && (hostsList.isEmpty()) ){ if ( (!isExcludeList) && (hostsList.isEmpty()) ){
return true; return true;
} }
return // compare ipaddress(:port) for (String name : getNodeNamesForHostFiltering(node)) {
(hostsList.contains(iaddr.getHostAddress().toString())) if (hostsList.contains(name)) {
|| (hostsList.contains(iaddr.getHostAddress().toString() + ":" return true;
+ node.getXferPort())) }
// compare hostname(:port) }
|| (hostsList.contains(iaddr.getHostName())) return false;
|| (hostsList.contains(iaddr.getHostName() + ":" + node.getXferPort()))
|| ((node instanceof DatanodeInfo) && hostsList
.contains(((DatanodeInfo) node).getHostName()));
} }
/** /**
@ -644,16 +632,20 @@ private String newStorageID() {
*/ */
public void registerDatanode(DatanodeRegistration nodeReg) public void registerDatanode(DatanodeRegistration nodeReg)
throws DisallowedDatanodeException { throws DisallowedDatanodeException {
String dnAddress = Server.getRemoteAddress(); InetAddress dnAddress = Server.getRemoteIp();
if (dnAddress == null) { if (dnAddress != null) {
// Mostly called inside an RPC. // Mostly called inside an RPC, update ip and peer hostname
// But if not, use address passed by the data-node. String hostname = dnAddress.getHostName();
dnAddress = nodeReg.getIpAddr(); String ip = dnAddress.getHostAddress();
if (hostname.equals(ip)) {
LOG.warn("Unresolved datanode registration from " + ip);
throw new DisallowedDatanodeException(nodeReg);
}
// update node registration with the ip and hostname from rpc request
nodeReg.setIpAddr(ip);
nodeReg.setPeerHostName(hostname);
} }
// Update the IP to the address of the RPC request that is
// registering this datanode.
nodeReg.setIpAddr(dnAddress);
nodeReg.setExportedKeys(blockManager.getBlockKeys()); nodeReg.setExportedKeys(blockManager.getBlockKeys());
// Checks if the node is not on the hosts list. If it is not, then // Checks if the node is not on the hosts list. If it is not, then
@ -1033,19 +1025,8 @@ public List<DatanodeDescriptor> getDatanodeListForReport(
if ( (isDead && listDeadNodes) || (!isDead && listLiveNodes) ) { if ( (isDead && listDeadNodes) || (!isDead && listLiveNodes) ) {
nodes.add(dn); nodes.add(dn);
} }
// Remove any nodes we know about from the map for (String name : getNodeNamesForHostFiltering(dn)) {
try { mustList.remove(name);
InetAddress inet = InetAddress.getByName(dn.getIpAddr());
// compare hostname(:port)
mustList.remove(inet.getHostName());
mustList.remove(inet.getHostName()+":"+dn.getXferPort());
// compare ipaddress(:port)
mustList.remove(inet.getHostAddress().toString());
mustList.remove(inet.getHostAddress().toString()+ ":" +dn.getXferPort());
} catch (UnknownHostException e) {
mustList.remove(dn.getName());
mustList.remove(dn.getIpAddr());
LOG.warn(e);
} }
} }
} }
@ -1066,6 +1047,25 @@ public List<DatanodeDescriptor> getDatanodeListForReport(
return nodes; return nodes;
} }
/**
 * Collect every name under which the given node may be listed in a
 * hosts include/exclude list. Each name is produced both bare and with
 * the ":xferPort" suffix, in this order: IP address, registered
 * hostname, then (only when known) the peer hostname.
 *
 * @param node datanode whose candidate names are wanted
 * @return candidate names, built purely from fields already stored on
 *         the node
 */
private static List<String> getNodeNamesForHostFiltering(DatanodeID node) {
  final String portSuffix = ":" + node.getXferPort();
  final List<String> candidates = new ArrayList<String>();

  // ip and registered hostname are always listed
  final String[] alwaysListed = { node.getIpAddr(), node.getHostName() };
  for (String host : alwaysListed) {
    candidates.add(host);
    candidates.add(host + portSuffix);
  }

  // the peer hostname is only listed once it has been recorded
  final String peer = node.getPeerHostName();
  if (peer != null) {
    candidates.add(peer);
    candidates.add(peer + portSuffix);
  }
  return candidates;
}
private void setDatanodeDead(DatanodeDescriptor node) { private void setDatanodeDead(DatanodeDescriptor node) {
node.setLastUpdate(0); node.setLastUpdate(0);
} }

View File

@ -17,12 +17,12 @@
*/ */
package org.apache.hadoop.hdfs; package org.apache.hadoop.hdfs;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.*;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.security.Permission;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -31,6 +31,7 @@
import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@ -46,6 +47,64 @@ public class TestDatanodeRegistration {
public static final Log LOG = LogFactory.getLog(TestDatanodeRegistration.class); public static final Log LOG = LogFactory.getLog(TestDatanodeRegistration.class);
/**
 * Security manager that permits every operation and merely counts how
 * often a connect check is made with port -1.
 */
private static class MonitorDNS extends SecurityManager {
  /** Port value passed to checkConnect that this monitor counts. */
  private static final int LOOKUP_PORT = -1;

  int lookups = 0;

  @Override
  public void checkPermission(Permission perm) {
    // intentionally empty: never deny anything, this manager only observes
  }

  @Override
  public void checkConnect(String host, int port) {
    final boolean isLookup = (port == LOOKUP_PORT);
    if (isLookup) {
      lookups += 1;
    }
  }
}
/**
 * Ensure the datanode manager does not do host lookup after registration,
 * especially for node reports.
 *
 * A counting security manager is installed for the duration of the test.
 * Whatever security manager was in place beforehand is restored at the
 * end, even when cluster shutdown fails, so the counter cannot leak into
 * other tests.
 *
 * @throws Exception
 */
@Test
public void testDNSLookups() throws Exception {
  // remember the current manager so it can be put back afterwards
  final SecurityManager previousSm = System.getSecurityManager();
  MonitorDNS sm = new MonitorDNS();
  System.setSecurityManager(sm);

  MiniDFSCluster cluster = null;
  try {
    HdfsConfiguration conf = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(8).build();
    cluster.waitActive();

    // startup must have been observed, otherwise the monitor is not hooked in
    int initialLookups = sm.lookups;
    assertTrue("dns security manager is active", initialLookups != 0);

    DatanodeManager dm =
        cluster.getNamesystem().getBlockManager().getDatanodeManager();

    // make sure no lookups occur
    dm.refreshNodes(conf);
    assertEquals(initialLookups, sm.lookups);

    dm.refreshNodes(conf);
    assertEquals(initialLookups, sm.lookups);

    // ensure none of the reports trigger lookups
    dm.getDatanodeListForReport(DatanodeReportType.ALL);
    assertEquals(initialLookups, sm.lookups);

    dm.getDatanodeListForReport(DatanodeReportType.LIVE);
    assertEquals(initialLookups, sm.lookups);

    dm.getDatanodeListForReport(DatanodeReportType.DEAD);
    assertEquals(initialLookups, sm.lookups);
  } finally {
    try {
      if (cluster != null) {
        cluster.shutdown();
      }
    } finally {
      // restore rather than clear, and do so even if shutdown throws
      System.setSecurityManager(previousSm);
    }
  }
}
/** /**
* Regression test for HDFS-894 ensures that, when datanodes * Regression test for HDFS-894 ensures that, when datanodes
* are restarted, the new IPC port is registered with the * are restarted, the new IPC port is registered with the