From 9bb2801e8ce1e6298241944a65f593f555ae10e2 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Fri, 5 Oct 2018 15:52:46 -0500 Subject: [PATCH] YARN-4254. ApplicationAttempt stuck for ever due to UnknownHostException. Contributed by Bibin A Chundatt --- .../hadoop/yarn/conf/YarnConfiguration.java | 9 +++- .../src/main/resources/yarn-default.xml | 5 +++ .../ResourceTrackerService.java | 23 ++++++++++ .../TestResourceTrackerService.java | 45 +++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 95861d7fbfb..6488ebfc4ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -541,7 +541,14 @@ public class YarnConfiguration extends Configuration { public static final String RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = RM_PREFIX + "resource-tracker.client.thread-count"; public static final int DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = 50; - + + /** Check IP and hostname resolution during nodemanager registration.*/ + public static final String RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY = + RM_PREFIX + "resource-tracker.nm.ip-hostname-check"; + + public static final boolean DEFAULT_RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY = + false; + /** The class to use as the resource scheduler.*/ public static final String RM_SCHEDULER = RM_PREFIX + "scheduler.class"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index e6f7b37a25a..8e9f15be79c 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -264,6 +264,11 @@ <value>${yarn.resourcemanager.hostname}:8031</value> </property> + <property> + <name>yarn.resourcemanager.resource-tracker.nm.ip-hostname-check</name> + <value>false</value> + </property> + <property> <description>Are acls enabled.</description> <name>yarn.acl.enable</name> diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index b67172e01cc..3d6eda2cf5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.io.IOException; import java.io.InputStream; +import java.net.InetAddress; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.Arrays; @@ -39,6 +40,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.service.AbstractService; @@ -126,6 +128,7 @@ public class ResourceTrackerService extends AbstractService implements private DynamicResourceConfiguration drConf; private final AtomicLong timelineCollectorVersion = new AtomicLong(0); + private boolean 
checkIpHostnameInRegistration; public ResourceTrackerService(RMContext rmContext, NodesListManager nodesListManager, @@ -162,6 +165,9 @@ public class ResourceTrackerService extends AbstractService implements + " should be larger than 0."); } + checkIpHostnameInRegistration = conf.getBoolean( + YarnConfiguration.RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + YarnConfiguration.DEFAULT_RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY); minAllocMb = conf.getInt( YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); @@ -350,6 +356,23 @@ public class ResourceTrackerService extends AbstractService implements } } + if (checkIpHostnameInRegistration) { + InetSocketAddress nmAddress = + NetUtils.createSocketAddrForHost(host, cmPort); + InetAddress inetAddress = Server.getRemoteIp(); + if (inetAddress != null && nmAddress.isUnresolved()) { + // Reject registration of unresolved nm to prevent resourcemanager + // getting stuck at allocations. + final String message = + "hostname cannot be resolved (ip=" + inetAddress.getHostAddress() + + ", hostname=" + host + ")"; + LOG.warn("Unresolved nodemanager registration: " + message); + response.setDiagnosticsMessage(message); + response.setNodeAction(NodeAction.SHUTDOWN); + return response; + } + } + // Check if this node is a 'valid' node if (!this.nodesListManager.isValidNode(host) && !isNodeInDecommissioning(nodeId)) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index e40b3c051c6..b451db1a7d5 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java
@@ -18,7 +18,10 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager;
 
+import org.apache.hadoop.net.ServerSocketUtil;
 import org.apache.hadoop.yarn.nodelabels.NodeAttributeStore;
+import org.apache.hadoop.yarn.server.api.ResourceTracker;
+import org.apache.hadoop.yarn.server.api.ServerRMProxy;
 import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.FileSystemNodeAttributeStore;
 import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.mock;
@@ -2402,4 +2405,46 @@ public class TestResourceTrackerService extends NodeLabelTestBase {
     Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction());
     Assert.assertEquals(1, nodeHeartbeat.getResponseId());
   }
+
+  /** RM must reject NM registration when the NM hostname is unresolvable. */
+  @Test
+  public void testNMIpHostNameResolution() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
+        "localhost:" + ServerSocketUtil.getPort(10000, 10));
+    conf.setBoolean(YarnConfiguration.RM_NM_REGISTRATION_IP_HOSTNAME_CHECK_KEY,
+        true);
+    MockRM mockRM = new MockRM(conf) {
+      @Override
+      protected ResourceTrackerService createResourceTrackerService() {
+        return new ResourceTrackerService(getRMContext(), nodesListManager,
+            nmLivelinessMonitor, rmContext.getContainerTokenSecretManager(),
+            rmContext.getNMTokenSecretManager());
+      }
+    };
+    mockRM.start();
+    try {
+      ResourceTracker rmTracker = ServerRMProxy.createRMProxy(
+          mockRM.getConfig(), ResourceTracker.class);
+      RegisterNodeManagerResponse response = rmTracker.registerNodeManager(
+          RegisterNodeManagerRequest.newInstance(
+              NodeId.newInstance("host1" + System.currentTimeMillis(), 1234),
+              1236, Resource.newInstance(10000, 10), "2", new ArrayList<>(),
+              new ArrayList<>()));
+      Assert.assertEquals("Shutdown signal should be received",
+          NodeAction.SHUTDOWN, response.getNodeAction());
+      Assert.assertTrue("Diagnostic Message", response.getDiagnosticsMessage()
+          .contains("hostname cannot be resolved "));
+      // Test success: a resolvable hostname must still register normally.
+      response = rmTracker.registerNodeManager(RegisterNodeManagerRequest
+          .newInstance(NodeId.newInstance("localhost", 1234), 1236,
+              Resource.newInstance(10000, 10), "2", new ArrayList<>(),
+              new ArrayList<>()));
+      Assert.assertEquals("Successful registration", NodeAction.NORMAL,
+          response.getNodeAction());
+    } finally {
+      // Always stop the RM so a failed assertion does not leak its services.
+      mockRM.stop();
+    }
+  }
 }