From 034d458511692341636f0d2ef0574b7516c01ed6 Mon Sep 17 00:00:00 2001
From: Eric E Payne
Date: Tue, 9 Jun 2020 18:43:16 +0000
Subject: [PATCH] YARN-10300: appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat. Contributed by Eric Badger (ebadger).

(cherry picked from commit 56247db3022705635580c4d2f8b0abde109f954f)
---
 .../server/resourcemanager/RMAppManager.java  | 13 ++++++-
 .../resourcemanager/TestAppManager.java       | 15 ++++++++
 .../TestApplicationMasterLauncher.java        | 35 +++++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index 6623ab12e11..4b0d252a0f6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -26,6 +26,8 @@ import java.util.TreeSet;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -186,7 +188,16 @@ public class RMAppManager implements EventHandler,
       RMAppAttempt attempt = app.getCurrentAppAttempt();
       if (attempt != null) {
         trackingUrl = attempt.getTrackingUrl();
-        host = attempt.getHost();
+        Container masterContainer = attempt.getMasterContainer();
+        if (masterContainer != null) {
+          NodeId nodeId = masterContainer.getNodeId();
+          if (nodeId != null) {
+            String amHost = nodeId.getHost();
+            if (amHost != null) {
+              host = amHost;
+            }
+          }
+        }
       }
       RMAppMetrics metrics = app.getRMAppMetrics();
       SummaryBuilder summary = new SummaryBuilder()
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
index 29d2e870111..fe4c8874520 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
@@ -22,6 +22,9 @@ package org.apache.hadoop.yarn.server.resourcemanager;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -1079,6 +1082,17 @@ public class TestAppManager extends AppManagerTestBase{
     when(app.getSubmitTime()).thenReturn(1000L);
     when(app.getLaunchTime()).thenReturn(2000L);
     when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
+
+    RMAppAttempt mockRMAppAttempt = mock(RMAppAttempt.class);
+    Container mockContainer = mock(Container.class);
+    NodeId mockNodeId = mock(NodeId.class);
+    String host = "127.0.0.1";
+
+    when(mockNodeId.getHost()).thenReturn(host);
+    when(mockContainer.getNodeId()).thenReturn(mockNodeId);
+    when(mockRMAppAttempt.getMasterContainer()).thenReturn(mockContainer);
+    when(app.getCurrentAppAttempt()).thenReturn(mockRMAppAttempt);
+
     Map<String, Long> resourceSecondsMap = new HashMap<>();
     resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
     resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
@@ -1100,6 +1114,7 @@ public class TestAppManager extends AppManagerTestBase{
     assertTrue(msg.contains("Multiline" + escaped +"AppName"));
     assertTrue(msg.contains("Multiline" + escaped +"UserName"));
     assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
+    assertTrue(msg.contains("appMasterHost=" + host));
     assertTrue(msg.contains("submitTime=1000"));
     assertTrue(msg.contains("launchTime=2000"));
     assertTrue(msg.contains("memorySeconds=16384"));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java
index cc5112cea3a..25f8b27bd5f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java
@@ -419,6 +419,41 @@ public class TestApplicationMasterLauncher {
     }
   }
 
+  @Test
+  public void testAMMasterContainerHost() throws Exception {
+    //Test that masterContainer and its associated host are
+    //set before the AM is even launched.
+    MockRM rm = new MockRM();
+    rm.start();
+    String host = "127.0.0.1";
+    String port = "1234";
+    MockNM nm1 = rm.registerNode(host + ":" + port, 5120);
+    RMApp app = rm.submitApp(2000);
+    // kick the scheduling
+    nm1.nodeHeartbeat(true);
+    RMAppAttempt attempt = app.getCurrentAppAttempt();
+
+    try {
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override public Boolean get() {
+          return attempt.getMasterContainer() != null;
+        }
+      }, 10, 200 * 100);
+    } catch (TimeoutException e) {
+      fail("timed out while waiting for AM Launch to happen.");
+    }
+
+    Assert.assertEquals(
+        app.getCurrentAppAttempt().getMasterContainer().getNodeId().getHost(),
+        host);
+
+    //send kill before launch
+    rm.killApp(app.getApplicationId());
+    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
+
+    rm.stop();
+  }
+
   @Test
   public void testSetupTokens() throws Exception {
     MockRM rm = new MockRM();