YARN-10300: appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat. Contributed by Eric Badger (ebadger).

(cherry picked from commit 56247db302)
(cherry picked from commit 034d458511)
This commit is contained in:
Eric E Payne 2020-06-09 18:43:16 +00:00
parent 9f02fea756
commit 2e4892061a
3 changed files with 62 additions and 1 deletion

View File

@ -36,8 +36,10 @@ import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType; import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest;
@ -180,7 +182,16 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
RMAppAttempt attempt = app.getCurrentAppAttempt(); RMAppAttempt attempt = app.getCurrentAppAttempt();
if (attempt != null) { if (attempt != null) {
trackingUrl = attempt.getTrackingUrl(); trackingUrl = attempt.getTrackingUrl();
host = attempt.getHost(); Container masterContainer = attempt.getMasterContainer();
if (masterContainer != null) {
NodeId nodeId = masterContainer.getNodeId();
if (nodeId != null) {
String amHost = nodeId.getHost();
if (amHost != null) {
host = amHost;
}
}
}
} }
RMAppMetrics metrics = app.getRMAppMetrics(); RMAppMetrics metrics = app.getRMAppMetrics();
SummaryBuilder summary = new SummaryBuilder() SummaryBuilder summary = new SummaryBuilder()

View File

@ -54,9 +54,11 @@ import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
@ -76,6 +78,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsMana
import org.apache.hadoop.yarn.server.resourcemanager.placement.ApplicationPlacementContext; import org.apache.hadoop.yarn.server.resourcemanager.placement.ApplicationPlacementContext;
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager; import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@ -1007,6 +1010,17 @@ public class TestAppManager{
when(app.getSubmitTime()).thenReturn(1000L); when(app.getSubmitTime()).thenReturn(1000L);
when(app.getLaunchTime()).thenReturn(2000L); when(app.getLaunchTime()).thenReturn(2000L);
when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1")); when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
RMAppAttempt mockRMAppAttempt = mock(RMAppAttempt.class);
Container mockContainer = mock(Container.class);
NodeId mockNodeId = mock(NodeId.class);
String host = "127.0.0.1";
when(mockNodeId.getHost()).thenReturn(host);
when(mockContainer.getNodeId()).thenReturn(mockNodeId);
when(mockRMAppAttempt.getMasterContainer()).thenReturn(mockContainer);
when(app.getCurrentAppAttempt()).thenReturn(mockRMAppAttempt);
Map<String, Long> resourceSecondsMap = new HashMap<>(); Map<String, Long> resourceSecondsMap = new HashMap<>();
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L); resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L); resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
@ -1028,6 +1042,7 @@ public class TestAppManager{
assertTrue(msg.contains("Multiline" + escaped +"AppName")); assertTrue(msg.contains("Multiline" + escaped +"AppName"));
assertTrue(msg.contains("Multiline" + escaped +"UserName")); assertTrue(msg.contains("Multiline" + escaped +"UserName"));
assertTrue(msg.contains("Multiline" + escaped +"QueueName")); assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
assertTrue(msg.contains("appMasterHost=" + host));
assertTrue(msg.contains("submitTime=1000")); assertTrue(msg.contains("submitTime=1000"));
assertTrue(msg.contains("launchTime=2000")); assertTrue(msg.contains("launchTime=2000"));
assertTrue(msg.contains("memorySeconds=16384")); assertTrue(msg.contains("memorySeconds=16384"));

View File

@ -423,6 +423,41 @@ public class TestApplicationMasterLauncher {
} }
} }
/**
 * Test that masterContainer and its associated host are set before the AM
 * is even launched.
 *
 * <p>Registers a single node on a known host, submits an application, and
 * waits for the current attempt to report a master container. It then checks
 * that the container's node host matches the registered host and finally
 * kills the application.
 *
 * @throws Exception if the mock RM, node or application operations fail
 */
@Test
public void testAMMasterContainerHost() throws Exception {
  MockRM rm = new MockRM();
  rm.start();
  // Stop the RM in a finally block so that a failed wait or assertion does
  // not leave it running for the remaining tests in this class.
  try {
    String host = "127.0.0.1";
    String port = "1234";
    MockNM nm1 = rm.registerNode(host + ":" + port, 5120);
    RMApp app = rm.submitApp(2000);
    // kick the scheduling
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    try {
      GenericTestUtils.waitFor(
          () -> attempt.getMasterContainer() != null, 10, 200 * 100);
    } catch (TimeoutException e) {
      // Keep the original failure message but preserve the timeout as the
      // cause so the stack trace shows where the wait gave up.
      throw new AssertionError(
          "timed out while waiting for AM Launch to happen.", e);
    }
    // JUnit expects (expected, actual); assert on the attempt we waited on,
    // whose master container is known to be non-null at this point.
    Assert.assertEquals(host,
        attempt.getMasterContainer().getNodeId().getHost());
    //send kill before launch
    rm.killApp(app.getApplicationId());
    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
  } finally {
    rm.stop();
  }
}
@Test @Test
public void testSetupTokens() throws Exception { public void testSetupTokens() throws Exception {
MockRM rm = new MockRM(); MockRM rm = new MockRM();