YARN-10300: appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat. Contributed by Eric Badger (ebadger).

(cherry picked from commit 56247db3022705635580c4d2f8b0abde109f954f)
(cherry picked from commit 034d458511692341636f0d2ef0574b7516c01ed6)
(cherry picked from commit 2e4892061a2ff1ae99ef5aacffd7b229dc3dac1b)
This commit is contained in:
Eric E Payne 2020-06-09 18:43:16 +00:00
parent d1f4c8f10f
commit af324e3153
3 changed files with 62 additions and 1 deletions

View File

@ -34,8 +34,10 @@
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType; import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest;
@ -171,7 +173,16 @@ public static SummaryBuilder createAppSummary(RMApp app) {
RMAppAttempt attempt = app.getCurrentAppAttempt(); RMAppAttempt attempt = app.getCurrentAppAttempt();
if (attempt != null) { if (attempt != null) {
trackingUrl = attempt.getTrackingUrl(); trackingUrl = attempt.getTrackingUrl();
host = attempt.getHost(); Container masterContainer = attempt.getMasterContainer();
if (masterContainer != null) {
NodeId nodeId = masterContainer.getNodeId();
if (nodeId != null) {
String amHost = nodeId.getHost();
if (amHost != null) {
host = amHost;
}
}
}
} }
RMAppMetrics metrics = app.getRMAppMetrics(); RMAppMetrics metrics = app.getRMAppMetrics();
SummaryBuilder summary = new SummaryBuilder() SummaryBuilder summary = new SummaryBuilder()

View File

@ -52,9 +52,11 @@
import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceInformation;
@ -72,6 +74,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager; import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@ -840,6 +843,17 @@ public void testEscapeApplicationSummary() {
when(app.getSubmitTime()).thenReturn(1000L); when(app.getSubmitTime()).thenReturn(1000L);
when(app.getLaunchTime()).thenReturn(2000L); when(app.getLaunchTime()).thenReturn(2000L);
when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1")); when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
RMAppAttempt mockRMAppAttempt = mock(RMAppAttempt.class);
Container mockContainer = mock(Container.class);
NodeId mockNodeId = mock(NodeId.class);
String host = "127.0.0.1";
when(mockNodeId.getHost()).thenReturn(host);
when(mockContainer.getNodeId()).thenReturn(mockNodeId);
when(mockRMAppAttempt.getMasterContainer()).thenReturn(mockContainer);
when(app.getCurrentAppAttempt()).thenReturn(mockRMAppAttempt);
Map<String, Long> resourceSecondsMap = new HashMap<>(); Map<String, Long> resourceSecondsMap = new HashMap<>();
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L); resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L); resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
@ -861,6 +875,7 @@ public void testEscapeApplicationSummary() {
Assert.assertTrue(msg.contains("Multiline" + escaped +"AppName")); Assert.assertTrue(msg.contains("Multiline" + escaped +"AppName"));
Assert.assertTrue(msg.contains("Multiline" + escaped +"UserName")); Assert.assertTrue(msg.contains("Multiline" + escaped +"UserName"));
Assert.assertTrue(msg.contains("Multiline" + escaped +"QueueName")); Assert.assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
Assert.assertTrue(msg.contains("appMasterHost=" + host));
Assert.assertTrue(msg.contains("submitTime=1000")); Assert.assertTrue(msg.contains("submitTime=1000"));
Assert.assertTrue(msg.contains("launchTime=2000")); Assert.assertTrue(msg.contains("launchTime=2000"));
Assert.assertTrue(msg.contains("memorySeconds=16384")); Assert.assertTrue(msg.contains("memorySeconds=16384"));

View File

@ -422,6 +422,41 @@ public void testallocateBeforeAMRegistration() throws Exception {
} }
} }
/**
 * Checks that the current attempt's master container, and the host of the
 * node it was placed on, are available before the AM is launched, and that
 * the application can still be killed cleanly in that state.
 *
 * @throws Exception if the mock RM/NM interaction fails
 */
@Test
public void testAMMasterContainerHost() throws Exception {
  MockRM rm = new MockRM();
  rm.start();
  // Everything after start() runs under try/finally so the mock RM is
  // stopped even when the wait, the assertion or the kill step fails.
  try {
    String host = "127.0.0.1";
    String port = "1234";
    MockNM nm1 = rm.registerNode(host + ":" + port, 5120);
    RMApp app = rm.submitApp(2000);
    // kick the scheduling
    nm1.nodeHeartbeat(true);
    final RMAppAttempt attempt = app.getCurrentAppAttempt();
    try {
      // Poll until the attempt reports a master container.
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        @Override public Boolean get() {
          return attempt.getMasterContainer() != null;
        }
      }, 10, 200 * 100);
    } catch (TimeoutException e) {
      fail("timed out while waiting for the AM master container to be set.");
    }
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals(
        host,
        app.getCurrentAppAttempt().getMasterContainer().getNodeId().getHost());
    // send kill before launch
    rm.killApp(app.getApplicationId());
    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
  } finally {
    rm.stop();
  }
}
@Test @Test
public void testSetupTokens() throws Exception { public void testSetupTokens() throws Exception {
MockRM rm = new MockRM(); MockRM rm = new MockRM();