YARN-10300: appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat. Contributed by Eric Badger (ebadger).

(cherry picked from commit 56247db302)
This commit is contained in:
Eric E Payne 2020-06-09 18:43:16 +00:00
parent 452d801f1b
commit 034d458511
3 changed files with 62 additions and 1 deletions

View File

@ -26,6 +26,8 @@ import java.util.TreeSet;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -186,7 +188,16 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
RMAppAttempt attempt = app.getCurrentAppAttempt(); RMAppAttempt attempt = app.getCurrentAppAttempt();
if (attempt != null) { if (attempt != null) {
trackingUrl = attempt.getTrackingUrl(); trackingUrl = attempt.getTrackingUrl();
host = attempt.getHost(); Container masterContainer = attempt.getMasterContainer();
if (masterContainer != null) {
NodeId nodeId = masterContainer.getNodeId();
if (nodeId != null) {
String amHost = nodeId.getHost();
if (amHost != null) {
host = amHost;
}
}
}
} }
RMAppMetrics metrics = app.getRMAppMetrics(); RMAppMetrics metrics = app.getRMAppMetrics();
SummaryBuilder summary = new SummaryBuilder() SummaryBuilder summary = new SummaryBuilder()

View File

@ -22,6 +22,9 @@ package org.apache.hadoop.yarn.server.resourcemanager;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -1079,6 +1082,17 @@ public class TestAppManager extends AppManagerTestBase{
when(app.getSubmitTime()).thenReturn(1000L); when(app.getSubmitTime()).thenReturn(1000L);
when(app.getLaunchTime()).thenReturn(2000L); when(app.getLaunchTime()).thenReturn(2000L);
when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1")); when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
RMAppAttempt mockRMAppAttempt = mock(RMAppAttempt.class);
Container mockContainer = mock(Container.class);
NodeId mockNodeId = mock(NodeId.class);
String host = "127.0.0.1";
when(mockNodeId.getHost()).thenReturn(host);
when(mockContainer.getNodeId()).thenReturn(mockNodeId);
when(mockRMAppAttempt.getMasterContainer()).thenReturn(mockContainer);
when(app.getCurrentAppAttempt()).thenReturn(mockRMAppAttempt);
Map<String, Long> resourceSecondsMap = new HashMap<>(); Map<String, Long> resourceSecondsMap = new HashMap<>();
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L); resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L); resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
@ -1100,6 +1114,7 @@ public class TestAppManager extends AppManagerTestBase{
assertTrue(msg.contains("Multiline" + escaped +"AppName")); assertTrue(msg.contains("Multiline" + escaped +"AppName"));
assertTrue(msg.contains("Multiline" + escaped +"UserName")); assertTrue(msg.contains("Multiline" + escaped +"UserName"));
assertTrue(msg.contains("Multiline" + escaped +"QueueName")); assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
assertTrue(msg.contains("appMasterHost=" + host));
assertTrue(msg.contains("submitTime=1000")); assertTrue(msg.contains("submitTime=1000"));
assertTrue(msg.contains("launchTime=2000")); assertTrue(msg.contains("launchTime=2000"));
assertTrue(msg.contains("memorySeconds=16384")); assertTrue(msg.contains("memorySeconds=16384"));

View File

@ -419,6 +419,41 @@ public class TestApplicationMasterLauncher {
} }
} }
/**
 * Verifies that the current attempt's master container, and the host of the
 * node it was allocated on, are available before the AM is even launched.
 *
 * @throws Exception if the mock cluster cannot be driven to the expected state
 */
@Test
public void testAMMasterContainerHost() throws Exception {
  MockRM rm = new MockRM();
  rm.start();
  // Always stop the RM, even when an assertion or wait below fails, so a
  // failing run does not leak a running MockRM into subsequent tests.
  try {
    String host = "127.0.0.1";
    String port = "1234";
    MockNM nm1 = rm.registerNode(host + ":" + port, 5120);
    RMApp app = rm.submitApp(2000);
    // kick the scheduling
    nm1.nodeHeartbeat(true);
    final RMAppAttempt attempt = app.getCurrentAppAttempt();

    // Wait until the attempt has been handed its master container.
    try {
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        @Override public Boolean get() {
          return attempt.getMasterContainer() != null;
        }
      }, 10, 200 * 100);
    } catch (TimeoutException e) {
      fail("timed out while waiting for AM Launch to happen.");
    }

    // JUnit's assertEquals takes (expected, actual); keeping that order
    // makes the failure message report the values the right way round.
    Assert.assertEquals(
        host,
        app.getCurrentAppAttempt().getMasterContainer().getNodeId().getHost());

    //send kill before launch
    rm.killApp(app.getApplicationId());
    rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
  } finally {
    rm.stop();
  }
}
@Test @Test
public void testSetupTokens() throws Exception { public void testSetupTokens() throws Exception {
MockRM rm = new MockRM(); MockRM rm = new MockRM();