YARN-10300: appMasterHost not set in RM ApplicationSummary when AM fails before first heartbeat. Contributed by Eric Badger (ebadger).
(cherry picked from commit 56247db3022705635580c4d2f8b0abde109f954f)
(cherry picked from commit 034d458511692341636f0d2ef0574b7516c01ed6)
(cherry picked from commit 2e4892061a2ff1ae99ef5aacffd7b229dc3dac1b)
This commit is contained in:
parent
d1f4c8f10f
commit
af324e3153
@ -34,8 +34,10 @@
|
|||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;
|
import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
||||||
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
|
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueACL;
|
import org.apache.hadoop.yarn.api.records.QueueACL;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
@ -171,7 +173,16 @@ public static SummaryBuilder createAppSummary(RMApp app) {
|
|||||||
RMAppAttempt attempt = app.getCurrentAppAttempt();
|
RMAppAttempt attempt = app.getCurrentAppAttempt();
|
||||||
if (attempt != null) {
|
if (attempt != null) {
|
||||||
trackingUrl = attempt.getTrackingUrl();
|
trackingUrl = attempt.getTrackingUrl();
|
||||||
host = attempt.getHost();
|
Container masterContainer = attempt.getMasterContainer();
|
||||||
|
if (masterContainer != null) {
|
||||||
|
NodeId nodeId = masterContainer.getNodeId();
|
||||||
|
if (nodeId != null) {
|
||||||
|
String amHost = nodeId.getHost();
|
||||||
|
if (amHost != null) {
|
||||||
|
host = amHost;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
RMAppMetrics metrics = app.getRMAppMetrics();
|
RMAppMetrics metrics = app.getRMAppMetrics();
|
||||||
SummaryBuilder summary = new SummaryBuilder()
|
SummaryBuilder summary = new SummaryBuilder()
|
||||||
|
@ -52,9 +52,11 @@
|
|||||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||||
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
||||||
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
|
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
||||||
@ -72,6 +74,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||||
@ -840,6 +843,17 @@ public void testEscapeApplicationSummary() {
|
|||||||
when(app.getSubmitTime()).thenReturn(1000L);
|
when(app.getSubmitTime()).thenReturn(1000L);
|
||||||
when(app.getLaunchTime()).thenReturn(2000L);
|
when(app.getLaunchTime()).thenReturn(2000L);
|
||||||
when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
|
when(app.getApplicationTags()).thenReturn(Sets.newHashSet("tag2", "tag1"));
|
||||||
|
|
||||||
|
RMAppAttempt mockRMAppAttempt = mock(RMAppAttempt.class);
|
||||||
|
Container mockContainer = mock(Container.class);
|
||||||
|
NodeId mockNodeId = mock(NodeId.class);
|
||||||
|
String host = "127.0.0.1";
|
||||||
|
|
||||||
|
when(mockNodeId.getHost()).thenReturn(host);
|
||||||
|
when(mockContainer.getNodeId()).thenReturn(mockNodeId);
|
||||||
|
when(mockRMAppAttempt.getMasterContainer()).thenReturn(mockContainer);
|
||||||
|
when(app.getCurrentAppAttempt()).thenReturn(mockRMAppAttempt);
|
||||||
|
|
||||||
Map<String, Long> resourceSecondsMap = new HashMap<>();
|
Map<String, Long> resourceSecondsMap = new HashMap<>();
|
||||||
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
|
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 16384L);
|
||||||
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
|
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 64L);
|
||||||
@ -861,6 +875,7 @@ public void testEscapeApplicationSummary() {
|
|||||||
Assert.assertTrue(msg.contains("Multiline" + escaped +"AppName"));
|
Assert.assertTrue(msg.contains("Multiline" + escaped +"AppName"));
|
||||||
Assert.assertTrue(msg.contains("Multiline" + escaped +"UserName"));
|
Assert.assertTrue(msg.contains("Multiline" + escaped +"UserName"));
|
||||||
Assert.assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
|
Assert.assertTrue(msg.contains("Multiline" + escaped +"QueueName"));
|
||||||
|
Assert.assertTrue(msg.contains("appMasterHost=" + host));
|
||||||
Assert.assertTrue(msg.contains("submitTime=1000"));
|
Assert.assertTrue(msg.contains("submitTime=1000"));
|
||||||
Assert.assertTrue(msg.contains("launchTime=2000"));
|
Assert.assertTrue(msg.contains("launchTime=2000"));
|
||||||
Assert.assertTrue(msg.contains("memorySeconds=16384"));
|
Assert.assertTrue(msg.contains("memorySeconds=16384"));
|
||||||
|
@ -422,6 +422,41 @@ public void testallocateBeforeAMRegistration() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAMMasterContainerHost() throws Exception {
|
||||||
|
//Test that masterContainer and its associated host are
|
||||||
|
//set before the AM is even launched.
|
||||||
|
MockRM rm = new MockRM();
|
||||||
|
rm.start();
|
||||||
|
String host = "127.0.0.1";
|
||||||
|
String port = "1234";
|
||||||
|
MockNM nm1 = rm.registerNode(host + ":" + port, 5120);
|
||||||
|
RMApp app = rm.submitApp(2000);
|
||||||
|
// kick the scheduling
|
||||||
|
nm1.nodeHeartbeat(true);
|
||||||
|
final RMAppAttempt attempt = app.getCurrentAppAttempt();
|
||||||
|
|
||||||
|
try {
|
||||||
|
GenericTestUtils.waitFor(new Supplier<Boolean>() {
|
||||||
|
@Override public Boolean get() {
|
||||||
|
return attempt.getMasterContainer() != null;
|
||||||
|
}
|
||||||
|
}, 10, 200 * 100);
|
||||||
|
} catch (TimeoutException e) {
|
||||||
|
fail("timed out while waiting for AM Launch to happen.");
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
app.getCurrentAppAttempt().getMasterContainer().getNodeId().getHost(),
|
||||||
|
host);
|
||||||
|
|
||||||
|
//send kill before launch
|
||||||
|
rm.killApp(app.getApplicationId());
|
||||||
|
rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
|
||||||
|
|
||||||
|
rm.stop();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSetupTokens() throws Exception {
|
public void testSetupTokens() throws Exception {
|
||||||
MockRM rm = new MockRM();
|
MockRM rm = new MockRM();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user