YARN-4747. AHS error 500 due to NPE when container start event is missing. Contributed by Varun Saxena

(cherry picked from commit b2ed6ae731)
This commit is contained in:
Jason Lowe 2016-05-06 23:03:23 +00:00
parent 5f698df901
commit 09fc02be67
5 changed files with 60 additions and 11 deletions

View File

@ -136,6 +136,9 @@ Release 2.7.3 - UNRELEASED
YARN-4834. ProcfsBasedProcessTree doesn't track daemonized processes
(Nathan Roberts via jlowe)
YARN-4747. AHS error 500 due to NPE when container start event is missing
(Varun Saxena via jlowe)
Release 2.7.2 - 2016-01-25
INCOMPATIBLE CHANGES

View File

@ -517,19 +517,22 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService
}
}
}
NodeId allocatedNode = NodeId.newInstance(allocatedHost, allocatedPort);
ContainerId containerId =
ConverterUtils.toContainerId(entity.getEntityId());
String logUrl = WebAppUtils.getAggregatedLogURL(
String logUrl = null;
NodeId allocatedNode = null;
if (allocatedHost != null) {
allocatedNode = NodeId.newInstance(allocatedHost, allocatedPort);
logUrl = WebAppUtils.getAggregatedLogURL(
serverHttpAddress,
allocatedNode.toString(),
containerId.toString(),
containerId.toString(),
user);
}
return ContainerReport.newInstance(
ConverterUtils.toContainerId(entity.getEntityId()),
Resource.newInstance(allocatedMem, allocatedVcore),
NodeId.newInstance(allocatedHost, allocatedPort),
Resource.newInstance(allocatedMem, allocatedVcore), allocatedNode,
Priority.newInstance(allocatedPriority),
createdTime, finishedTime, diagnosticsInfo, logUrl, exitStatus, state,
nodeHttpAddress);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.metrics;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.NodeId;
public class ContainerFinishedEvent extends SystemMetricsEvent {
@ -27,17 +28,20 @@ public class ContainerFinishedEvent extends SystemMetricsEvent {
private String diagnosticsInfo;
private int containerExitStatus;
private ContainerState state;
private NodeId allocatedNode;
/**
 * Creates a metrics event of type
 * {@link SystemMetricsEventType#CONTAINER_FINISHED}.
 *
 * @param containerId id of the container this event refers to; stored on
 *          the event as given
 * @param diagnosticsInfo diagnostics text stored on the event as given
 * @param containerExitStatus exit status stored on the event as given
 * @param state container state stored on the event as given
 * @param finishedTime handed to the superclass constructor together with the
 *          event type (presumably used as the event timestamp - confirm
 *          against SystemMetricsEvent)
 * @param allocatedNode node stored on the event and exposed through
 *          {@link #getAllocatedNode()}
 */
public ContainerFinishedEvent(
ContainerId containerId,
String diagnosticsInfo,
int containerExitStatus,
ContainerState state,
long finishedTime) {
long finishedTime,
NodeId allocatedNode) {
super(SystemMetricsEventType.CONTAINER_FINISHED, finishedTime);
this.containerId = containerId;
this.diagnosticsInfo = diagnosticsInfo;
this.containerExitStatus = containerExitStatus;
this.allocatedNode = allocatedNode;
this.state = state;
}
@ -62,4 +66,7 @@ public class ContainerFinishedEvent extends SystemMetricsEvent {
return state;
}
/**
 * Returns the node that was supplied to this event's constructor.
 *
 * @return the {@code allocatedNode} value held by this event; no null check
 *         is performed here, so the result is whatever the creator passed in
 */
public NodeId getAllocatedNode() {
return allocatedNode;
}
}

View File

@ -199,7 +199,7 @@ public class SystemMetricsPublisher extends CompositeService {
container.getDiagnosticsInfo(),
container.getContainerExitStatus(),
container.getContainerState(),
finishedTime));
finishedTime, container.getAllocatedNode()));
}
}
@ -421,6 +421,12 @@ public class SystemMetricsPublisher extends CompositeService {
event.getContainerExitStatus());
eventInfo.put(ContainerMetricsConstants.STATE_EVENT_INFO,
event.getContainerState().toString());
Map<String, Object> entityInfo = new HashMap<String, Object>();
entityInfo.put(ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO,
event.getAllocatedNode().getHost());
entityInfo.put(ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO,
event.getAllocatedNode().getPort());
entity.setOtherInfo(entityInfo);
tEvent.setEventInfo(eventInfo);
entity.addEvent(tEvent);
putEntity(entity);

View File

@ -281,6 +281,36 @@ public class TestSystemMetricsPublisher {
Assert.assertTrue(hasRegisteredEvent && hasFinishedEvent);
}
/**
 * Publishes only a container-finished event and checks that the stored
 * timeline entity carries exactly two "other info" entries: the allocated
 * host and the allocated port of the container.
 */
@Test(timeout = 10000)
public void testPublishHostPortInfoOnContainerFinished() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(0, 1);
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
  ContainerId containerId = ContainerId.newContainerId(attemptId, 1);
  RMContainer container = createRMContainer(containerId);

  metricsPublisher.containerFinished(container, container.getFinishTime());

  // Poll the store until the entity is present and has at least one event;
  // the @Test timeout bounds this loop.
  TimelineEntity entity;
  while (true) {
    entity = store.getEntity(
        containerId.toString(),
        ContainerMetricsConstants.ENTITY_TYPE,
        EnumSet.allOf(Field.class));
    if (entity != null && entity.getEvents().size() >= 1) {
      break;
    }
  }

  Assert.assertNotNull(entity.getOtherInfo());
  Assert.assertEquals(2, entity.getOtherInfo().size());

  Object publishedHost = entity.getOtherInfo().get(
      ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO);
  Object publishedPort = entity.getOtherInfo().get(
      ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO);

  // Presence checks first, then value checks, in the same order as before.
  Assert.assertNotNull(publishedHost);
  Assert.assertNotNull(publishedPort);
  Assert.assertEquals(
      container.getAllocatedNode().getHost(), publishedHost);
  Assert.assertEquals(
      container.getAllocatedNode().getPort(), publishedPort);
}
@Test(timeout = 10000)
public void testPublishContainerMetrics() throws Exception {
ContainerId containerId =