YARN-4711. NM is going down with NPEs due to single-threaded processing of events by the Timeline client (Naganarasimha G R via sjlee)
This commit is contained in:
parent
6f6cc647d6
commit
84c35ac6c4
|
@ -117,7 +117,14 @@
|
||||||
|
|
||||||
<!-- Object cast is based on the event type -->
|
<!-- Object cast is based on the event type -->
|
||||||
<Match>
|
<Match>
|
||||||
<Class name="org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher$ApplicationEventHandler" />
|
<Class name="org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher" />
|
||||||
|
<Method name="publishApplicationEvent" />
|
||||||
|
<Bug pattern="BC_UNCONFIRMED_CAST" />
|
||||||
|
</Match>
|
||||||
|
|
||||||
|
<Match>
|
||||||
|
<Class name="org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher" />
|
||||||
|
<Method name="publishLocalizationEvent" />
|
||||||
<Bug pattern="BC_UNCONFIRMED_CAST" />
|
<Bug pattern="BC_UNCONFIRMED_CAST" />
|
||||||
</Match>
|
</Match>
|
||||||
|
|
||||||
|
|
|
@ -17,15 +17,6 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.yarn.api.records.timelineservice;
|
package org.apache.hadoop.yarn.api.records.timelineservice;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
|
||||||
import org.apache.hadoop.yarn.util.TimelineServiceHelper;
|
|
||||||
import org.codehaus.jackson.annotate.JsonSetter;
|
|
||||||
|
|
||||||
import javax.xml.bind.annotation.XmlAccessType;
|
|
||||||
import javax.xml.bind.annotation.XmlAccessorType;
|
|
||||||
import javax.xml.bind.annotation.XmlElement;
|
|
||||||
import javax.xml.bind.annotation.XmlRootElement;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -33,6 +24,16 @@ import java.util.NavigableSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
import javax.xml.bind.annotation.XmlAccessType;
|
||||||
|
import javax.xml.bind.annotation.XmlAccessorType;
|
||||||
|
import javax.xml.bind.annotation.XmlElement;
|
||||||
|
import javax.xml.bind.annotation.XmlRootElement;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.yarn.util.TimelineServiceHelper;
|
||||||
|
import org.codehaus.jackson.annotate.JsonSetter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The basic timeline entity data structure for timeline service v2. Timeline
|
* The basic timeline entity data structure for timeline service v2. Timeline
|
||||||
* entity objects are not thread safe and should not be accessed concurrently.
|
* entity objects are not thread safe and should not be accessed concurrently.
|
||||||
|
@ -564,6 +565,10 @@ public class TimelineEntity implements Comparable<TimelineEntity> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
if (real == null) {
|
||||||
return identifier.toString();
|
return identifier.toString();
|
||||||
|
} else {
|
||||||
|
return real.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -429,9 +429,8 @@ public class TimelineClientImpl extends TimelineClient {
|
||||||
URI uri = constructResURI(getConfig(), timelineServiceAddress, true);
|
URI uri = constructResURI(getConfig(), timelineServiceAddress, true);
|
||||||
putObjects(uri, path, params, obj);
|
putObjects(uri, path, params, obj);
|
||||||
needRetry = false;
|
needRetry = false;
|
||||||
} catch (Exception e) {
|
} catch (IOException e) {
|
||||||
// TODO only handle exception for timelineServiceAddress being updated.
|
// handle exception for timelineServiceAddress being updated.
|
||||||
// skip retry for other exceptions.
|
|
||||||
checkRetryWithSleep(retries, e);
|
checkRetryWithSleep(retries, e);
|
||||||
retries--;
|
retries--;
|
||||||
}
|
}
|
||||||
|
@ -458,29 +457,27 @@ public class TimelineClientImpl extends TimelineClient {
|
||||||
* @param retries
|
* @param retries
|
||||||
* @param e
|
* @param e
|
||||||
*/
|
*/
|
||||||
private void checkRetryWithSleep(int retries, Exception e) throws
|
private void checkRetryWithSleep(int retries, IOException e)
|
||||||
YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
if (retries > 0) {
|
if (retries > 0) {
|
||||||
try {
|
try {
|
||||||
Thread.sleep(this.serviceRetryInterval);
|
Thread.sleep(this.serviceRetryInterval);
|
||||||
} catch (InterruptedException ex) {
|
} catch (InterruptedException ex) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
|
throw new YarnException("Interrupted while retrying to connect to ATS");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG.error("TimelineClient has reached to max retry times :" +
|
StringBuilder msg =
|
||||||
this.maxServiceRetries + " for service address: " +
|
new StringBuilder("TimelineClient has reached to max retry times : ");
|
||||||
timelineServiceAddress);
|
msg.append(this.maxServiceRetries);
|
||||||
if (e instanceof YarnException) {
|
msg.append(" for service address: ");
|
||||||
throw (YarnException)e;
|
msg.append(timelineServiceAddress);
|
||||||
} else if (e instanceof IOException) {
|
LOG.error(msg.toString());
|
||||||
throw (IOException)e;
|
throw new IOException(msg.toString(), e);
|
||||||
} else {
|
|
||||||
throw new YarnException(e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void putObjects(
|
protected void putObjects(
|
||||||
URI base, String path, MultivaluedMap<String, String> params, Object obj)
|
URI base, String path, MultivaluedMap<String, String> params, Object obj)
|
||||||
throws IOException, YarnException {
|
throws IOException, YarnException {
|
||||||
ClientResponse resp;
|
ClientResponse resp;
|
||||||
|
@ -636,17 +633,19 @@ public class TimelineClientImpl extends TimelineClient {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Poll timelineServiceAddress, for at most the given number of retries, while it is null.
|
* Poll timelineServiceAddress, for at most the given number of retries, while it is null.
|
||||||
|
*
|
||||||
* @param retries
|
* @param retries
|
||||||
* @return the left retry times
|
* @return the left retry times
|
||||||
|
* @throws YarnException if interrupted while waiting for the address
|
||||||
*/
|
*/
|
||||||
private int pollTimelineServiceAddress(int retries) {
|
private int pollTimelineServiceAddress(int retries) throws YarnException {
|
||||||
while (timelineServiceAddress == null && retries > 0) {
|
while (timelineServiceAddress == null && retries > 0) {
|
||||||
try {
|
try {
|
||||||
Thread.sleep(this.serviceRetryInterval);
|
Thread.sleep(this.serviceRetryInterval);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
|
throw new YarnException("Interrupted while trying to connect ATS");
|
||||||
}
|
}
|
||||||
// timelineServiceAddress = getTimelineServiceAddress();
|
|
||||||
retries--;
|
retries--;
|
||||||
}
|
}
|
||||||
return retries;
|
return retries;
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.yarn.client.api.impl;
|
package org.apache.hadoop.yarn.client.api.impl;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -34,22 +35,32 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.TestName;
|
||||||
|
|
||||||
public class TestTimelineClientV2Impl {
|
public class TestTimelineClientV2Impl {
|
||||||
private static final Log LOG =
|
private static final Log LOG =
|
||||||
LogFactory.getLog(TestTimelineClientV2Impl.class);
|
LogFactory.getLog(TestTimelineClientV2Impl.class);
|
||||||
private TestV2TimelineClient client;
|
private TestV2TimelineClient client;
|
||||||
private static long TIME_TO_SLEEP = 150;
|
private static long TIME_TO_SLEEP = 150;
|
||||||
|
private static final String EXCEPTION_MSG = "Exception in the content";
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() {
|
public void setup() {
|
||||||
YarnConfiguration conf = new YarnConfiguration();
|
conf = new YarnConfiguration();
|
||||||
conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
|
conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
|
||||||
conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 1.0f);
|
conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 1.0f);
|
||||||
conf.setInt(YarnConfiguration.NUMBER_OF_ASYNC_ENTITIES_TO_MERGE, 3);
|
conf.setInt(YarnConfiguration.NUMBER_OF_ASYNC_ENTITIES_TO_MERGE, 3);
|
||||||
|
if (!currTestName.getMethodName()
|
||||||
|
.contains("testRetryOnConnectionFailure")) {
|
||||||
client = createTimelineClient(conf);
|
client = createTimelineClient(conf);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public TestName currTestName = new TestName();
|
||||||
|
private YarnConfiguration conf;
|
||||||
|
|
||||||
private TestV2TimelineClient createTimelineClient(YarnConfiguration conf) {
|
private TestV2TimelineClient createTimelineClient(YarnConfiguration conf) {
|
||||||
ApplicationId id = ApplicationId.newInstance(0, 0);
|
ApplicationId id = ApplicationId.newInstance(0, 0);
|
||||||
|
@ -59,9 +70,34 @@ public class TestTimelineClientV2Impl {
|
||||||
return client;
|
return client;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TestV2TimelineClient extends TimelineClientImpl {
|
private class TestV2TimelineClientForExceptionHandling
|
||||||
|
extends TimelineClientImpl {
|
||||||
|
public TestV2TimelineClientForExceptionHandling(ApplicationId id) {
|
||||||
|
super(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean throwYarnException;
|
||||||
|
|
||||||
|
public void setThrowYarnException(boolean throwYarnException) {
|
||||||
|
this.throwYarnException = throwYarnException;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void putObjects(URI base, String path,
|
||||||
|
MultivaluedMap<String, String> params, Object obj)
|
||||||
|
throws IOException, YarnException {
|
||||||
|
if (throwYarnException) {
|
||||||
|
throw new YarnException(EXCEPTION_MSG);
|
||||||
|
} else {
|
||||||
|
throw new IOException(
|
||||||
|
"Failed to get the response from the timeline server.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class TestV2TimelineClient
|
||||||
|
extends TestV2TimelineClientForExceptionHandling {
|
||||||
private boolean sleepBeforeReturn;
|
private boolean sleepBeforeReturn;
|
||||||
private boolean throwException;
|
|
||||||
|
|
||||||
private List<TimelineEntities> publishedEntities;
|
private List<TimelineEntities> publishedEntities;
|
||||||
|
|
||||||
|
@ -75,10 +111,6 @@ public class TestTimelineClientV2Impl {
|
||||||
this.sleepBeforeReturn = sleepBeforeReturn;
|
this.sleepBeforeReturn = sleepBeforeReturn;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setThrowException(boolean throwException) {
|
|
||||||
this.throwException = throwException;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getNumOfTimelineEntitiesPublished() {
|
public int getNumOfTimelineEntitiesPublished() {
|
||||||
return publishedEntities.size();
|
return publishedEntities.size();
|
||||||
}
|
}
|
||||||
|
@ -91,7 +123,7 @@ public class TestTimelineClientV2Impl {
|
||||||
protected void putObjects(String path,
|
protected void putObjects(String path,
|
||||||
MultivaluedMap<String, String> params, Object obj)
|
MultivaluedMap<String, String> params, Object obj)
|
||||||
throws IOException, YarnException {
|
throws IOException, YarnException {
|
||||||
if (throwException) {
|
if (throwYarnException) {
|
||||||
throw new YarnException("ActualException");
|
throw new YarnException("ActualException");
|
||||||
}
|
}
|
||||||
publishedEntities.add((TimelineEntities) obj);
|
publishedEntities.add((TimelineEntities) obj);
|
||||||
|
@ -105,6 +137,45 @@ public class TestTimelineClientV2Impl {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExceptionMultipleRetry() {
|
||||||
|
TestV2TimelineClientForExceptionHandling client =
|
||||||
|
new TestV2TimelineClientForExceptionHandling(
|
||||||
|
ApplicationId.newInstance(0, 0));
|
||||||
|
int maxRetries = 2;
|
||||||
|
conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES,
|
||||||
|
maxRetries);
|
||||||
|
client.init(conf);
|
||||||
|
client.start();
|
||||||
|
client.setTimelineServiceAddress("localhost:12345");
|
||||||
|
try {
|
||||||
|
client.putEntities(new TimelineEntity());
|
||||||
|
} catch (IOException e) {
|
||||||
|
Assert.fail("YARN exception is expected");
|
||||||
|
} catch (YarnException e) {
|
||||||
|
Throwable cause = e.getCause();
|
||||||
|
Assert.assertTrue("IOException is expected",
|
||||||
|
cause instanceof IOException);
|
||||||
|
Assert.assertTrue("YARN exception is expected",
|
||||||
|
cause.getMessage().contains(
|
||||||
|
"TimelineClient has reached to max retry times : " + maxRetries));
|
||||||
|
}
|
||||||
|
|
||||||
|
client.setThrowYarnException(true);
|
||||||
|
try {
|
||||||
|
client.putEntities(new TimelineEntity());
|
||||||
|
} catch (IOException e) {
|
||||||
|
Assert.fail("YARN exception is expected");
|
||||||
|
} catch (YarnException e) {
|
||||||
|
Throwable cause = e.getCause();
|
||||||
|
Assert.assertTrue("YARN exception is expected",
|
||||||
|
cause instanceof YarnException);
|
||||||
|
Assert.assertTrue("YARN exception is expected",
|
||||||
|
cause.getMessage().contains(EXCEPTION_MSG));
|
||||||
|
}
|
||||||
|
client.stop();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPostEntities() throws Exception {
|
public void testPostEntities() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
@ -189,7 +260,7 @@ public class TestTimelineClientV2Impl {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExceptionCalls() throws Exception {
|
public void testExceptionCalls() throws Exception {
|
||||||
client.setThrowException(true);
|
client.setThrowYarnException(true);
|
||||||
try {
|
try {
|
||||||
client.putEntitiesAsync(generateEntity("1"));
|
client.putEntitiesAsync(generateEntity("1"));
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
|
|
|
@ -69,4 +69,12 @@ public class ContainerMetricsConstants {
|
||||||
|
|
||||||
public static final String ALLOCATED_HOST_HTTP_ADDRESS_ENTITY_INFO =
|
public static final String ALLOCATED_HOST_HTTP_ADDRESS_ENTITY_INFO =
|
||||||
"YARN_CONTAINER_ALLOCATED_HOST_HTTP_ADDRESS";
|
"YARN_CONTAINER_ALLOCATED_HOST_HTTP_ADDRESS";
|
||||||
|
|
||||||
|
// Event of this type will be emitted by NM.
|
||||||
|
public static final String LOCALIZATION_START_EVENT_TYPE =
|
||||||
|
"YARN_NM_CONTAINER_LOCALIZATION_STARTED";
|
||||||
|
|
||||||
|
// Event of this type will be emitted by NM.
|
||||||
|
public static final String LOCALIZATION_FINISHED_EVENT_TYPE =
|
||||||
|
"YARN_NM_CONTAINER_LOCALIZATION_FINISHED";
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,6 @@ import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeLabel;
|
import org.apache.hadoop.yarn.api.records.NodeLabel;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
||||||
import org.apache.hadoop.yarn.client.api.TimelineClient;
|
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
@ -89,6 +88,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
|
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
|
import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
@ -983,9 +983,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
LOG.debug("Sync a new collector address: " + collectorAddr +
|
LOG.debug("Sync a new collector address: " + collectorAddr +
|
||||||
" for application: " + appId + " from RM.");
|
" for application: " + appId + " from RM.");
|
||||||
}
|
}
|
||||||
TimelineClient client = application.getTimelineClient();
|
NMTimelinePublisher nmTimelinePublisher =
|
||||||
if (client != null) {
|
context.getNMTimelinePublisher();
|
||||||
client.setTimelineServiceAddress(collectorAddr);
|
if (nmTimelinePublisher != null) {
|
||||||
|
nmTimelinePublisher.setTimelineServiceAddress(
|
||||||
|
application.getAppId(), collectorAddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
import org.apache.hadoop.service.CompositeService;
|
import org.apache.hadoop.service.CompositeService;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.client.api.TimelineClient;
|
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
||||||
|
@ -42,6 +41,7 @@ import org.apache.hadoop.yarn.server.api.records.AppCollectorsMap;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
|
import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Service that handles collector information. It is used only if the timeline
|
* Service that handles collector information. It is used only if the timeline
|
||||||
|
@ -116,10 +116,10 @@ public class NMCollectorService extends CompositeService implements
|
||||||
String collectorAddr = collector.getCollectorAddr();
|
String collectorAddr = collector.getCollectorAddr();
|
||||||
newCollectorsMap.put(appId, collectorAddr);
|
newCollectorsMap.put(appId, collectorAddr);
|
||||||
// set registered collector address to TimelineClient.
|
// set registered collector address to TimelineClient.
|
||||||
TimelineClient client =
|
NMTimelinePublisher nmTimelinePublisher =
|
||||||
context.getApplications().get(appId).getTimelineClient();
|
context.getNMTimelinePublisher();
|
||||||
if (client != null) {
|
if (nmTimelinePublisher != null) {
|
||||||
client.setTimelineServiceAddress(collectorAddr);
|
nmTimelinePublisher.setTimelineServiceAddress(appId, collectorAddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
((NodeManager.NMContext)context).addRegisteredCollectors(
|
((NodeManager.NMContext)context).addRegisteredCollectors(
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.client.api.TimelineClient;
|
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
|
|
||||||
|
@ -41,7 +40,4 @@ public interface Application extends EventHandler<ApplicationEvent> {
|
||||||
String getFlowVersion();
|
String getFlowVersion();
|
||||||
|
|
||||||
long getFlowRunId();
|
long getFlowRunId();
|
||||||
|
|
||||||
TimelineClient getTimelineClient();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
|
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.timelineservice.NMTimelinePublisher;
|
||||||
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
|
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
|
||||||
import org.apache.hadoop.yarn.state.InvalidStateTransitionException;
|
import org.apache.hadoop.yarn.state.InvalidStateTransitionException;
|
||||||
import org.apache.hadoop.yarn.state.MultipleArcTransition;
|
import org.apache.hadoop.yarn.state.MultipleArcTransition;
|
||||||
|
@ -83,7 +84,6 @@ public class ApplicationImpl implements Application {
|
||||||
private final ReadLock readLock;
|
private final ReadLock readLock;
|
||||||
private final WriteLock writeLock;
|
private final WriteLock writeLock;
|
||||||
private final Context context;
|
private final Context context;
|
||||||
private TimelineClient timelineClient;
|
|
||||||
|
|
||||||
private static final Log LOG = LogFactory.getLog(ApplicationImpl.class);
|
private static final Log LOG = LogFactory.getLog(ApplicationImpl.class);
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ public class ApplicationImpl implements Application {
|
||||||
}
|
}
|
||||||
this.flowContext = flowContext;
|
this.flowContext = flowContext;
|
||||||
if (YarnConfiguration.systemMetricsPublisherEnabled(conf)) {
|
if (YarnConfiguration.systemMetricsPublisherEnabled(conf)) {
|
||||||
createAndStartTimelineClient(conf);
|
context.getNMTimelinePublisher().createTimelineClient(appId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -175,13 +175,6 @@ public class ApplicationImpl implements Application {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createAndStartTimelineClient(Configuration conf) {
|
|
||||||
// create and start timeline client
|
|
||||||
this.timelineClient = TimelineClient.createTimelineClient(appId);
|
|
||||||
timelineClient.init(conf);
|
|
||||||
timelineClient.start();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getUser() {
|
public String getUser() {
|
||||||
return user.toString();
|
return user.toString();
|
||||||
|
@ -192,11 +185,6 @@ public class ApplicationImpl implements Application {
|
||||||
return appId;
|
return appId;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public TimelineClient getTimelineClient() {
|
|
||||||
return timelineClient;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ApplicationState getApplicationState() {
|
public ApplicationState getApplicationState() {
|
||||||
this.readLock.lock();
|
this.readLock.lock();
|
||||||
|
@ -575,9 +563,10 @@ public class ApplicationImpl implements Application {
|
||||||
registeredCollectors.remove(app.getAppId());
|
registeredCollectors.remove(app.getAppId());
|
||||||
}
|
}
|
||||||
// stop the timelineClient when the application has finished.
|
// stop the timelineClient when the application has finished.
|
||||||
TimelineClient timelineClient = app.getTimelineClient();
|
NMTimelinePublisher nmTimelinePublisher =
|
||||||
if (timelineClient != null) {
|
app.context.getNMTimelinePublisher();
|
||||||
timelineClient.stop();
|
if (nmTimelinePublisher != null) {
|
||||||
|
nmTimelinePublisher.stopTimelineClient(app.getAppId());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,8 +18,10 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.nodemanager.timelineservice;
|
package org.apache.hadoop.yarn.server.nodemanager.timelineservice;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -29,7 +31,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.ContainerEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.ContainerEntity;
|
||||||
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
|
||||||
|
@ -41,16 +42,15 @@ import org.apache.hadoop.yarn.client.api.TimelineClient;
|
||||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.metrics.ContainerMetricsConstants;
|
import org.apache.hadoop.yarn.server.metrics.ContainerMetricsConstants;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ContainerLocalizationEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ContainerMetric;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ContainerMetric;
|
||||||
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
|
import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
|
||||||
|
@ -72,9 +72,12 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
|
|
||||||
private String httpAddress;
|
private String httpAddress;
|
||||||
|
|
||||||
|
protected final Map<ApplicationId, TimelineClient> appToClientMap;
|
||||||
|
|
||||||
public NMTimelinePublisher(Context context) {
|
public NMTimelinePublisher(Context context) {
|
||||||
super(NMTimelinePublisher.class.getName());
|
super(NMTimelinePublisher.class.getName());
|
||||||
this.context = context;
|
this.context = context;
|
||||||
|
appToClientMap = new ConcurrentHashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -82,12 +85,6 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
dispatcher = new AsyncDispatcher();
|
dispatcher = new AsyncDispatcher();
|
||||||
dispatcher.register(NMTimelineEventType.class,
|
dispatcher.register(NMTimelineEventType.class,
|
||||||
new ForwardingEventHandler());
|
new ForwardingEventHandler());
|
||||||
dispatcher
|
|
||||||
.register(ContainerEventType.class, new ContainerEventHandler());
|
|
||||||
dispatcher.register(ApplicationEventType.class,
|
|
||||||
new ApplicationEventHandler());
|
|
||||||
dispatcher.register(LocalizationEventType.class,
|
|
||||||
new LocalizationEventDispatcher());
|
|
||||||
addIfService(dispatcher);
|
addIfService(dispatcher);
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
}
|
}
|
||||||
|
@ -112,7 +109,6 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void reportContainerResourceUsage(Container container, Long pmemUsage,
|
public void reportContainerResourceUsage(Container container, Long pmemUsage,
|
||||||
Float cpuUsagePercentPerCore) {
|
Float cpuUsagePercentPerCore) {
|
||||||
if (pmemUsage != ResourceCalculatorProcessTree.UNAVAILABLE ||
|
if (pmemUsage != ResourceCalculatorProcessTree.UNAVAILABLE ||
|
||||||
|
@ -133,15 +129,32 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
Math.round(cpuUsagePercentPerCore));
|
Math.round(cpuUsagePercentPerCore));
|
||||||
entity.addMetric(cpuMetric);
|
entity.addMetric(cpuMetric);
|
||||||
}
|
}
|
||||||
dispatcher.getEventHandler()
|
ApplicationId appId = container.getContainerId().getApplicationAttemptId()
|
||||||
.handle(new TimelinePublishEvent(entity, container.getContainerId()
|
.getApplicationId();
|
||||||
.getApplicationAttemptId().getApplicationId()));
|
try {
|
||||||
|
// no need to put it as part of publisher as timeline client already has
|
||||||
|
// Queuing concept
|
||||||
|
TimelineClient timelineClient = getTimelineClient(appId);
|
||||||
|
if (timelineClient != null) {
|
||||||
|
timelineClient.putEntitiesAsync(entity);
|
||||||
|
} else {
|
||||||
|
LOG.error("Seems like client has been removed before the container"
|
||||||
|
+ " metric could be published for " + container.getContainerId());
|
||||||
|
}
|
||||||
|
} catch (IOException | YarnException e) {
|
||||||
|
LOG.error("Failed to publish Container metrics for container "
|
||||||
|
+ container.getContainerId(), e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void publishContainerCreatedEvent(ContainerEntity entity,
|
@SuppressWarnings("unchecked")
|
||||||
ContainerId containerId, Resource resource, Priority priority,
|
private void publishContainerCreatedEvent(ContainerEvent event) {
|
||||||
long timestamp) {
|
ContainerId containerId = event.getContainerID();
|
||||||
|
ContainerEntity entity = createContainerEntity(containerId);
|
||||||
|
Container container = context.getContainers().get(containerId);
|
||||||
|
Resource resource = container.getResource();
|
||||||
|
|
||||||
Map<String, Object> entityInfo = new HashMap<String, Object>();
|
Map<String, Object> entityInfo = new HashMap<String, Object>();
|
||||||
entityInfo.put(ContainerMetricsConstants.ALLOCATED_MEMORY_ENTITY_INFO,
|
entityInfo.put(ContainerMetricsConstants.ALLOCATED_MEMORY_ENTITY_INFO,
|
||||||
resource.getMemory());
|
resource.getMemory());
|
||||||
|
@ -152,7 +165,7 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
entityInfo.put(ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO,
|
entityInfo.put(ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO,
|
||||||
nodeId.getPort());
|
nodeId.getPort());
|
||||||
entityInfo.put(ContainerMetricsConstants.ALLOCATED_PRIORITY_ENTITY_INFO,
|
entityInfo.put(ContainerMetricsConstants.ALLOCATED_PRIORITY_ENTITY_INFO,
|
||||||
priority.toString());
|
container.getPriority().toString());
|
||||||
entityInfo.put(
|
entityInfo.put(
|
||||||
ContainerMetricsConstants.ALLOCATED_HOST_HTTP_ADDRESS_ENTITY_INFO,
|
ContainerMetricsConstants.ALLOCATED_HOST_HTTP_ADDRESS_ENTITY_INFO,
|
||||||
httpAddress);
|
httpAddress);
|
||||||
|
@ -160,13 +173,15 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
|
|
||||||
TimelineEvent tEvent = new TimelineEvent();
|
TimelineEvent tEvent = new TimelineEvent();
|
||||||
tEvent.setId(ContainerMetricsConstants.CREATED_EVENT_TYPE);
|
tEvent.setId(ContainerMetricsConstants.CREATED_EVENT_TYPE);
|
||||||
tEvent.setTimestamp(timestamp);
|
tEvent.setTimestamp(event.getTimestamp());
|
||||||
|
|
||||||
entity.addEvent(tEvent);
|
entity.addEvent(tEvent);
|
||||||
entity.setCreatedTime(timestamp);
|
entity.setCreatedTime(event.getTimestamp());
|
||||||
putEntity(entity, containerId.getApplicationAttemptId().getApplicationId());
|
dispatcher.getEventHandler().handle(new TimelinePublishEvent(entity,
|
||||||
|
containerId.getApplicationAttemptId().getApplicationId()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
private void publishContainerFinishedEvent(ContainerStatus containerStatus,
|
private void publishContainerFinishedEvent(ContainerStatus containerStatus,
|
||||||
long timeStamp) {
|
long timeStamp) {
|
||||||
ContainerId containerId = containerStatus.getContainerId();
|
ContainerId containerId = containerStatus.getContainerId();
|
||||||
|
@ -186,7 +201,38 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
tEvent.setInfo(eventInfo);
|
tEvent.setInfo(eventInfo);
|
||||||
|
|
||||||
entity.addEvent(tEvent);
|
entity.addEvent(tEvent);
|
||||||
putEntity(entity, containerId.getApplicationAttemptId().getApplicationId());
|
|
||||||
|
dispatcher.getEventHandler().handle(new TimelinePublishEvent(entity,
|
||||||
|
containerId.getApplicationAttemptId().getApplicationId()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void publishContainerLocalizationEvent(
|
||||||
|
ContainerLocalizationEvent event, String eventType) {
|
||||||
|
Container container = event.getContainer();
|
||||||
|
ContainerId containerId = container.getContainerId();
|
||||||
|
TimelineEntity entity = createContainerEntity(containerId);
|
||||||
|
|
||||||
|
TimelineEvent tEvent = new TimelineEvent();
|
||||||
|
tEvent.setId(eventType);
|
||||||
|
tEvent.setTimestamp(event.getTimestamp());
|
||||||
|
entity.addEvent(tEvent);
|
||||||
|
|
||||||
|
ApplicationId appId =
|
||||||
|
container.getContainerId().getApplicationAttemptId().getApplicationId();
|
||||||
|
try {
|
||||||
|
// no need to put it as part of publisher as timeline client already has
|
||||||
|
// Queuing concept
|
||||||
|
TimelineClient timelineClient = getTimelineClient(appId);
|
||||||
|
if (timelineClient != null) {
|
||||||
|
timelineClient.putEntitiesAsync(entity);
|
||||||
|
} else {
|
||||||
|
LOG.error("Seems like client has been removed before the event could be"
|
||||||
|
+ " published for " + container.getContainerId());
|
||||||
|
}
|
||||||
|
} catch (IOException | YarnException e) {
|
||||||
|
LOG.error("Failed to publish Container metrics for container "
|
||||||
|
+ container.getContainerId(), e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ContainerEntity createContainerEntity(
|
private static ContainerEntity createContainerEntity(
|
||||||
|
@ -207,23 +253,33 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
LOG.debug("Publishing the entity " + entity + ", JSON-style content: "
|
LOG.debug("Publishing the entity " + entity + ", JSON-style content: "
|
||||||
+ TimelineUtils.dumpTimelineRecordtoJSON(entity));
|
+ TimelineUtils.dumpTimelineRecordtoJSON(entity));
|
||||||
}
|
}
|
||||||
TimelineClient timelineClient =
|
TimelineClient timelineClient = getTimelineClient(appId);
|
||||||
context.getApplications().get(appId).getTimelineClient();
|
if (timelineClient != null) {
|
||||||
timelineClient.putEntities(entity);
|
timelineClient.putEntities(entity);
|
||||||
|
} else {
|
||||||
|
LOG.error("Seems like client has been removed before the entity "
|
||||||
|
+ "could be published for " + entity);
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("Error when publishing entity " + entity, e);
|
LOG.error("Error when publishing entity " + entity, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void publishApplicationEvent(ApplicationEvent event) {
|
public void publishApplicationEvent(ApplicationEvent event) {
|
||||||
// publish only when the desired event is received
|
// publish only when the desired event is received
|
||||||
switch (event.getType()) {
|
switch (event.getType()) {
|
||||||
case INIT_APPLICATION:
|
case INIT_APPLICATION:
|
||||||
case FINISH_APPLICATION:
|
case FINISH_APPLICATION:
|
||||||
case APPLICATION_CONTAINER_FINISHED:
|
|
||||||
case APPLICATION_LOG_HANDLING_FAILED:
|
case APPLICATION_LOG_HANDLING_FAILED:
|
||||||
dispatcher.getEventHandler().handle(event);
|
// TODO need to be handled in future,
|
||||||
|
// not sure to publish under which entity
|
||||||
|
break;
|
||||||
|
case APPLICATION_CONTAINER_FINISHED:
|
||||||
|
// this is actually used to publish the container Event
|
||||||
|
ApplicationContainerFinishedEvent evnt =
|
||||||
|
(ApplicationContainerFinishedEvent) event;
|
||||||
|
publishContainerFinishedEvent(evnt.getContainerStatus(),
|
||||||
|
event.getTimestamp());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -235,12 +291,11 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void publishContainerEvent(ContainerEvent event) {
|
public void publishContainerEvent(ContainerEvent event) {
|
||||||
// publish only when the desired event is received
|
// publish only when the desired event is received
|
||||||
switch (event.getType()) {
|
switch (event.getType()) {
|
||||||
case INIT_CONTAINER:
|
case INIT_CONTAINER:
|
||||||
dispatcher.getEventHandler().handle(event);
|
publishContainerCreatedEvent(event);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -253,15 +308,17 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void publishLocalizationEvent(LocalizationEvent event) {
|
public void publishLocalizationEvent(LocalizationEvent event) {
|
||||||
// publish only when the desired event is received
|
// publish only when the desired event is received
|
||||||
switch (event.getType()) {
|
switch (event.getType()) {
|
||||||
case CONTAINER_RESOURCES_LOCALIZED:
|
case CONTAINER_RESOURCES_LOCALIZED:
|
||||||
case INIT_CONTAINER_RESOURCES:
|
publishContainerLocalizationEvent((ContainerLocalizationEvent) event,
|
||||||
dispatcher.getEventHandler().handle(event);
|
ContainerMetricsConstants.LOCALIZATION_FINISHED_EVENT_TYPE);
|
||||||
|
break;
|
||||||
|
case INIT_CONTAINER_RESOURCES:
|
||||||
|
publishContainerLocalizationEvent((ContainerLocalizationEvent) event,
|
||||||
|
ContainerMetricsConstants.LOCALIZATION_START_EVENT_TYPE);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug(event.getType()
|
LOG.debug(event.getType()
|
||||||
|
@ -272,64 +329,6 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class ApplicationEventHandler implements
|
|
||||||
EventHandler<ApplicationEvent> {
|
|
||||||
@Override
|
|
||||||
public void handle(ApplicationEvent event) {
|
|
||||||
switch (event.getType()) {
|
|
||||||
case APPLICATION_CONTAINER_FINISHED:
|
|
||||||
// this is actually used to publish the container Event
|
|
||||||
ApplicationContainerFinishedEvent evnt =
|
|
||||||
(ApplicationContainerFinishedEvent) event;
|
|
||||||
publishContainerFinishedEvent(evnt.getContainerStatus(),
|
|
||||||
event.getTimestamp());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
LOG.error("Seems like event type is captured only in "
|
|
||||||
+ "publishApplicationEvent method and not handled here");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private class ContainerEventHandler implements EventHandler<ContainerEvent> {
|
|
||||||
@Override
|
|
||||||
public void handle(ContainerEvent event) {
|
|
||||||
ContainerId containerId = event.getContainerID();
|
|
||||||
Container container = context.getContainers().get(containerId);
|
|
||||||
long timestamp = event.getTimestamp();
|
|
||||||
ContainerEntity entity = createContainerEntity(containerId);
|
|
||||||
|
|
||||||
switch (event.getType()) {
|
|
||||||
case INIT_CONTAINER:
|
|
||||||
publishContainerCreatedEvent(entity, containerId,
|
|
||||||
container.getResource(), container.getPriority(), timestamp);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
LOG.error("Seems like event type is captured only in "
|
|
||||||
+ "publishContainerEvent method and not handled here");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final class LocalizationEventDispatcher implements
|
|
||||||
EventHandler<LocalizationEvent> {
|
|
||||||
@Override
|
|
||||||
public void handle(LocalizationEvent event) {
|
|
||||||
switch (event.getType()) {
|
|
||||||
case INIT_CONTAINER_RESOURCES:
|
|
||||||
case CONTAINER_RESOURCES_LOCALIZED:
|
|
||||||
// TODO after priority based flush jira is finished
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
LOG.error("Seems like event type is captured only in "
|
|
||||||
+ "publishLocalizationEvent method and not handled here");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* EventHandler implementation which forward events to NMMetricsPublisher.
|
* EventHandler implementation which forward events to NMMetricsPublisher.
|
||||||
* Making use of it, NMMetricsPublisher can avoid to have a public handle
|
* Making use of it, NMMetricsPublisher can avoid to have a public handle
|
||||||
|
@ -363,4 +362,33 @@ public class NMTimelinePublisher extends CompositeService {
|
||||||
return entityToPublish;
|
return entityToPublish;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void createTimelineClient(ApplicationId appId) {
|
||||||
|
if (!appToClientMap.containsKey(appId)) {
|
||||||
|
TimelineClient timelineClient =
|
||||||
|
TimelineClient.createTimelineClient(appId);
|
||||||
|
timelineClient.init(getConfig());
|
||||||
|
timelineClient.start();
|
||||||
|
appToClientMap.put(appId, timelineClient);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void stopTimelineClient(ApplicationId appId) {
|
||||||
|
TimelineClient client = appToClientMap.remove(appId);
|
||||||
|
if (client != null) {
|
||||||
|
client.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTimelineServiceAddress(ApplicationId appId,
|
||||||
|
String collectorAddr) {
|
||||||
|
TimelineClient client = appToClientMap.get(appId);
|
||||||
|
if (client != null) {
|
||||||
|
client.setTimelineServiceAddress(collectorAddr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private TimelineClient getTimelineClient(ApplicationId appId) {
|
||||||
|
return appToClientMap.get(appId);
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -20,14 +20,12 @@ package org.apache.hadoop.yarn.server.nodemanager.timelineservice;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.mockito.Matchers.any;
|
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
@ -39,7 +37,6 @@ import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
|
||||||
import org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl;
|
import org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
@ -53,20 +50,23 @@ public class TestNMTimelinePublisher {
|
||||||
public void testContainerResourceUsage() {
|
public void testContainerResourceUsage() {
|
||||||
Context context = mock(Context.class);
|
Context context = mock(Context.class);
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
ConcurrentMap<ApplicationId, Application> map = mock(ConcurrentMap.class);
|
final DummyTimelineClient timelineClient = new DummyTimelineClient();
|
||||||
Application aApp = mock(Application.class);
|
|
||||||
when(map.get(any(ApplicationId.class))).thenReturn(aApp);
|
|
||||||
DummyTimelineClient timelineClient = new DummyTimelineClient();
|
|
||||||
when(aApp.getTimelineClient()).thenReturn(timelineClient);
|
|
||||||
when(context.getApplications()).thenReturn(map);
|
|
||||||
when(context.getNodeId()).thenReturn(NodeId.newInstance("localhost", 0));
|
when(context.getNodeId()).thenReturn(NodeId.newInstance("localhost", 0));
|
||||||
when(context.getHttpPort()).thenReturn(0);
|
when(context.getHttpPort()).thenReturn(0);
|
||||||
NMTimelinePublisher publisher = new NMTimelinePublisher(context);
|
NMTimelinePublisher publisher = new NMTimelinePublisher(context) {
|
||||||
|
public void createTimelineClient(ApplicationId appId) {
|
||||||
|
if (!appToClientMap.containsKey(appId)) {
|
||||||
|
appToClientMap.put(appId, timelineClient);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
publisher.init(new Configuration());
|
publisher.init(new Configuration());
|
||||||
publisher.start();
|
publisher.start();
|
||||||
|
ApplicationId appId = ApplicationId.newInstance(0, 1);
|
||||||
|
publisher.createTimelineClient(appId);
|
||||||
Container aContainer = mock(Container.class);
|
Container aContainer = mock(Container.class);
|
||||||
when(aContainer.getContainerId()).thenReturn(ContainerId.newContainerId(
|
when(aContainer.getContainerId()).thenReturn(ContainerId.newContainerId(
|
||||||
ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1),
|
ApplicationAttemptId.newInstance(appId, 1),
|
||||||
0L));
|
0L));
|
||||||
publisher.reportContainerResourceUsage(aContainer, 1024L, 8F);
|
publisher.reportContainerResourceUsage(aContainer, 1024L, 8F);
|
||||||
verifyPublishedResourceUsageMetrics(timelineClient, 1024L, 8);
|
verifyPublishedResourceUsageMetrics(timelineClient, 1024L, 8);
|
||||||
|
@ -141,7 +141,7 @@ public class TestNMTimelinePublisher {
|
||||||
private TimelineEntity[] lastPublishedEntities;
|
private TimelineEntity[] lastPublishedEntities;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void putEntities(TimelineEntity... entities)
|
public void putEntitiesAsync(TimelineEntity... entities)
|
||||||
throws IOException, YarnException {
|
throws IOException, YarnException {
|
||||||
this.lastPublishedEntities = entities;
|
this.lastPublishedEntities = entities;
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,9 +101,4 @@ public class MockApp implements Application {
|
||||||
public long getFlowRunId() {
|
public long getFlowRunId() {
|
||||||
return flowRunId;
|
return flowRunId;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public TimelineClient getTimelineClient() {
|
|
||||||
return timelineClient;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue