diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index c43c8733e19..616e7c3901d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1981,6 +1981,11 @@ public static boolean isAclEnabled(Configuration conf) {
public static final int
DEFAULT_TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS = 60;
+ public static final String ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS =
+ TIMELINE_SERVICE_PREFIX + "app-collector.linger-period.ms";
+
+ public static final int DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS = 1000;
+
// mark app-history related configs @Private as application history is going
// to be integrated into the timeline service
@Private
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index a33c131c918..2d3de01571c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -2213,6 +2213,13 @@
60
+
+ Time period till which the application collector will be alive
+ in NM, after the application master container finishes.
+ yarn.timeline-service.app-collector.linger-period.ms
+ 1000
+
+
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
index 0319e34a5e7..b73853003b2 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
@@ -19,6 +19,9 @@
package org.apache.hadoop.yarn.server.timelineservice.collector;
import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -54,6 +57,8 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
private static final int SHUTDOWN_HOOK_PRIORITY = 30;
private final NodeTimelineCollectorManager collectorManager;
+ private long collectorLingerPeriod;
+ private ScheduledExecutorService scheduler;
public PerNodeTimelineCollectorsAuxService() {
this(new NodeTimelineCollectorManager());
@@ -70,6 +75,10 @@ protected void serviceInit(Configuration conf) throws Exception {
if (!YarnConfiguration.timelineServiceV2Enabled(conf)) {
throw new YarnException("Timeline service v2 is not enabled");
}
+ collectorLingerPeriod =
+ conf.getLong(YarnConfiguration.ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS,
+ YarnConfiguration.DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS);
+ scheduler = Executors.newSingleThreadScheduledExecutor();
collectorManager.init(conf);
super.serviceInit(conf);
}
@@ -82,6 +91,12 @@ protected void serviceStart() throws Exception {
@Override
protected void serviceStop() throws Exception {
+ scheduler.shutdown();
+ if (!scheduler.awaitTermination(collectorLingerPeriod,
+ TimeUnit.MILLISECONDS)) {
+ LOG.warn(
+ "Scheduler terminated before removing the application collectors");
+ }
collectorManager.stop();
super.serviceStop();
}
@@ -141,17 +156,11 @@ public void stopContainer(ContainerTerminationContext context) {
if (context.getContainerType() == ContainerType.APPLICATION_MASTER) {
final ApplicationId appId =
context.getContainerId().getApplicationAttemptId().getApplicationId();
- new Thread(new Runnable() {
+ scheduler.schedule(new Runnable() {
public void run() {
- try {
- // TODO Temporary Fix until solution for YARN-3995 is finalized.
- Thread.sleep(1000l);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
removeApplication(appId);
}
- }).start();
+ }, collectorLingerPeriod, TimeUnit.MILLISECONDS);
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
index 4fdf47e062b..f2775d5657c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
@@ -22,12 +22,14 @@
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
-import static org.mockito.Mockito.any;
+import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
+import java.io.IOException;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Shell;
@@ -45,8 +47,6 @@
import org.junit.After;
import org.junit.Test;
-import java.io.IOException;
-
public class TestPerNodeTimelineCollectorsAuxService {
private ApplicationAttemptId appAttemptId;
private PerNodeTimelineCollectorsAuxService auxService;
@@ -103,8 +103,9 @@ public void testRemoveApplication() throws Exception {
when(context.getContainerType()).thenReturn(
ContainerType.APPLICATION_MASTER);
auxService.stopContainer(context);
-
- // TODO Temporary Fix until solution for YARN-3995 is finalized
+ // auxService should have the app's collector and need to remove only after
+ // a configured period
+ assertTrue(auxService.hasApplication(appAttemptId.getApplicationId()));
for (int i = 0; i < 4; i++) {
Thread.sleep(500l);
if (!auxService.hasApplication(appAttemptId.getApplicationId())) {