From a3d4a25bbfe5e41393f790e77b8e457f13c8424d Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Sat, 18 Aug 2018 10:26:55 +0530 Subject: [PATCH] YARN-8679. [ATSv2] If HBase cluster is down for long time, high chances that NM ContainerManager dispatcher get blocked. Contributed by Wangda Tan. (cherry picked from commit 4aacbfff605262aaf3dbd926258afcadc86c72c0) --- .../applicationsmanager/TestAMLaunchFailure.java | 2 +- .../TestSchedulerNegotiator.java | 2 +- .../TestTimelineServiceClientIntegration.java | 3 ++- .../security/TestTimelineAuthFilterForV2.java | 2 +- .../PerNodeTimelineCollectorsAuxService.java | 13 +++++++++---- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java index c0009dd465c..ad39099999d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java @@ -90,7 +90,7 @@ public class TestAMLaunchFailure { // } // // @Override -// public void addApplication(ApplicationId applicationId, +// public void addApplicationIfAbsent(ApplicationId applicationId, // ApplicationMaster master, String user, String queue, Priority priority // , ApplicationStore appStore) // throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java index 7d06e55e7f5..fedbf2b353d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java @@ -67,7 +67,7 @@ public class TestSchedulerNegotiator { // return null; // } // @Override -// public void addApplication(ApplicationId applicationId, +// public void addApplicationIfAbsent(ApplicationId applicationId, // ApplicationMaster master, String user, String queue, Priority priority, // ApplicationStore store) // throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/TestTimelineServiceClientIntegration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/TestTimelineServiceClientIntegration.java index 6a5ef552510..7cf7428db28 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/TestTimelineServiceClientIntegration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/TestTimelineServiceClientIntegration.java @@ -81,7 +81,8 @@ public class TestTimelineServiceClientIntegration { auxService = PerNodeTimelineCollectorsAuxService.launchServer(new String[0], collectorManager, conf); - auxService.addApplication(ApplicationId.newInstance(0, 1), "user"); + auxService + .addApplicationIfAbsent(ApplicationId.newInstance(0, 1), "user"); } catch (ExitUtil.ExitException e) { fail(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/security/TestTimelineAuthFilterForV2.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/security/TestTimelineAuthFilterForV2.java index bb511d8178a..356bfc79dd1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/security/TestTimelineAuthFilterForV2.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/security/TestTimelineAuthFilterForV2.java @@ -210,7 +210,7 @@ public class TestTimelineAuthFilterForV2 { YarnConfiguration.TIMELINE_SERVICE_PRINCIPAL, "localhost"); } ApplicationId appId = ApplicationId.newInstance(0, 1); - auxService.addApplication( + auxService.addApplicationIfAbsent( appId, UserGroupInformation.getCurrentUser().getUserName()); if (!withKerberosLogin) { AppLevelTimelineCollector collector = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java index c15f99d3bb2..82dd793b8b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java @@ -125,7 +125,7 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { * @param user Application Master container user. * @return whether it was added successfully */ - public boolean addApplication(ApplicationId appId, String user) { + public boolean addApplicationIfAbsent(ApplicationId appId, String user) { AppLevelTimelineCollector collector = new AppLevelTimelineCollectorWithAgg(appId, user); return (collectorManager.putIfAbsent(appId, collector) @@ -156,15 +156,15 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { if (context.getContainerType() == ContainerType.APPLICATION_MASTER) { ApplicationId appId = context.getContainerId(). getApplicationAttemptId().getApplicationId(); - synchronized (appIdToContainerId) { + synchronized (appIdToContainerId){ Set masterContainers = appIdToContainerId.get(appId); if (masterContainers == null) { masterContainers = new HashSet<>(); appIdToContainerId.put(appId, masterContainers); } masterContainers.add(context.getContainerId()); - addApplication(appId, context.getUser()); } + addApplicationIfAbsent(appId, context.getUser()); } } @@ -189,6 +189,7 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { containerId.getApplicationAttemptId().getApplicationId(); return scheduler.schedule(new Runnable() { public void run() { + boolean shouldRemoveApplication = false; synchronized (appIdToContainerId) { Set masterContainers = appIdToContainerId.get(appId); if (masterContainers == null) { @@ -199,10 +200,14 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService { masterContainers.remove(containerId); if (masterContainers.size() == 0) { // remove only if it is last master container - removeApplication(appId); + shouldRemoveApplication = true; appIdToContainerId.remove(appId); } } + + if (shouldRemoveApplication) { + removeApplication(appId); + } } }, collectorLingerPeriod, TimeUnit.MILLISECONDS); }