From 585ebd873a55bedd2a364d256837f08ada8ba032 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 23 Aug 2018 09:29:46 -0500 Subject: [PATCH] YARN-8649. NPE in localizer hearbeat processing if a container is killed while localizing. Contributed by lujie --- .../localizer/LocalResourcesTrackerImpl.java | 5 +++++ .../localizer/ResourceLocalizationService.java | 12 ++++++++---- .../localizer/TestResourceLocalizationService.java | 12 +++++++++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java index dd315431a13..ad24c62828f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java @@ -500,6 +500,11 @@ public Path getPathForLocalization(LocalResourceRequest req, Path localPath = new Path(rPath, req.getPath().getName()); LocalizedResource rsrc = localrsrc.get(req); + if (rsrc == null) { + LOG.warn("Resource " + req + " has been removed" + + " and will no longer be localized"); + return null; + } rsrc.setLocalPath(localPath); LocalResource lr = LocalResource.newInstance(req.getResource(), req.getType(), req.getVisibility(), req.getSize(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index 2892d1fab47..d9b887f56de 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -885,6 +885,9 @@ public void addResource(LocalizerResourceRequestEvent request) { Path publicDirDestPath = publicRsrc.getPathForLocalization(key, publicRootPath, delService); + if (publicDirDestPath == null) { + return; + } if (!publicDirDestPath.getParent().equals(publicRootPath)) { createParentDirs(publicDirDestPath, publicRootPath); if (diskValidator != null) { @@ -1175,10 +1178,11 @@ LocalizerHeartbeatResponse processHeartbeat( LocalResourcesTracker tracker = getLocalResourcesTracker( next.getVisibility(), user, applicationId); if (tracker != null) { - ResourceLocalizationSpec resource = - NodeManagerBuilderUtils.newResourceLocalizationSpec(next, - getPathForLocalization(next, tracker)); - rsrcs.add(resource); + Path localPath = getPathForLocalization(next, tracker); + if (localPath != null) { + rsrcs.add(NodeManagerBuilderUtils.newResourceLocalizationSpec( + next, localPath)); + } } } catch (IOException e) { LOG.error("local path for PRIVATE localization could not be " + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java index 2b9148e5c16..21896ca4c01 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java @@ -1717,8 +1717,18 @@ dispatcher, exec, mock(DeletionService.class), dirsHandler, nmContext, assertEquals("NM should tell localizer to be LIVE in Heartbeat.", LocalizerAction.LIVE, response.getLocalizerAction()); - // Cleanup application. + // Cleanup container. spyService.handle(new ContainerLocalizationCleanupEvent(c, rsrcs)); + dispatcher.await(); + try { + /*Directly send heartbeat to introduce race as container + is being cleaned up.*/ + locRunnerForContainer.processHeartbeat( + Collections.singletonList(rsrcSuccess)); + } catch (Exception e) { + fail("Exception should not have been thrown on processing heartbeat"); + } + // Cleanup application. spyService.handle(new ApplicationLocalizationEvent( LocalizationEventType.DESTROY_APPLICATION_RESOURCES, app)); dispatcher.await();