YARN-8649. NPE in localizer heartbeat processing if a container is killed while localizing. Contributed by lujie

(cherry picked from commit 585ebd873a)
This commit is contained in:
Jason Lowe 2018-08-23 09:29:46 -05:00
parent 4fa97013a9
commit 84654451fa
3 changed files with 24 additions and 5 deletions

View File

@ -500,6 +500,11 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
Path localPath = new Path(rPath, req.getPath().getName());
LocalizedResource rsrc = localrsrc.get(req);
if (rsrc == null) {
LOG.warn("Resource " + req + " has been removed"
+ " and will no longer be localized");
return null;
}
rsrc.setLocalPath(localPath);
LocalResource lr = LocalResource.newInstance(req.getResource(),
req.getType(), req.getVisibility(), req.getSize(),

View File

@ -888,6 +888,9 @@ public class ResourceLocalizationService extends CompositeService
Path publicDirDestPath =
publicRsrc.getPathForLocalization(key, publicRootPath,
delService);
if (publicDirDestPath == null) {
return;
}
if (!publicDirDestPath.getParent().equals(publicRootPath)) {
createParentDirs(publicDirDestPath, publicRootPath);
if (diskValidator != null) {
@ -1178,10 +1181,11 @@ public class ResourceLocalizationService extends CompositeService
LocalResourcesTracker tracker = getLocalResourcesTracker(
next.getVisibility(), user, applicationId);
if (tracker != null) {
ResourceLocalizationSpec resource =
NodeManagerBuilderUtils.newResourceLocalizationSpec(next,
getPathForLocalization(next, tracker));
rsrcs.add(resource);
Path localPath = getPathForLocalization(next, tracker);
if (localPath != null) {
rsrcs.add(NodeManagerBuilderUtils.newResourceLocalizationSpec(
next, localPath));
}
}
} catch (IOException e) {
LOG.error("local path for PRIVATE localization could not be " +

View File

@ -1717,8 +1717,18 @@ public class TestResourceLocalizationService {
assertEquals("NM should tell localizer to be LIVE in Heartbeat.",
LocalizerAction.LIVE, response.getLocalizerAction());
// Cleanup application.
// Cleanup container.
spyService.handle(new ContainerLocalizationCleanupEvent(c, rsrcs));
dispatcher.await();
try {
/* Directly send a heartbeat to introduce a race while the container
is being cleaned up. */
locRunnerForContainer.processHeartbeat(
Collections.singletonList(rsrcSuccess));
} catch (Exception e) {
fail("Exception should not have been thrown on processing heartbeat");
}
// Cleanup application.
spyService.handle(new ApplicationLocalizationEvent(
LocalizationEventType.DESTROY_APPLICATION_RESOURCES, app));
dispatcher.await();