From f4d280f02b557885cd5e5cf36abc36eb579ccfb4 Mon Sep 17 00:00:00 2001
From: Eric Yang
Date: Wed, 2 May 2018 20:07:19 -0400
Subject: [PATCH] YARN-8194. Fixed reinitialization error for LinuxContainerExecutor. Contributed by Chandni Singh

---
 .../launcher/ContainerLaunch.java             | 37 +++++++++++++++++++
 .../launcher/ContainerRelaunch.java           | 36 +-----------------
 2 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
index 9efe6864f52..fa778994c8d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher;
 
 import static org.apache.hadoop.fs.CreateFlag.CREATE;
 import static org.apache.hadoop.fs.CreateFlag.OVERWRITE;
+
+import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -844,6 +846,7 @@ public class ContainerLaunch implements Callable {
       throw new IOException("Reap container failed for container "
           + containerIdStr);
     }
+    cleanupContainerFiles(getContainerWorkDir());
   }
 
   /**
@@ -1858,4 +1861,38 @@ public class ContainerLaunch implements Callable {
       context.getNMStateStore().storeContainerWorkDir(containerId, workDir);
     }
   }
+
+  protected Path getContainerWorkDir() throws IOException {
+    String containerWorkDir = container.getWorkDir();
+    if (containerWorkDir == null
+        || !dirsHandler.isGoodLocalDir(containerWorkDir)) {
+      throw new IOException(
+          "Could not find a good work dir " + containerWorkDir
+              + " for container " + container);
+    }
+
+    return new Path(containerWorkDir);
+  }
+
+  /**
+   * Clean up container's files for container relaunch or cleanup.
+   */
+  protected void cleanupContainerFiles(Path containerWorkDir) {
+    LOG.debug("cleanup container {} files", containerWorkDir);
+    // delete ContainerScriptPath
+    deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
+    // delete TokensPath
+    deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
+  }
+
+  private void deleteAsUser(Path path) {
+    try {
+      exec.deleteAsUser(new DeletionAsUserContext.Builder()
+          .setUser(container.getUser())
+          .setSubDir(path)
+          .build());
+    } catch (Exception e) {
+      LOG.warn("Failed to delete " + path, e);
+    }
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java
index c6e3ed4c646..f69cf967aef 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java
@@ -34,7 +34,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
 import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
-import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -71,7 +70,8 @@ public class ContainerRelaunch extends ContainerLaunch {
     Path containerLogDir;
     try {
       Path containerWorkDir = getContainerWorkDir();
-      cleanupPreviousContainerFiles(containerWorkDir);
+      // Clean up container's previous files for container relaunch.
+      cleanupContainerFiles(containerWorkDir);
 
       containerLogDir = getContainerLogDir();
 
@@ -148,17 +148,6 @@ public class ContainerRelaunch extends ContainerLaunch {
     return ret;
   }
 
-  private Path getContainerWorkDir() throws IOException {
-    String containerWorkDir = container.getWorkDir();
-    if (containerWorkDir == null
-        || !dirsHandler.isGoodLocalDir(containerWorkDir)) {
-      throw new IOException(
-          "Could not find a good work dir " + containerWorkDir
-              + " for container " + container);
-    }
-
-    return new Path(containerWorkDir);
-  }
 
   private Path getContainerLogDir() throws IOException {
     String containerLogDir = container.getLogDir();
@@ -190,25 +179,4 @@ public class ContainerRelaunch extends ContainerLaunch {
     return dirsHandler.getLocalPathForRead(
         getPidFileSubpath(appIdStr, containerIdStr));
   }
-
-  /**
-   * Clean up container's previous files for container relaunch.
-   */
-  private void cleanupPreviousContainerFiles(Path containerWorkDir) {
-    // delete ContainerScriptPath
-    deleteAsUser(new Path(containerWorkDir, CONTAINER_SCRIPT));
-    // delete TokensPath
-    deleteAsUser(new Path(containerWorkDir, FINAL_CONTAINER_TOKENS_FILE));
-  }
-
-  private void deleteAsUser(Path path) {
-    try {
-      exec.deleteAsUser(new DeletionAsUserContext.Builder()
-          .setUser(container.getUser())
-          .setSubDir(path)
-          .build());
-    } catch (Exception e) {
-      LOG.warn("Failed to delete " + path, e);
-    }
-  }
 }