diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d726c2def1a..ab99d670283 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -54,6 +54,9 @@ Release 2.3.0 - UNRELEASED YARN-1268. TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza) + YARN-1284. LCE: Race condition leaves dangling cgroups entries for killed + containers. (Alejandro Abdelnur via Sandy Ryza) + Release 2.2.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 2003e13c3b5..be9e301b913 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -674,7 +674,19 @@ public class YarnConfiguration extends Configuration { /** Where the linux container executor should mount cgroups if not found */ public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH = NM_PREFIX + "linux-container-executor.cgroups.mount-path"; - + + + /** + * Interval of time the linux container executor should try cleaning up + * cgroups entry when cleaning up a container. This is required due to what + * it seems a race condition because the SIGTERM/SIGKILL is asynch. + */ + public static final String NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT = + NM_PREFIX + "linux-container-executor.cgroups.delete-timeout-ms"; + + public static final long DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT = + 1000; + /** T-file compression types used to compress aggregated logs.*/ public static final String NM_LOG_AGG_COMPRESSION_TYPE = NM_PREFIX + "log-aggregation.compression-type"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java index 3a44afee0ad..d5bd22540f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java @@ -32,6 +32,7 @@ import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -40,6 +41,8 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { @@ -59,8 +62,13 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { private final int CPU_DEFAULT_WEIGHT = 1024; // set by kernel private final Map controllerPaths; // Controller -> path + private long deleteCgroupTimeout; + // package private for testing purposes + Clock clock; + public CgroupsLCEResourcesHandler() { this.controllerPaths = new HashMap(); + clock = new SystemClock(); } @Override @@ -73,7 +81,8 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { return conf; } - public synchronized void init(LinuxContainerExecutor lce) throws IOException { + @VisibleForTesting + void initConfig() throws IOException { this.cgroupPrefix = conf.get(YarnConfiguration. NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn"); @@ -82,6 +91,9 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { this.cgroupMountPath = conf.get(YarnConfiguration. NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, null); + this.deleteCgroupTimeout = conf.getLong( + YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT, + YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT); // remove extra /'s at end or start of cgroupPrefix if (cgroupPrefix.charAt(0) == '/') { cgroupPrefix = cgroupPrefix.substring(1); @@ -91,7 +103,11 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { if (cgroupPrefix.charAt(len - 1) == '/') { cgroupPrefix = cgroupPrefix.substring(0, len - 1); } + } + public void init(LinuxContainerExecutor lce) throws IOException { + initConfig(); + // mount cgroups if requested if (cgroupMount && cgroupMountPath != null) { ArrayList cgroupKVs = new ArrayList(); @@ -158,14 +174,32 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { } } - private void deleteCgroup(String controller, String groupName) { - String path = pathForCgroup(controller, groupName); - - LOG.debug("deleteCgroup: " + path); - - if (! new File(path).delete()) { - LOG.warn("Unable to delete cgroup at: " + path); + @VisibleForTesting + boolean deleteCgroup(String cgroupPath) { + boolean deleted; + + if (LOG.isDebugEnabled()) { + LOG.debug("deleteCgroup: " + cgroupPath); } + + long start = clock.getTime(); + do { + deleted = new File(cgroupPath).delete(); + if (!deleted) { + try { + Thread.sleep(20); + } catch (InterruptedException ex) { + // NOP + } + } + } while (!deleted && (clock.getTime() - start) < deleteCgroupTimeout); + + if (!deleted) { + LOG.warn("Unable to delete cgroup at: " + cgroupPath + + ", tried to delete for " + deleteCgroupTimeout + "ms"); + } + + return deleted; } /* @@ -185,21 +219,8 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { } private void clearLimits(ContainerId containerId) { - String containerName = containerId.toString(); - - // Based on testing, ApplicationMaster executables don't terminate until - // a little after the container appears to have finished. Therefore, we - // wait a short bit for the cgroup to become empty before deleting it. - if (containerId.getId() == 1) { - try { - Thread.sleep(500); - } catch (InterruptedException e) { - // not a problem, continue anyway - } - } - if (isCpuWeightEnabled()) { - deleteCgroup(CONTROLLER_CPU, containerName); + deleteCgroup(pathForCgroup(CONTROLLER_CPU, containerId.toString())); } }