YARN-1284. LCE: Race condition leaves dangling cgroups entries for killed containers. (Alejandro Abdelnur via Sandy Ryza)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1530492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sanford Ryza 2013-10-09 05:05:17 +00:00
parent b7e885435a
commit 726c3538a7
3 changed files with 59 additions and 23 deletions

View File

@ -69,6 +69,9 @@ Release 2.3.0 - UNRELEASED
YARN-1268. TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza)
YARN-1284. LCE: Race condition leaves dangling cgroups entries for killed
containers. (Alejandro Abdelnur via Sandy Ryza)
Release 2.2.1 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -674,7 +674,19 @@ public class YarnConfiguration extends Configuration {
/** Where the linux container executor should mount cgroups if not found */
public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH =
NM_PREFIX + "linux-container-executor.cgroups.mount-path";
/**
* Interval of time the linux container executor should try cleaning up
* cgroups entry when cleaning up a container. This is required due to what
* it seems a race condition because the SIGTERM/SIGKILL is asynch.
*/
public static final String NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT =
NM_PREFIX + "linux-container-executor.cgroups.delete-timeout-ms";
public static final long DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT =
1000;
/** T-file compression types used to compress aggregated logs.*/
public static final String NM_LOG_AGG_COMPRESSION_TYPE =
NM_PREFIX + "log-aggregation.compression-type";

View File

@ -32,6 +32,7 @@ import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -40,6 +41,8 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.SystemClock;
public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
@ -59,8 +62,13 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
private final int CPU_DEFAULT_WEIGHT = 1024; // set by kernel
private final Map<String, String> controllerPaths; // Controller -> path
private long deleteCgroupTimeout;
// package private for testing purposes
Clock clock;
public CgroupsLCEResourcesHandler() {
this.controllerPaths = new HashMap<String, String>();
clock = new SystemClock();
}
@Override
@ -73,7 +81,8 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
return conf;
}
public synchronized void init(LinuxContainerExecutor lce) throws IOException {
@VisibleForTesting
void initConfig() throws IOException {
this.cgroupPrefix = conf.get(YarnConfiguration.
NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn");
@ -82,6 +91,9 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
this.cgroupMountPath = conf.get(YarnConfiguration.
NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, null);
this.deleteCgroupTimeout = conf.getLong(
YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT,
YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_DELETE_TIMEOUT);
// remove extra /'s at end or start of cgroupPrefix
if (cgroupPrefix.charAt(0) == '/') {
cgroupPrefix = cgroupPrefix.substring(1);
@ -91,7 +103,11 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
if (cgroupPrefix.charAt(len - 1) == '/') {
cgroupPrefix = cgroupPrefix.substring(0, len - 1);
}
}
public void init(LinuxContainerExecutor lce) throws IOException {
initConfig();
// mount cgroups if requested
if (cgroupMount && cgroupMountPath != null) {
ArrayList<String> cgroupKVs = new ArrayList<String>();
@ -158,14 +174,32 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
}
}
private void deleteCgroup(String controller, String groupName) {
String path = pathForCgroup(controller, groupName);
LOG.debug("deleteCgroup: " + path);
if (! new File(path).delete()) {
LOG.warn("Unable to delete cgroup at: " + path);
@VisibleForTesting
boolean deleteCgroup(String cgroupPath) {
boolean deleted;
if (LOG.isDebugEnabled()) {
LOG.debug("deleteCgroup: " + cgroupPath);
}
long start = clock.getTime();
do {
deleted = new File(cgroupPath).delete();
if (!deleted) {
try {
Thread.sleep(20);
} catch (InterruptedException ex) {
// NOP
}
}
} while (!deleted && (clock.getTime() - start) < deleteCgroupTimeout);
if (!deleted) {
LOG.warn("Unable to delete cgroup at: " + cgroupPath +
", tried to delete for " + deleteCgroupTimeout + "ms");
}
return deleted;
}
/*
@ -185,21 +219,8 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler {
}
private void clearLimits(ContainerId containerId) {
String containerName = containerId.toString();
// Based on testing, ApplicationMaster executables don't terminate until
// a little after the container appears to have finished. Therefore, we
// wait a short bit for the cgroup to become empty before deleting it.
if (containerId.getId() == 1) {
try {
Thread.sleep(500);
} catch (InterruptedException e) {
// not a problem, continue anyway
}
}
if (isCpuWeightEnabled()) {
deleteCgroup(CONTROLLER_CPU, containerName);
deleteCgroup(pathForCgroup(CONTROLLER_CPU, containerId.toString()));
}
}