YARN-8930. CGroup-based strict container memory enforcement does not work with CGroupElasticMemoryController (haibochen via rkanter)

This commit is contained in:
Robert Kanter 2018-10-25 10:43:36 -07:00
parent fb2b72e6fc
commit f76e3c3db7
5 changed files with 60 additions and 147 deletions

View File

@ -34,9 +34,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
/**
* Handler class to handle the memory controller. YARN already ships a
@ -174,26 +171,4 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
return null;
}
/**
 * Reports whether the cgroup of the given container is currently under OOM,
 * based on the memory controller's OOM control parameter.
 *
 * @param containerId the id of the container whose cgroup is inspected
 * @return {@code Optional.of(true)} if the OOM control status contains the
 *         under-oom marker, {@code Optional.of(false)} if it does not, and
 *         an empty Optional if the status could not be read
 */
@Override
public Optional<Boolean> isUnderOOM(ContainerId containerId) {
  try {
    String status = cGroupsHandler.getCGroupParam(
        CGroupsHandler.CGroupController.MEMORY,
        containerId.toString(),
        CGROUP_PARAM_MEMORY_OOM_CONTROL);
    if (LOG.isDebugEnabled()) {
      LOG.debug("cgroups OOM status for " + containerId + ": " + status);
    }
    if (status == null) {
      // Nothing was read back: report "unknown" rather than failing with a
      // NullPointerException on the contains() check below.
      return Optional.empty();
    }
    if (status.contains(CGroupsHandler.UNDER_OOM)) {
      LOG.warn("Container " + containerId + " under OOM based on cgroups.");
      return Optional.of(true);
    }
    return Optional.of(false);
  } catch (ResourceHandlerException e) {
    // Best effort: a failed read is logged and reported as "unknown" so the
    // caller can decide how to proceed.
    LOG.warn("Could not read cgroups OOM status for " + containerId, e);
  }
  return Optional.empty();
}
}

View File

@ -20,18 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resourc
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.yarn.api.records.ContainerId;
import java.util.Optional;
/**
 * A {@link ResourceHandler} that additionally exposes the OOM status of a
 * container.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public interface MemoryResourceHandler extends ResourceHandler {
/**
 * Checks whether a container is under OOM.
 *
 * @param containerId the id of the container
 * @return an empty Optional if the status is unknown, {@code true} if the
 *         container is under OOM, {@code false} otherwise
 */
Optional<Boolean> isUnderOOM(ContainerId containerId);
}

View File

@ -22,7 +22,6 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.slf4j.Logger;
@ -52,7 +51,6 @@ import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
import java.util.Arrays;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
/**
@ -699,75 +697,61 @@ public class ContainersMonitorImpl extends AbstractService implements
ProcessTreeInfo ptInfo,
long currentVmemUsage,
long currentPmemUsage) {
Optional<Boolean> isMemoryOverLimit = Optional.empty();
if (strictMemoryEnforcement && !elasticMemoryEnforcement) {
// When cgroup-based strict memory enforcement is used alone without
// elastic memory control, the oom-kill would take care of it.
// However, when elastic memory control is also enabled, the oom killer
// would be disabled at the root yarn container cgroup level (all child
// cgroups would inherit that setting). Hence, we fall back to the
// polling-based mechanism.
return;
}
boolean isMemoryOverLimit = false;
String msg = "";
int containerExitStatus = ContainerExitStatus.INVALID;
if (strictMemoryEnforcement && elasticMemoryEnforcement) {
// Both elastic memory control and strict memory control are enabled
// through cgroups. A container will be frozen by the elastic memory
// control mechanism if it exceeds its request, so we check for this
// here and kill it. Otherwise, the container will not be killed if
// the node never exceeds its limit and the procfs-based
// memory accounting is different from the cgroup-based accounting.
MemoryResourceHandler handler =
ResourceHandlerModule.getMemoryResourceHandler();
if (handler != null) {
isMemoryOverLimit = handler.isUnderOOM(containerId);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
msg = containerId + " is under oom because it exceeded its" +
" physical memory limit";
}
} else if (strictMemoryEnforcement || elasticMemoryEnforcement) {
// if cgroup-based memory control is enabled
isMemoryOverLimit = Optional.of(false);
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentVmemUsage - vmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentPmemUsage - pmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
if (!isMemoryOverLimit.isPresent()) {
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentVmemUsage - vmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = Optional.of(true);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// The current usage (age=0) is always higher than the aged usage. We
// do not show the aged size in the message, base the delta on the
// current usage
long delta = currentPmemUsage - pmemLimit;
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree, delta);
isMemoryOverLimit = Optional.of(true);
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
}
if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) {
if (isMemoryOverLimit) {
// Virtual or physical memory over limit. Fail the container and
// remove
// the corresponding process tree

View File

@ -31,9 +31,6 @@ import org.junit.Test;
import org.junit.Assert;
import java.util.List;
import java.util.Optional;
import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL;
import static org.mockito.Mockito.*;
/**
@ -244,45 +241,4 @@ public class TestCGroupsMemoryResourceHandlerImpl {
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M");
}
/**
 * Verifies the three outcomes of isUnderOOM: under OOM, not under OOM, and
 * unknown (when the cgroup parameter cannot be read).
 */
@Test
public void testContainerUnderOom() throws Exception {
  Configuration yarnConf = new YarnConfiguration();
  yarnConf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
  yarnConf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
  cGroupsMemoryResourceHandler.bootstrap(yarnConf);

  ContainerId containerId = mock(ContainerId.class);
  when(containerId.toString()).thenReturn("container_01_01");
  final String cgroupId = containerId.toString();

  // Case 1: the OOM control parameter carries the under-oom marker.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupId,
      CGROUP_PARAM_MEMORY_OOM_CONTROL))
      .thenReturn(CGroupsHandler.UNDER_OOM);
  Optional<Boolean> oomStatus =
      cGroupsMemoryResourceHandler.isUnderOOM(containerId);
  Assert.assertTrue("The container should be reported to run under oom",
      oomStatus.isPresent() && oomStatus.get());

  // Case 2: the parameter is readable but does not carry the marker.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupId,
      CGROUP_PARAM_MEMORY_OOM_CONTROL))
      .thenReturn("");
  oomStatus = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
  Assert.assertTrue(
      "The container should not be reported to run under oom",
      oomStatus.isPresent() && !oomStatus.get());

  // Case 3: reading the parameter fails, so no status is available.
  when(mockCGroupsHandler.getCGroupParam(
      CGroupsHandler.CGroupController.MEMORY,
      cgroupId,
      CGROUP_PARAM_MEMORY_OOM_CONTROL))
      .thenThrow(new ResourceHandlerException());
  oomStatus = cGroupsMemoryResourceHandler.isUnderOOM(containerId);
  Assert.assertFalse(
      "No report of the oom status should be available.",
      oomStatus.isPresent());
}
}

View File

@ -20,8 +20,6 @@ YARN has multiple features to enforce container memory limits. There are three t
2. Strict memory control kills each container that has exceeded its limits. It is using the OOM killer capability of the cgroups Linux kernel feature.
3. Elastic memory control is also based on cgroups. It allows bursting and starts killing containers only if the overall system memory usage reaches a limit.
If you use 2. or 3. feature 1. is disabled.
Strict Memory Feature
---------------------
@ -131,3 +129,13 @@ Configure the cgroups prerequisites mentioned above.
`yarn.nodemanager.resource.memory.enforced` should be `false`
`yarn.nodemanager.pmem-check-enabled` or `yarn.nodemanager.vmem-check-enabled` should be `true`. If swapping is turned off the former should be set, the latter should be set otherwise.
Configuring elastic memory control and strict container memory enforcement through cgroups
------------------------------------------
ADVANCED ONLY
Elastic memory control and strict container memory enforcement can be enabled at the same time to allow Node Manager to over-allocate itself.
However, elastic memory control changes how strict container memory enforcement through cgroups is performed. Elastic memory control
disables the OOM killer on the root yarn container cgroup. This root-level OOM killer setting overrides that of the individual container cgroups, so individual
containers won't be killed by the OOM killer when they go over their memory limit. In this case, strict container memory enforcement falls
back to the polling-based mechanism.