From 62d83ca5360cf803ecf6780caf192462d0092009 Mon Sep 17 00:00:00 2001 From: Miklos Szegedi Date: Tue, 26 Jun 2018 15:21:35 -0700 Subject: [PATCH] YARN-8461. Support strict memory control on individual container with elastic control memory mechanism. Contributed by Haibo Chen. --- .../CGroupsMemoryResourceHandlerImpl.java | 24 ++++ .../resources/MemoryResourceHandler.java | 10 ++ .../monitor/ContainersMonitorImpl.java | 112 +++++++++++------- .../TestCGroupsMemoryResourceHandlerImpl.java | 43 +++++++ 4 files changed, 144 insertions(+), 45 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java index a57adb1391f..053b796b5fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java @@ -34,6 +34,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.Optional; + +import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL; /** * Handler class to handle the memory controller. 
YARN already ships a @@ -172,4 +175,25 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler { return null; } + @Override + public Optional<Boolean> isUnderOOM(ContainerId containerId) { + try { + String status = cGroupsHandler.getCGroupParam( + CGroupsHandler.CGroupController.MEMORY, + containerId.toString(), + CGROUP_PARAM_MEMORY_OOM_CONTROL); + if (LOG.isDebugEnabled()) { + LOG.debug("cgroups OOM status for " + containerId + ": " + status); + } + if (status.contains(CGroupsHandler.UNDER_OOM)) { + LOG.warn("Container " + containerId + " under OOM based on cgroups."); + return Optional.of(true); + } else { + return Optional.of(false); + } + } catch (ResourceHandlerException e) { + LOG.warn("Could not read cgroups for " + containerId, e); + } + return Optional.empty(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java index 013a49fbb44..1729fc17a0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/MemoryResourceHandler.java @@ -20,8 +20,18 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resourc import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.api.records.ContainerId; + +import java.util.Optional; @InterfaceAudience.Private @InterfaceStability.Unstable public interface MemoryResourceHandler extends ResourceHandler { + /** + * Check whether a container is under OOM.
+ * @param containerId the id of the container + * @return empty if the status is unknown, true if the container is under OOM, + * false otherwise + */ + Optional<Boolean> isUnderOOM(ContainerId containerId); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index bd68dfe23db..d83fe39ffce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -22,6 +22,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupElasticMemoryController; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.MemoryResourceHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.slf4j.Logger; @@ -51,6 +52,7 @@ import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; import java.util.Arrays; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; /** @@ -697,55 +699,75 @@ public class ContainersMonitorImpl extends AbstractService implements ProcessTreeInfo ptInfo, long currentVmemUsage, long currentPmemUsage) { - if (elasticMemoryEnforcement || strictMemoryEnforcement) { - // We enforce the overall memory usage instead of individual containers - return; - } - boolean isMemoryOverLimit = false; - long vmemLimit = ptInfo.getVmemLimit(); - long pmemLimit = ptInfo.getPmemLimit(); - // as processes begin with an age 1, we want to see if there - // are processes more than 1 iteration old. - long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1); - long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1); + Optional<Boolean> isMemoryOverLimit = Optional.empty(); String msg = ""; int containerExitStatus = ContainerExitStatus.INVALID; - if (isVmemCheckEnabled() - && isProcessTreeOverLimit(containerId.toString(), - currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) { - // The current usage (age=0) is always higher than the aged usage. We - // do not show the aged size in the message, base the delta on the - // current usage - long delta = currentVmemUsage - vmemLimit; - // Container (the root process) is still alive and overflowing - // memory. - // Dump the process-tree and then clean it up.
- msg = formatErrorMessage("virtual", - formatUsageString(currentVmemUsage, vmemLimit, - currentPmemUsage, pmemLimit), - pId, containerId, pTree, delta); - isMemoryOverLimit = true; - containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM; - } else if (isPmemCheckEnabled() - && isProcessTreeOverLimit(containerId.toString(), - currentPmemUsage, curRssMemUsageOfAgedProcesses, - pmemLimit)) { - // The current usage (age=0) is always higher than the aged usage. We - // do not show the aged size in the message, base the delta on the - // current usage - long delta = currentPmemUsage - pmemLimit; - // Container (the root process) is still alive and overflowing - // memory. - // Dump the process-tree and then clean it up. - msg = formatErrorMessage("physical", - formatUsageString(currentVmemUsage, vmemLimit, - currentPmemUsage, pmemLimit), - pId, containerId, pTree, delta); - isMemoryOverLimit = true; - containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM; + + if (strictMemoryEnforcement && elasticMemoryEnforcement) { + // Both elastic memory control and strict memory control are enabled + // through cgroups. A container will be frozen by the elastic memory + // control mechanism if it exceeds its request, so we check for this + // here and kill it. Otherwise, the container will not be killed if + // the node never exceeds its limit and the procfs-based + // memory accounting is different from the cgroup-based accounting. + + MemoryResourceHandler handler = + ResourceHandlerModule.getMemoryResourceHandler(); + if (handler != null) { + isMemoryOverLimit = handler.isUnderOOM(containerId); + containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM; + msg = containerId + " is under oom because it exceeded its" + + " physical memory limit"; + } + } else if (strictMemoryEnforcement || elasticMemoryEnforcement) { + // if cgroup-based memory control is enabled + isMemoryOverLimit = Optional.of(false); } - if (isMemoryOverLimit) { + if (!isMemoryOverLimit.isPresent()) { + long vmemLimit = ptInfo.getVmemLimit(); + long pmemLimit = ptInfo.getPmemLimit(); + // as processes begin with an age 1, we want to see if there + // are processes more than 1 iteration old. + long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1); + long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1); + if (isVmemCheckEnabled() + && isProcessTreeOverLimit(containerId.toString(), + currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) { + // The current usage (age=0) is always higher than the aged usage. We + // do not show the aged size in the message, base the delta on the + // current usage + long delta = currentVmemUsage - vmemLimit; + // Container (the root process) is still alive and overflowing + // memory. + // Dump the process-tree and then clean it up. + msg = formatErrorMessage("virtual", + formatUsageString(currentVmemUsage, vmemLimit, + currentPmemUsage, pmemLimit), + pId, containerId, pTree, delta); + isMemoryOverLimit = Optional.of(true); + containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM; + } else if (isPmemCheckEnabled() + && isProcessTreeOverLimit(containerId.toString(), + currentPmemUsage, curRssMemUsageOfAgedProcesses, + pmemLimit)) { + // The current usage (age=0) is always higher than the aged usage. We + // do not show the aged size in the message, base the delta on the + // current usage + long delta = currentPmemUsage - pmemLimit; + // Container (the root process) is still alive and overflowing + // memory. 
+ // Dump the process-tree and then clean it up. + msg = formatErrorMessage("physical", + formatUsageString(currentVmemUsage, vmemLimit, + currentPmemUsage, pmemLimit), + pId, containerId, pTree, delta); + isMemoryOverLimit = Optional.of(true); + containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM; + } + } + + if (isMemoryOverLimit.isPresent() && isMemoryOverLimit.get()) { // Virtual or physical memory over limit. Fail the container and // remove // the corresponding process tree diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java index 5c7e2333819..4d3e7e6e1d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java @@ -31,7 +31,9 @@ import org.junit.Test; import org.junit.Assert; import java.util.List; +import java.util.Optional; +import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler.CGROUP_PARAM_MEMORY_OOM_CONTROL; import static org.mockito.Mockito.*; /** @@ -242,4 +244,45 @@ public class TestCGroupsMemoryResourceHandlerImpl { .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id, CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES, "1024M"); } + + @Test + public void testContainerUnderOom() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); + conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); + + cGroupsMemoryResourceHandler.bootstrap(conf); + + ContainerId containerId = mock(ContainerId.class); + when(containerId.toString()).thenReturn("container_01_01"); + + when(mockCGroupsHandler.getCGroupParam( + CGroupsHandler.CGroupController.MEMORY, + containerId.toString(), + CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn(CGroupsHandler.UNDER_OOM); + Optional<Boolean> outOfOom = + cGroupsMemoryResourceHandler.isUnderOOM(containerId); + Assert.assertTrue("The container should be reported to run under oom", + outOfOom.isPresent() && outOfOom.get().equals(true)); + + when(mockCGroupsHandler.getCGroupParam( + CGroupsHandler.CGroupController.MEMORY, + containerId.toString(), + CGROUP_PARAM_MEMORY_OOM_CONTROL)).thenReturn(""); + outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId); + Assert.assertTrue( + "The container should not be reported to run under oom", + outOfOom.isPresent() && outOfOom.get().equals(false)); + + when(mockCGroupsHandler.getCGroupParam( + CGroupsHandler.CGroupController.MEMORY, + containerId.toString(), + CGROUP_PARAM_MEMORY_OOM_CONTROL)). + thenThrow(new ResourceHandlerException()); + outOfOom = cGroupsMemoryResourceHandler.isUnderOOM(containerId); + Assert.assertFalse( + "No report of the oom status should be available.", + outOfOom.isPresent()); + + } }
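
For context, the isUnderOOM() check added above ultimately reads the container's memory.oom_control file in the cgroups v1 memory hierarchy and looks for the kernel-reported under_oom flag, which is what CGroupsHandler.UNDER_OOM matches against. The following is a minimal, standalone sketch of that mechanism only, not NodeManager code: the class name OomControlProbe, the hard-coded /sys/fs/cgroup/memory/hadoop-yarn prefix, and the main() driver are illustrative assumptions; in the patch the real path is resolved by CGroupsHandler.getCGroupParam().

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;

/** Illustrative sketch of the cgroups v1 under_oom check (not part of the patch). */
public class OomControlProbe {

  // Hypothetical default location; the NodeManager derives the actual path
  // from its cgroups mount and hierarchy configuration at runtime.
  private static final String MEMORY_CGROUP_ROOT =
      "/sys/fs/cgroup/memory/hadoop-yarn";

  /**
   * Returns true/false when memory.oom_control could be read,
   * or empty when the file is missing or unreadable.
   */
  public static Optional<Boolean> isUnderOom(String containerId) {
    Path oomControl = Paths.get(
        MEMORY_CGROUP_ROOT, containerId, "memory.oom_control");
    try {
      for (String line : Files.readAllLines(oomControl)) {
        // cgroups v1 reports a line of the form "under_oom 0|1".
        if (line.startsWith("under_oom")) {
          return Optional.of(line.trim().endsWith("1"));
        }
      }
      return Optional.of(false);
    } catch (IOException e) {
      return Optional.empty();
    }
  }

  public static void main(String[] args) {
    System.out.println(isUnderOom("container_01_01"));
  }
}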
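
The checkLimit() change can also be read as a three-way decision on how the per-container verdict is produced. The helper below is a hypothetical, condensed restatement of that flow under the assumption that the enforcement flags and the procfs verdict are passed in as plain parameters rather than read from ContainersMonitorImpl state; it is a sketch, not the patched method.

import java.util.Optional;

/** Condensed restatement of the post-patch decision flow (illustrative only). */
class MemoryLimitDecision {

  /**
   * @param strict strict cgroup memory enforcement enabled
   * @param elastic elastic (node-wide) cgroup memory control enabled
   * @param underOom cgroup-reported OOM state, empty if unknown
   * @param procfsOverLimit result of the classic procfs process-tree check
   * @return whether the container should be killed for exceeding memory
   */
  static boolean shouldKill(boolean strict, boolean elastic,
      Optional<Boolean> underOom, boolean procfsOverLimit) {
    Optional<Boolean> overLimit = Optional.empty();
    if (strict && elastic) {
      // Both mechanisms on: trust the cgroup OOM flag, because a container
      // frozen by the elastic controller must be killed here.
      overLimit = underOom;
    } else if (strict || elastic) {
      // Exactly one cgroup mechanism on: the kernel enforces the limit,
      // so the monitor never kills on its own.
      overLimit = Optional.of(false);
    }
    // Otherwise, or when the cgroup state is unknown, fall back to the
    // procfs-based process-tree check.
    return overLimit.orElse(procfsOverLimit);
  }

  public static void main(String[] args) {
    // Strict + elastic: the cgroup OOM flag alone decides.
    System.out.println(shouldKill(true, true, Optional.of(true), false));
    // Only one cgroup mechanism: the monitor never kills by itself.
    System.out.println(shouldKill(true, false, Optional.empty(), true));
    // No cgroup enforcement: fall back to the procfs-based check.
    System.out.println(shouldKill(false, false, Optional.empty(), true));
  }
}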