From 718ae2d4773fd2771c64e2f55a9eed1c4ffe3c07 Mon Sep 17 00:00:00 2001 From: Daniel Templeton Date: Wed, 11 Jan 2017 14:48:52 -0800 Subject: [PATCH] YARN-5849. Automatically create YARN control group for pre-mounted cgroups (Contributed by Miklos Szegedi via Daniel Templeton) (cherry picked from commit 0fffebe51a91023c4949ab560f5c828f1b568072) --- .../src/main/resources/yarn-default.xml | 8 +- .../CGroupsBlkioResourceHandlerImpl.java | 2 +- .../CGroupsCpuResourceHandlerImpl.java | 2 +- .../linux/resources/CGroupsHandler.java | 58 +++--- .../linux/resources/CGroupsHandlerImpl.java | 171 +++++++++++++++--- .../CGroupsMemoryResourceHandlerImpl.java | 2 +- .../TrafficControlBandwidthHandlerImpl.java | 3 +- .../TestCGroupsBlkioResourceHandlerImpl.java | 2 +- .../TestCGroupsCpuResourceHandlerImpl.java | 6 +- .../resources/TestCGroupsHandlerImpl.java | 153 +++++++++++++++- .../TestCGroupsMemoryResourceHandlerImpl.java | 2 +- ...estTrafficControlBandwidthHandlerImpl.java | 2 +- .../src/site/markdown/NodeManagerCgroups.md | 2 +- 13 files changed, 334 insertions(+), 79 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index d6ea32b911f..310cdb48c7f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1424,9 +1424,11 @@ The cgroups hierarchy under which to place YARN proccesses (cannot contain commas). - If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have - been pre-configured), then this cgroups hierarchy must already exist and be writable by the - NodeManager user, otherwise the NodeManager may fail. 
+ If yarn.nodemanager.linux-container-executor.cgroups.mount is false + (that is, if cgroups have been pre-configured) and the Yarn user has write + access to the parent directory, then the directory will be created. + If the directory already exists, the administrator has to give Yarn + write permissions to it recursively. Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler. yarn.nodemanager.linux-container-executor.cgroups.hierarchy /hadoop-yarn diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsBlkioResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsBlkioResourceHandlerImpl.java index e7eea1f94e0..e0b43d32221 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsBlkioResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsBlkioResourceHandlerImpl.java @@ -120,7 +120,7 @@ public List bootstrap(Configuration configuration) // if bootstrap is called on this class, disk is already enabled // so no need to check again this.cGroupsHandler - .mountCGroupController(CGroupsHandler.CGroupController.BLKIO); + .initializeCGroupController(CGroupsHandler.CGroupController.BLKIO); return null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsCpuResourceHandlerImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsCpuResourceHandlerImpl.java index 0071c7e65c9..d9cca8f523a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsCpuResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsCpuResourceHandlerImpl.java @@ -91,7 +91,7 @@ List bootstrap( this.strictResourceUsageMode = conf.getBoolean( YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE, YarnConfiguration.DEFAULT_NM_LINUX_CONTAINER_CGROUPS_STRICT_RESOURCE_USAGE); - this.cGroupsHandler.mountCGroupController(CPU); + this.cGroupsHandler.initializeCGroupController(CPU); nodeVCores = NodeManagerHardwareUtils.getVCores(plugin, conf); // cap overall usage to the number of cores allocated to YARN diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java index eefaabdad84..d09a25dd024 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java @@ -32,7 +32,10 @@ 
@InterfaceStability.Unstable public interface CGroupsHandler { - public enum CGroupController { + /** + * List of supported cgroup subsystem types. + */ + enum CGroupController { CPU("cpu"), NET_CLS("net_cls"), BLKIO("blkio"), @@ -49,9 +52,9 @@ String getName() { } } - public static final String CGROUP_FILE_TASKS = "tasks"; - public static final String CGROUP_PARAM_CLASSID = "classid"; - public static final String CGROUP_PARAM_BLKIO_WEIGHT = "weight"; + String CGROUP_FILE_TASKS = "tasks"; + String CGROUP_PARAM_CLASSID = "classid"; + String CGROUP_PARAM_BLKIO_WEIGHT = "weight"; String CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES = "limit_in_bytes"; String CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES = "soft_limit_in_bytes"; @@ -63,30 +66,31 @@ String getName() { String CGROUP_CPU_SHARES = "shares"; /** - * Mounts a cgroup controller - * @param controller - the controller being mounted - * @throws ResourceHandlerException + * Mounts or initializes a cgroup controller. + * @param controller - the controller being initialized + * @throws ResourceHandlerException the initialization failed due to the + * environment */ - public void mountCGroupController(CGroupController controller) + void initializeCGroupController(CGroupController controller) throws ResourceHandlerException; /** - * Creates a cgroup for a given controller + * Creates a cgroup for a given controller. * @param controller - controller type for which the cgroup is being created * @param cGroupId - id of the cgroup being created * @return full path to created cgroup - * @throws ResourceHandlerException + * @throws ResourceHandlerException creation failed */ - public String createCGroup(CGroupController controller, String cGroupId) + String createCGroup(CGroupController controller, String cGroupId) throws ResourceHandlerException; /** - * Deletes the specified cgroup + * Deletes the specified cgroup. 
* @param controller - controller type for the cgroup * @param cGroupId - id of the cgroup being deleted - * @throws ResourceHandlerException + * @throws ResourceHandlerException deletion failed */ - public void deleteCGroup(CGroupController controller, String cGroupId) throws + void deleteCGroup(CGroupController controller, String cGroupId) throws ResourceHandlerException; /** @@ -95,59 +99,59 @@ public void deleteCGroup(CGroupController controller, String cGroupId) throws * @param cGroupId - id of the cgroup * @return path for the cgroup relative to the root of (any) controller. */ - public String getRelativePathForCGroup(String cGroupId); + String getRelativePathForCGroup(String cGroupId); /** - * Gets the full path for the cgroup, given a controller and a cgroup id + * Gets the full path for the cgroup, given a controller and a cgroup id. * @param controller - controller type for the cgroup * @param cGroupId - id of the cgroup * @return full path for the cgroup */ - public String getPathForCGroup(CGroupController controller, String + String getPathForCGroup(CGroupController controller, String cGroupId); /** * Gets the full path for the cgroup's tasks file, given a controller and a - * cgroup id + * cgroup id. * @param controller - controller type for the cgroup * @param cGroupId - id of the cgroup * @return full path for the cgroup's tasks file */ - public String getPathForCGroupTasks(CGroupController controller, String + String getPathForCGroupTasks(CGroupController controller, String cGroupId); /** * Gets the full path for a cgroup parameter, given a controller, - * cgroup id and parameter name + * cgroup id and parameter name. 
* @param controller - controller type for the cgroup * @param cGroupId - id of the cgroup * @param param - cgroup parameter ( e.g classid ) * @return full path for the cgroup parameter */ - public String getPathForCGroupParam(CGroupController controller, String + String getPathForCGroupParam(CGroupController controller, String cGroupId, String param); /** - * updates a cgroup parameter, given a controller, cgroup id, parameter name + * updates a cgroup parameter, given a controller, cgroup id, parameter name. * and a parameter value * @param controller - controller type for the cgroup * @param cGroupId - id of the cgroup * @param param - cgroup parameter ( e.g classid ) * @param value - value to be written to the parameter file - * @throws ResourceHandlerException + * @throws ResourceHandlerException the operation failed */ - public void updateCGroupParam(CGroupController controller, String cGroupId, + void updateCGroupParam(CGroupController controller, String cGroupId, String param, String value) throws ResourceHandlerException; /** - * reads a cgroup parameter value, given a controller, cgroup id, parameter + * reads a cgroup parameter value, given a controller, cgroup id, parameter. 
* name * @param controller - controller type for the cgroup * @param cGroupId - id of the cgroup * @param param - cgroup parameter ( e.g classid ) * @return parameter value as read from the parameter file - * @throws ResourceHandlerException + * @throws ResourceHandlerException the operation failed */ - public String getCGroupParam(CGroupController controller, String cGroupId, + String getCGroupParam(CGroupController controller, String cGroupId, String param) throws ResourceHandlerException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java index 36bd468f791..0b29abc9a5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java @@ -39,7 +39,6 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -60,6 +59,7 @@ class CGroupsHandlerImpl implements CGroupsHandler { private static final String MTAB_FILE = "/proc/mounts"; private static final String CGROUPS_FSTYPE = "cgroup"; + private String mtabFile; private final String cGroupPrefix; private final boolean enableCGroupMount; private final String cGroupMountPath; @@ -70,8 +70,17 @@ class CGroupsHandlerImpl implements CGroupsHandler { private final PrivilegedOperationExecutor 
privilegedOperationExecutor; private final Clock clock; + /** + * Create cgroup handler object. + * @param conf configuration + * @param privilegedOperationExecutor provides mechanisms to execute + * PrivilegedContainerOperations + * @param mtab mount file location + * @throws ResourceHandlerException if initialization failed + */ public CGroupsHandlerImpl(Configuration conf, PrivilegedOperationExecutor - privilegedOperationExecutor) throws ResourceHandlerException { + privilegedOperationExecutor, String mtab) + throws ResourceHandlerException { this.cGroupPrefix = conf.get(YarnConfiguration. NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, "/hadoop-yarn") .replaceAll("^/", "").replaceAll("$/", ""); @@ -89,10 +98,22 @@ public CGroupsHandlerImpl(Configuration conf, PrivilegedOperationExecutor this.rwLock = new ReentrantReadWriteLock(); this.privilegedOperationExecutor = privilegedOperationExecutor; this.clock = SystemClock.getInstance(); - + mtabFile = mtab; init(); } + /** + * Create cgroup handler object. 
+ * @param conf configuration + * @param privilegedOperationExecutor provides mechanisms to execute + * PrivilegedContainerOperations + * @throws ResourceHandlerException if initialization failed + */ + public CGroupsHandlerImpl(Configuration conf, PrivilegedOperationExecutor + privilegedOperationExecutor) throws ResourceHandlerException { + this(conf, privilegedOperationExecutor, MTAB_FILE); + } + private void init() throws ResourceHandlerException { initializeControllerPaths(); } @@ -117,7 +138,7 @@ private void initializeControllerPaths() throws ResourceHandlerException { // locations - we'll attempt to figure out mount points Map cPaths = - initializeControllerPathsFromMtab(MTAB_FILE, this.cGroupPrefix); + initializeControllerPathsFromMtab(mtabFile, this.cGroupPrefix); // we want to do a bulk update without the paths changing concurrently try { rwLock.writeLock().lock(); @@ -136,26 +157,14 @@ static Map initializeControllerPathsFromMtab( Map ret = new HashMap<>(); for (CGroupController controller : CGroupController.values()) { - String name = controller.getName(); - String controllerPath = findControllerInMtab(name, parsedMtab); + String subsystemName = controller.getName(); + String controllerPath = findControllerInMtab(subsystemName, parsedMtab); if (controllerPath != null) { - File f = new File(controllerPath + "/" + cGroupPrefix); - - if (FileUtil.canWrite(f)) { - ret.put(controller, controllerPath); - } else { - String error = - new StringBuffer("Mount point Based on mtab file: ") - .append(mtab) - .append(". 
Controller mount point not writable for: ") - .append(name).toString(); - - LOG.error(error); - throw new ResourceHandlerException(error); - } + ret.put(controller, controllerPath); } else { - LOG.warn("Controller not mounted but automount disabled: " + name); + LOG.warn("Controller not mounted but automount disabled: " + + subsystemName); } } return ret; @@ -214,25 +223,28 @@ private static Map> parseMtab(String mtab) return ret; } + /** + * Find the hierarchy of the subsystem. + * The kernel ensures that a subsystem can only be part of a single hierarchy. + * The subsystem can be part of multiple mount points, if they belong to the + * same hierarchy. + * @param controller subsystem like cpu, cpuset, etc... + * @param entries map of paths to mount options + * @return the first mount path that has the requested subsystem + */ private static String findControllerInMtab(String controller, Map> entries) { for (Map.Entry> e : entries.entrySet()) { - if (e.getValue().contains(controller)) + if (e.getValue().contains(controller)) { return e.getKey(); + } } return null; } - @Override - public void mountCGroupController(CGroupController controller) + private void mountCGroupController(CGroupController controller) throws ResourceHandlerException { - if (!enableCGroupMount) { - LOG.warn("CGroup mounting is disabled - ignoring mount request for: " + - controller.getName()); - return; - } - String path = getControllerPath(controller); if (path == null) { @@ -299,6 +311,105 @@ public String getPathForCGroupParam(CGroupController controller, .append(param).toString(); } + /** + * Mount cgroup or use existing mount point based on configuration. 
+ * @param controller - the controller being initialized + * @throws ResourceHandlerException yarn hierarchy cannot be created or + * accessed for any reason + */ + @Override + public void initializeCGroupController(CGroupController controller) throws + ResourceHandlerException { + if (enableCGroupMount) { + // We have a controller that needs to be mounted + mountCGroupController(controller); + } else { + // We are working with a pre-mounted controller + // Make sure that Yarn cgroup hierarchy path exists + initializePreMountedCGroupController(controller); + } + } + + /** + * This function is called when the administrator opted + * to use a pre-mounted cgroup controller. + * There are two options. + * 1. Yarn hierarchy already exists. We verify whether we have write access + * in this case. + * 2. Yarn hierarchy does not exist yet. We create it in this case. + * @param controller the controller being initialized + * @throws ResourceHandlerException yarn hierarchy cannot be created or + * accessed for any reason + */ + public void initializePreMountedCGroupController(CGroupController controller) + throws ResourceHandlerException { + // Check permissions to cgroup hierarchy and + // create YARN cgroup if it does not exist yet + File rootHierarchy = new File(getControllerPath(controller)); + File yarnHierarchy = new File(rootHierarchy, cGroupPrefix); + String subsystemName = controller.getName(); + + LOG.info("Initializing mounted controller " + controller.getName() + " " + + "at " + yarnHierarchy); + + if (!rootHierarchy.exists()) { + throw new ResourceHandlerException(getErrorWithDetails( + "Cgroups mount point does not exist or not accessible", + subsystemName, + rootHierarchy.getAbsolutePath() + )); + } else if (!yarnHierarchy.exists()) { + LOG.info("Yarn control group does not exist. 
Creating " + + yarnHierarchy.getAbsolutePath()); + try { + if (!yarnHierarchy.mkdir()) { + // Unexpected: we just checked that it was missing + throw new ResourceHandlerException(getErrorWithDetails( + "Unexpected: Cannot create yarn cgroup", + subsystemName, + yarnHierarchy.getAbsolutePath() + )); + } + } catch (SecurityException e) { + throw new ResourceHandlerException(getErrorWithDetails( + "No permissions to create yarn cgroup", + subsystemName, + yarnHierarchy.getAbsolutePath() + ), e); + } + } else if (!FileUtil.canWrite(yarnHierarchy)) { + throw new ResourceHandlerException(getErrorWithDetails( + "Yarn control group not writable", + subsystemName, + yarnHierarchy.getAbsolutePath() + )); + } + } + + /** + * Creates an actionable error message for mtab parsing. + * @param errorMessage message to use + * @param subsystemName cgroup subsystem + * @param yarnCgroupPath cgroup path that failed + * @return the error message with subsystem, mount file, user and path appended + */ + private String getErrorWithDetails( + String errorMessage, + String subsystemName, + String yarnCgroupPath) { + return new StringBuilder() + .append(errorMessage) + .append(" Subsystem:") + .append(subsystemName) + .append(" Mount points:") + .append(mtabFile) + .append(" User:") + .append(System.getProperty("user.name")) + .append(" Path: ") + .append(yarnCgroupPath) + .toString(); + } + @Override public String createCGroup(CGroupController controller, String cGroupId) throws ResourceHandlerException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java index 4630c1bd87d..b4d2a9ababe 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java @@ -74,7 +74,7 @@ public List bootstrap(Configuration conf) + YarnConfiguration.NM_VMEM_CHECK_ENABLED + " to false."; throw new ResourceHandlerException(msg); } - this.cGroupsHandler.mountCGroupController(MEMORY); + this.cGroupsHandler.initializeCGroupController(MEMORY); swappiness = conf .getInt(YarnConfiguration.NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS, YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java index a0327a2730b..3bb80356129 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java @@ -31,7 +31,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; -import org.apache.hadoop.yarn.util.SystemClock; import java.util.ArrayList; import java.util.HashMap; @@ -88,7 +87,7 @@ public List bootstrap(Configuration configuration) //operation. At some point, LCE code can be refactored to batch mount //operations across multiple controllers - cpu, net_cls, blkio etc cGroupsHandler - .mountCGroupController(CGroupsHandler.CGroupController.NET_CLS); + .initializeCGroupController(CGroupsHandler.CGroupController.NET_CLS); device = conf.get(YarnConfiguration.NM_NETWORK_RESOURCE_INTERFACE, YarnConfiguration.DEFAULT_NM_NETWORK_RESOURCE_INTERFACE); strictMode = configuration.getBoolean(YarnConfiguration diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsBlkioResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsBlkioResourceHandlerImpl.java index 20aab691240..e8ec6fa83bd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsBlkioResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsBlkioResourceHandlerImpl.java @@ -54,7 +54,7 @@ public void testBootstrap() throws Exception { Configuration conf = new YarnConfiguration(); List ret = cGroupsBlkioResourceHandlerImpl.bootstrap(conf); - verify(mockCGroupsHandler, times(1)).mountCGroupController( + verify(mockCGroupsHandler, times(1)).initializeCGroupController( 
CGroupsHandler.CGroupController.BLKIO); Assert.assertNull(ret); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsCpuResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsCpuResourceHandlerImpl.java index 119235ca2d0..674cd7142b8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsCpuResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsCpuResourceHandlerImpl.java @@ -62,7 +62,7 @@ public void testBootstrap() throws Exception { List ret = cGroupsCpuResourceHandler.bootstrap(plugin, conf); verify(mockCGroupsHandler, times(1)) - .mountCGroupController(CGroupsHandler.CGroupController.CPU); + .initializeCGroupController(CGroupsHandler.CGroupController.CPU); verify(mockCGroupsHandler, times(0)) .updateCGroupParam(CGroupsHandler.CGroupController.CPU, "", CGroupsHandler.CGROUP_CPU_PERIOD_US, ""); @@ -84,7 +84,7 @@ public void testBootstrapLimits() throws Exception { List ret = cGroupsCpuResourceHandler.bootstrap(plugin, conf); verify(mockCGroupsHandler, times(1)) - .mountCGroupController(CGroupsHandler.CGroupController.CPU); + .initializeCGroupController(CGroupsHandler.CGroupController.CPU); verify(mockCGroupsHandler, times(1)) .updateCGroupParam(CGroupsHandler.CGroupController.CPU, "", CGroupsHandler.CGROUP_CPU_PERIOD_US, String.valueOf(period)); @@ -109,7 +109,7 @@ public void testBootstrapExistingLimits() throws Exception { List ret = 
cGroupsCpuResourceHandler.bootstrap(plugin, conf); verify(mockCGroupsHandler, times(1)) - .mountCGroupController(CGroupsHandler.CGroupController.CPU); + .initializeCGroupController(CGroupsHandler.CGroupController.CPU); verify(mockCGroupsHandler, times(1)) .updateCGroupParam(CGroupsHandler.CGroupController.CPU, "", CGroupsHandler.CGROUP_CPU_QUOTA_US, "-1"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java index 76d56b4a725..38dc34fb784 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsHandlerImpl.java @@ -39,6 +39,7 @@ import java.io.FileWriter; import java.io.IOException; import java.nio.file.Files; +import java.security.Permission; import java.util.Map; import java.util.UUID; @@ -96,7 +97,7 @@ public void testMountController() { .append('=').append(tmpPath).append('/').append(controller.getName()); expectedOp.appendArgs(hierarchy, controllerKV.toString()); - cGroupsHandler.mountCGroupController(controller); + cGroupsHandler.initializeCGroupController(controller); try { ArgumentCaptor opCaptor = ArgumentCaptor.forClass( PrivilegedOperation.class); @@ -109,7 +110,7 @@ public void testMountController() { verifyNoMoreInteractions(privilegedOperationExecutorMock); //Try mounting the same controller again - this should be a no-op - 
cGroupsHandler.mountCGroupController(controller); + cGroupsHandler.initializeCGroupController(controller); verifyNoMoreInteractions(privilegedOperationExecutorMock); } catch (PrivilegedOperationException e) { LOG.error("Caught exception: " + e); @@ -131,7 +132,7 @@ public void testCGroupPaths() { try { cGroupsHandler = new CGroupsHandlerImpl(conf, privilegedOperationExecutorMock); - cGroupsHandler.mountCGroupController(controller); + cGroupsHandler.initializeCGroupController(controller); } catch (ResourceHandlerException e) { LOG.error("Caught exception: " + e); Assert.assertTrue( @@ -167,7 +168,7 @@ public void testCGroupOperations() { try { cGroupsHandler = new CGroupsHandlerImpl(conf, privilegedOperationExecutorMock); - cGroupsHandler.mountCGroupController(controller); + cGroupsHandler.initializeCGroupController(controller); } catch (ResourceHandlerException e) { LOG.error("Caught exception: " + e); Assert.assertTrue( @@ -234,7 +235,7 @@ public static File createMockCgroupMount(File parentDir, String type) return createMockCgroupMount(parentDir, type, "hadoop-yarn"); } - public static File createMockCgroupMount(File parentDir, String type, + private static File createMockCgroupMount(File parentDir, String type, String hierarchy) throws IOException { File cgroupMountDir = new File(parentDir.getAbsolutePath(), type + "/" + hierarchy); @@ -270,9 +271,13 @@ public static File createMockMTab(File parentDir) throws IOException { return mockMtab; } - + /** + * Tests whether mtab parsing works as expected with a valid hierarchy set. 
+ * @throws Exception if the test fails + */ @Test public void testMtabParsing() throws Exception { + // Initialize mtab and cgroup dir File parentDir = new File(tmpPath); // create mock cgroup File cpuCgroupMountDir = createMockCgroupMount(parentDir, "cpu", @@ -282,9 +287,13 @@ public void testMtabParsing() throws Exception { "blkio", hierarchy); Assert.assertTrue(blkioCgroupMountDir.exists()); File mockMtabFile = createMockMTab(parentDir); + + // Run mtabs parsing Map controllerPaths = CGroupsHandlerImpl.initializeControllerPathsFromMtab( - mockMtabFile.getAbsolutePath(), hierarchy); + mockMtabFile.getAbsolutePath(), hierarchy); + + // Verify Assert.assertEquals(2, controllerPaths.size()); Assert.assertTrue(controllerPaths .containsKey(CGroupsHandler.CGroupController.CPU)); @@ -297,8 +306,138 @@ public void testMtabParsing() throws Exception { Assert.assertEquals(parentDir.getAbsolutePath() + "/blkio", blkioDir); } + /** + * Tests pre-mounted controller initialization with the default, an empty + * and a root ("/") hierarchy. + * @throws Exception if the test fails + */ + @Test + public void testPreMountedController() throws Exception { + testPreMountedControllerInitialization("hadoop-yarn"); + testPreMountedControllerInitialization(""); + testPreMountedControllerInitialization("/"); + } + + /** + * Tests whether pre-mounted controller initialization works as expected with the specified hierarchy. 
+ * @param myHierarchy path to local cgroup hierarchy + * @throws Exception the test will fail + */ + private void testPreMountedControllerInitialization(String myHierarchy) + throws Exception { + // Initialize mount point + File parentDir = new File(tmpPath); + FileUtils.deleteQuietly(parentDir); + Assert.assertTrue("Could not create dirs", parentDir.mkdirs()); + File mtab = createMockMTab(parentDir); + File mountPoint = new File(parentDir, "cpu"); + File cpuCgroupMountDir = createMockCgroupMount( + parentDir, "cpu", myHierarchy); + + // Initialize Yarn classes + Configuration confNoMount = new Configuration(); + confNoMount.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY, + myHierarchy); + confNoMount.setBoolean(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT, + false); + CGroupsHandlerImpl cGroupsHandler = new CGroupsHandlerImpl(confNoMount, + privilegedOperationExecutorMock, mtab.getAbsolutePath()); + + + // Test that a missing yarn hierarchy will be created automatically + if (!cpuCgroupMountDir.equals(mountPoint)) { + Assert.assertTrue("Could not delete cgroups", cpuCgroupMountDir.delete()); + Assert.assertTrue("Directory should be deleted", + !cpuCgroupMountDir.exists()); + } + cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.CPU); + Assert.assertTrue("Cgroups not writable", cpuCgroupMountDir.exists() && + cpuCgroupMountDir.canWrite()); + + // Test that an inaccessible yarn hierarchy results in an exception + Assert.assertTrue(cpuCgroupMountDir.setWritable(false)); + try { + cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.CPU); + Assert.fail("An inaccessible path should result in an exception"); + } catch (Exception e) { + Assert.assertTrue("Unexpected exception " + e.getClass().toString(), + e instanceof ResourceHandlerException); + } finally { + Assert.assertTrue("Could not revert writable permission", + cpuCgroupMountDir.setWritable(true)); + } + + // Test that a non-accessible mount 
directory results in an exception + if (!cpuCgroupMountDir.equals(mountPoint)) { + Assert.assertTrue("Could not delete cgroups", cpuCgroupMountDir.delete()); + Assert.assertTrue("Directory should be deleted", + !cpuCgroupMountDir.exists()); + } + Assert.assertTrue(mountPoint.setWritable(false)); + try { + cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.CPU); + Assert.fail("An inaccessible path should result in an exception"); + } catch (Exception e) { + Assert.assertTrue("Unexpected exception " + e.getClass().toString(), + e instanceof ResourceHandlerException); + } finally { + Assert.assertTrue("Could not revert writable permission", + mountPoint.setWritable(true)); + } + + // Test that a SecurityException results in an exception + if (!cpuCgroupMountDir.equals(mountPoint)) { + Assert.assertFalse("Could not delete cgroups", + cpuCgroupMountDir.delete()); + Assert.assertTrue("Directory should be deleted", + !cpuCgroupMountDir.exists()); + SecurityManager manager = System.getSecurityManager(); + System.setSecurityManager(new MockSecurityManagerDenyWrite()); + try { + cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.CPU); + Assert.fail("An inaccessible path should result in an exception"); + } catch (Exception e) { + Assert.assertTrue("Unexpected exception " + e.getClass().toString(), + e instanceof ResourceHandlerException); + } finally { + System.setSecurityManager(manager); + } + } + + // Test that a non-existing mount directory results in an exception + if (!cpuCgroupMountDir.equals(mountPoint)) { + Assert.assertFalse("Could not delete cgroups", + cpuCgroupMountDir.delete()); + Assert.assertTrue("Directory should be deleted", + !cpuCgroupMountDir.exists()); + } + FileUtils.deleteQuietly(mountPoint); + Assert.assertTrue("cgroups mount point should be deleted", + !mountPoint.exists()); + try { + cGroupsHandler.initializeCGroupController( + CGroupsHandler.CGroupController.CPU); + Assert.fail("An inaccessible 
path should result in an exception"); + } catch (Exception e) { + Assert.assertTrue("Unexpected exception " + e.getClass().toString(), + e instanceof ResourceHandlerException); + } + } + @After public void teardown() { FileUtil.fullyDelete(new File(tmpPath)); } + + private class MockSecurityManagerDenyWrite extends SecurityManager { + @Override + public void checkPermission(Permission perm) { + if(perm.getActions().equals("write")) { + throw new SecurityException("Mock not allowed"); + } + } + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java index d98a16fb2c7..180e1340906 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java @@ -52,7 +52,7 @@ public void testBootstrap() throws Exception { List ret = cGroupsMemoryResourceHandler.bootstrap(conf); verify(mockCGroupsHandler, times(1)) - .mountCGroupController(CGroupsHandler.CGroupController.MEMORY); + .initializeCGroupController(CGroupsHandler.CGroupController.MEMORY); Assert.assertNull(ret); Assert.assertEquals("Default swappiness value incorrect", 0, cGroupsMemoryResourceHandler.getSwappiness()); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java index 50ad6b9abdd..13b01880c5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java @@ -99,7 +99,7 @@ public void testBootstrap() { try { handlerImpl.bootstrap(conf); - verify(cGroupsHandlerMock).mountCGroupController( + verify(cGroupsHandlerMock).initializeCGroupController( eq(CGroupsHandler.CGroupController.NET_CLS)); verifyNoMoreInteractions(cGroupsHandlerMock); verify(trafficControllerMock).bootstrap(eq(device), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md index 79a428de8e6..50f2faf2c2c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md @@ -31,7 +31,7 @@ The following settings are related to setting up CGroups. These need to be set i |:---- |:---- | | `yarn.nodemanager.container-executor.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor". 
CGroups is a Linux kernel feature and is exposed via the LinuxContainerExecutor. | | `yarn.nodemanager.linux-container-executor.resources-handler.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler". Using the LinuxContainerExecutor doesn't force you to use CGroups. If you wish to use CGroups, the resource-handler-class must be set to CGroupsLCEResourceHandler. | -| `yarn.nodemanager.linux-container-executor.cgroups.hierarchy` | The cgroups hierarchy under which to place YARN proccesses(cannot contain commas). If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have been pre-configured), then this cgroups hierarchy must already exist | +| `yarn.nodemanager.linux-container-executor.cgroups.hierarchy` | The cgroups hierarchy under which to place YARN processes (cannot contain commas). If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have been pre-configured) and the YARN user has write access to the parent directory, then the directory will be created. If the directory already exists, the administrator has to give YARN write permissions to it recursively. | | `yarn.nodemanager.linux-container-executor.cgroups.mount` | Whether the LCE should attempt to mount cgroups if not found - can be true or false. | | `yarn.nodemanager.linux-container-executor.cgroups.mount-path` | Where the LCE should attempt to mount cgroups if not found. Common locations include /sys/fs/cgroup and /cgroup; the default location can vary depending on the Linux distribution in use. This path must exist before the NodeManager is launched. Only used when the LCE resources handler is set to the CgroupsLCEResourcesHandler, and yarn.nodemanager.linux-container-executor.cgroups.mount is true. A point to note here is that the container-executor binary will try to mount the path specified + "/" + the subsystem.
In our case, since we are trying to limit CPU the binary tries to mount the path specified + "/cpu" and that's the path it expects to exist. | | `yarn.nodemanager.linux-container-executor.group` | The Unix group of the NodeManager. It should match the setting in "container-executor.cfg". This configuration is required for validating the secure access of the container-executor binary. |