YARN-4762. Fixed CGroupsHandler's creation and usage to avoid NodeManagers crashing when LinuxContainerExecutor is enabled. (Sidharta Seethana via vinodkv)

This commit is contained in:
Vinod Kumar Vavilapalli 2016-03-07 11:08:17 -08:00
parent d718fc1ee5
commit b2661765a5
4 changed files with 62 additions and 20 deletions

View File

@ -63,7 +63,7 @@ public class ResourceHandlerModule {
/**
* Returns an initialized, thread-safe CGroupsHandler instance.
*/
public static CGroupsHandler getCGroupsHandler(Configuration conf)
private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
throws ResourceHandlerException {
if (cGroupsHandler == null) {
synchronized (CGroupsHandler.class) {
@ -77,7 +77,17 @@ public class ResourceHandlerModule {
return cGroupsHandler;
}
private static CGroupsCpuResourceHandlerImpl getcGroupsCpuResourceHandler(
/**
 * Returns the current cGroupsHandler reference as-is, without creating or
 * initializing one. The reference is non-null only if one or more of the
 * known cgroups-based resource handlers are in use and have been
 * initialized, so callers must be prepared to receive null.
 *
 * @return the shared cGroupsHandler, or null if none has been initialized
 */
public static CGroupsHandler getCGroupsHandler() {
return cGroupsHandler;
}
private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
Configuration conf) throws ResourceHandlerException {
boolean cgroupsCpuEnabled =
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
@ -92,7 +102,8 @@ public class ResourceHandlerModule {
if (cGroupsCpuResourceHandler == null) {
LOG.debug("Creating new cgroups cpu handler");
cGroupsCpuResourceHandler =
new CGroupsCpuResourceHandlerImpl(getCGroupsHandler(conf));
new CGroupsCpuResourceHandlerImpl(
getInitializedCGroupsHandler(conf));
return cGroupsCpuResourceHandler;
}
}
@ -112,7 +123,7 @@ public class ResourceHandlerModule {
LOG.debug("Creating new traffic control bandwidth handler");
trafficControlBandwidthHandler = new
TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
.getInstance(conf), getCGroupsHandler(conf),
.getInstance(conf), getInitializedCGroupsHandler(conf),
new TrafficController(conf, PrivilegedOperationExecutor
.getInstance(conf)));
}
@ -147,7 +158,8 @@ public class ResourceHandlerModule {
if (cGroupsBlkioResourceHandler == null) {
LOG.debug("Creating new cgroups blkio handler");
cGroupsBlkioResourceHandler =
new CGroupsBlkioResourceHandlerImpl(getCGroupsHandler(conf));
new CGroupsBlkioResourceHandlerImpl(
getInitializedCGroupsHandler(conf));
}
}
}
@ -170,7 +182,8 @@ public class ResourceHandlerModule {
synchronized (MemoryResourceHandler.class) {
if (cGroupsMemoryResourceHandler == null) {
cGroupsMemoryResourceHandler =
new CGroupsMemoryResourceHandlerImpl(getCGroupsHandler(conf));
new CGroupsMemoryResourceHandlerImpl(
getInitializedCGroupsHandler(conf));
}
}
}
@ -191,7 +204,7 @@ public class ResourceHandlerModule {
addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
addHandlerIfNotNull(handlerList, getcGroupsCpuResourceHandler(conf));
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
resourceHandlerChain = new ResourceHandlerChain(handlerList);
}

View File

@ -27,9 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
@ -48,19 +45,11 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime {
throws ContainerExecutionException {
PrivilegedOperationExecutor privilegedOperationExecutor =
PrivilegedOperationExecutor.getInstance(conf);
CGroupsHandler cGroupsHandler;
try {
cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(conf);
} catch (ResourceHandlerException e) {
LOG.error("Unable to get cgroups handle.");
throw new ContainerExecutionException(e);
}
defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime(
privilegedOperationExecutor);
defaultLinuxContainerRuntime.initialize(conf);
dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime(
privilegedOperationExecutor, cGroupsHandler);
privilegedOperationExecutor);
dockerLinuxContainerRuntime.initialize(conf);
}

View File

@ -20,6 +20,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -36,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
@ -88,10 +90,25 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
return type != null && type.equals("docker");
}
/**
 * Creates a runtime that uses whatever cgroups handler
 * ResourceHandlerModule.getCGroupsHandler() currently returns, and
 * delegates to the two-argument constructor.
 *
 * @param privilegedOperationExecutor executor passed through to the
 * two-argument constructor
 */
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
privilegedOperationExecutor) {
this(privilegedOperationExecutor, ResourceHandlerModule
.getCGroupsHandler());
}
/**
 * Creates a runtime with an explicitly injected cgroups handler; primarily
 * used for testing.
 *
 * @param privilegedOperationExecutor executor stored for use by this runtime
 * @param cGroupsHandler cgroups handler to use, or {@code null} when cgroups
 *     are not in use
 */
@VisibleForTesting
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
    privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
  this.privilegedOperationExecutor = privilegedOperationExecutor;
  // A null handler is stored as-is; it signals that cgroups are not in use.
  this.cGroupsHandler = cGroupsHandler;

  if (cGroupsHandler == null && LOG.isInfoEnabled()) {
    LOG.info("cGroupsHandler is null - cgroups not in use.");
  }
}
@Override
@ -113,6 +130,14 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
public void addCGroupParentIfRequired(String resourcesOptions,
String containerIdStr, DockerRunCommand runCommand)
throws ContainerExecutionException {
if (cGroupsHandler == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to"
+ " do.");
}
return;
}
if (resourcesOptions.equals(
(PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation
.CGROUP_ARG_NO_TASKS))) {

View File

@ -429,5 +429,20 @@ public class TestDockerContainerRuntime {
//--cgroup-parent should be added for the containerId in question
String expectedPath = "/" + hierarchy + "/" + containerIdStr;
Mockito.verify(command).setCGroupParent(expectedPath);
// Create a runtime with a 'null' cgroups handler - i.e. no
// cgroup-based resource handlers are in use.
runtime = new DockerLinuxContainerRuntime
(mockExecutor, null);
runtime.initialize(conf);
runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr,
command);
runtime.addCGroupParentIfRequired(resourceOptionsCpu, containerIdStr,
command);
//no --cgroup-parent should be added in either case
Mockito.verifyZeroInteractions(command);
}
}