YARN-4762. Fixed CGroupsHandler's creation and usage to avoid NodeManagers crashing when LinuxContainerExecutor is enabled. (Sidharta Seethana via vinodkv)

(cherry picked from commit b2661765a5)
This commit is contained in:
Vinod Kumar Vavilapalli 2016-03-07 11:08:17 -08:00
parent 135ceb6c7b
commit da9f39b107
4 changed files with 62 additions and 20 deletions

View File

@ -63,7 +63,7 @@ public class ResourceHandlerModule {
/** /**
* Returns an initialized, thread-safe CGroupsHandler instance. * Returns an initialized, thread-safe CGroupsHandler instance.
*/ */
public static CGroupsHandler getCGroupsHandler(Configuration conf) private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
throws ResourceHandlerException { throws ResourceHandlerException {
if (cGroupsHandler == null) { if (cGroupsHandler == null) {
synchronized (CGroupsHandler.class) { synchronized (CGroupsHandler.class) {
@ -77,7 +77,17 @@ public class ResourceHandlerModule {
return cGroupsHandler; return cGroupsHandler;
} }
private static CGroupsCpuResourceHandlerImpl getcGroupsCpuResourceHandler( /**
* Returns a (possibly null) reference to a cGroupsHandler. This handler is
* non-null only if one or more of the known cgroups-based resource
* handlers are in use and have been initialized.
*/
public static CGroupsHandler getCGroupsHandler() {
return cGroupsHandler;
}
private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
Configuration conf) throws ResourceHandlerException { Configuration conf) throws ResourceHandlerException {
boolean cgroupsCpuEnabled = boolean cgroupsCpuEnabled =
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED, conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
@ -92,7 +102,8 @@ public class ResourceHandlerModule {
if (cGroupsCpuResourceHandler == null) { if (cGroupsCpuResourceHandler == null) {
LOG.debug("Creating new cgroups cpu handler"); LOG.debug("Creating new cgroups cpu handler");
cGroupsCpuResourceHandler = cGroupsCpuResourceHandler =
new CGroupsCpuResourceHandlerImpl(getCGroupsHandler(conf)); new CGroupsCpuResourceHandlerImpl(
getInitializedCGroupsHandler(conf));
return cGroupsCpuResourceHandler; return cGroupsCpuResourceHandler;
} }
} }
@ -112,7 +123,7 @@ public class ResourceHandlerModule {
LOG.debug("Creating new traffic control bandwidth handler"); LOG.debug("Creating new traffic control bandwidth handler");
trafficControlBandwidthHandler = new trafficControlBandwidthHandler = new
TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
.getInstance(conf), getCGroupsHandler(conf), .getInstance(conf), getInitializedCGroupsHandler(conf),
new TrafficController(conf, PrivilegedOperationExecutor new TrafficController(conf, PrivilegedOperationExecutor
.getInstance(conf))); .getInstance(conf)));
} }
@ -147,7 +158,8 @@ public class ResourceHandlerModule {
if (cGroupsBlkioResourceHandler == null) { if (cGroupsBlkioResourceHandler == null) {
LOG.debug("Creating new cgroups blkio handler"); LOG.debug("Creating new cgroups blkio handler");
cGroupsBlkioResourceHandler = cGroupsBlkioResourceHandler =
new CGroupsBlkioResourceHandlerImpl(getCGroupsHandler(conf)); new CGroupsBlkioResourceHandlerImpl(
getInitializedCGroupsHandler(conf));
} }
} }
} }
@ -170,7 +182,8 @@ public class ResourceHandlerModule {
synchronized (MemoryResourceHandler.class) { synchronized (MemoryResourceHandler.class) {
if (cGroupsMemoryResourceHandler == null) { if (cGroupsMemoryResourceHandler == null) {
cGroupsMemoryResourceHandler = cGroupsMemoryResourceHandler =
new CGroupsMemoryResourceHandlerImpl(getCGroupsHandler(conf)); new CGroupsMemoryResourceHandlerImpl(
getInitializedCGroupsHandler(conf));
} }
} }
} }
@ -191,7 +204,7 @@ public class ResourceHandlerModule {
addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf)); addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf)); addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf)); addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
addHandlerIfNotNull(handlerList, getcGroupsCpuResourceHandler(conf)); addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
resourceHandlerChain = new ResourceHandlerChain(handlerList); resourceHandlerChain = new ResourceHandlerChain(handlerList);
} }

View File

@ -27,9 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
@ -48,19 +45,11 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime {
throws ContainerExecutionException { throws ContainerExecutionException {
PrivilegedOperationExecutor privilegedOperationExecutor = PrivilegedOperationExecutor privilegedOperationExecutor =
PrivilegedOperationExecutor.getInstance(conf); PrivilegedOperationExecutor.getInstance(conf);
CGroupsHandler cGroupsHandler;
try {
cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(conf);
} catch (ResourceHandlerException e) {
LOG.error("Unable to get cgroups handle.");
throw new ContainerExecutionException(e);
}
defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime( defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime(
privilegedOperationExecutor); privilegedOperationExecutor);
defaultLinuxContainerRuntime.initialize(conf); defaultLinuxContainerRuntime.initialize(conf);
dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime( dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime(
privilegedOperationExecutor, cGroupsHandler); privilegedOperationExecutor);
dockerLinuxContainerRuntime.initialize(conf); dockerLinuxContainerRuntime.initialize(conf);
} }

View File

@ -20,6 +20,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime; package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
@ -36,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand; import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
@ -88,10 +90,25 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
return type != null && type.equals("docker"); return type != null && type.equals("docker");
} }
/**
 * Creates a Docker runtime that uses the given privileged operation executor
 * and whatever cgroups handler {@link ResourceHandlerModule#getCGroupsHandler()}
 * returns at construction time.
 *
 * The handler is looked up, not created, here: the reference passed on to the
 * two-argument constructor may be null.
 *
 * @param privilegedOperationExecutor executor forwarded unchanged to the
 *     two-argument constructor
 */
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
privilegedOperationExecutor) {
this(privilegedOperationExecutor, ResourceHandlerModule
.getCGroupsHandler());
}
//A constructor with an injected cGroupsHandler primarily used for testing.
@VisibleForTesting
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
privilegedOperationExecutor, CGroupsHandler cGroupsHandler) { privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
this.privilegedOperationExecutor = privilegedOperationExecutor; this.privilegedOperationExecutor = privilegedOperationExecutor;
this.cGroupsHandler = cGroupsHandler;
if (cGroupsHandler == null) {
if (LOG.isInfoEnabled()) {
LOG.info("cGroupsHandler is null - cgroups not in use.");
}
} else {
this.cGroupsHandler = cGroupsHandler;
}
} }
@Override @Override
@ -113,6 +130,14 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
public void addCGroupParentIfRequired(String resourcesOptions, public void addCGroupParentIfRequired(String resourcesOptions,
String containerIdStr, DockerRunCommand runCommand) String containerIdStr, DockerRunCommand runCommand)
throws ContainerExecutionException { throws ContainerExecutionException {
if (cGroupsHandler == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to"
+ " do.");
}
return;
}
if (resourcesOptions.equals( if (resourcesOptions.equals(
(PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation (PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation
.CGROUP_ARG_NO_TASKS))) { .CGROUP_ARG_NO_TASKS))) {

View File

@ -429,5 +429,20 @@ public class TestDockerContainerRuntime {
//--cgroup-parent should be added for the containerId in question //--cgroup-parent should be added for the containerId in question
String expectedPath = "/" + hierarchy + "/" + containerIdStr; String expectedPath = "/" + hierarchy + "/" + containerIdStr;
Mockito.verify(command).setCGroupParent(expectedPath); Mockito.verify(command).setCGroupParent(expectedPath);
//create a runtime with a 'null' cgroups handler - i.e. no
// cgroup-based resource handlers are in use.
runtime = new DockerLinuxContainerRuntime
(mockExecutor, null);
runtime.initialize(conf);
runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr,
command);
runtime.addCGroupParentIfRequired(resourceOptionsCpu, containerIdStr,
command);
//no --cgroup-parent should be added in either case
Mockito.verifyZeroInteractions(command);
} }
} }