YARN-4762. Fixed CGroupsHandler's creation and usage to avoid NodeManager crashes when LinuxContainerExecutor is enabled. (Sidharta Seethana via vinodkv)
This commit is contained in:
parent
d718fc1ee5
commit
b2661765a5
|
@ -63,7 +63,7 @@ public class ResourceHandlerModule {
|
|||
/**
|
||||
* Returns an initialized, thread-safe CGroupsHandler instance.
|
||||
*/
|
||||
public static CGroupsHandler getCGroupsHandler(Configuration conf)
|
||||
private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
|
||||
throws ResourceHandlerException {
|
||||
if (cGroupsHandler == null) {
|
||||
synchronized (CGroupsHandler.class) {
|
||||
|
@ -77,7 +77,17 @@ public class ResourceHandlerModule {
|
|||
return cGroupsHandler;
|
||||
}
|
||||
|
||||
private static CGroupsCpuResourceHandlerImpl getcGroupsCpuResourceHandler(
|
||||
/**
|
||||
* Returns a (possibly null) reference to a cGroupsHandler. This handler is
|
||||
* non-null only if one or more of the known cgroups-based resource
|
||||
* handlers are in use and have been initialized.
|
||||
*/
|
||||
|
||||
public static CGroupsHandler getCGroupsHandler() {
|
||||
return cGroupsHandler;
|
||||
}
|
||||
|
||||
private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
|
||||
Configuration conf) throws ResourceHandlerException {
|
||||
boolean cgroupsCpuEnabled =
|
||||
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
|
||||
|
@ -92,7 +102,8 @@ public class ResourceHandlerModule {
|
|||
if (cGroupsCpuResourceHandler == null) {
|
||||
LOG.debug("Creating new cgroups cpu handler");
|
||||
cGroupsCpuResourceHandler =
|
||||
new CGroupsCpuResourceHandlerImpl(getCGroupsHandler(conf));
|
||||
new CGroupsCpuResourceHandlerImpl(
|
||||
getInitializedCGroupsHandler(conf));
|
||||
return cGroupsCpuResourceHandler;
|
||||
}
|
||||
}
|
||||
|
@ -112,7 +123,7 @@ public class ResourceHandlerModule {
|
|||
LOG.debug("Creating new traffic control bandwidth handler");
|
||||
trafficControlBandwidthHandler = new
|
||||
TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
|
||||
.getInstance(conf), getCGroupsHandler(conf),
|
||||
.getInstance(conf), getInitializedCGroupsHandler(conf),
|
||||
new TrafficController(conf, PrivilegedOperationExecutor
|
||||
.getInstance(conf)));
|
||||
}
|
||||
|
@ -147,7 +158,8 @@ public class ResourceHandlerModule {
|
|||
if (cGroupsBlkioResourceHandler == null) {
|
||||
LOG.debug("Creating new cgroups blkio handler");
|
||||
cGroupsBlkioResourceHandler =
|
||||
new CGroupsBlkioResourceHandlerImpl(getCGroupsHandler(conf));
|
||||
new CGroupsBlkioResourceHandlerImpl(
|
||||
getInitializedCGroupsHandler(conf));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -170,7 +182,8 @@ public class ResourceHandlerModule {
|
|||
synchronized (MemoryResourceHandler.class) {
|
||||
if (cGroupsMemoryResourceHandler == null) {
|
||||
cGroupsMemoryResourceHandler =
|
||||
new CGroupsMemoryResourceHandlerImpl(getCGroupsHandler(conf));
|
||||
new CGroupsMemoryResourceHandlerImpl(
|
||||
getInitializedCGroupsHandler(conf));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -191,7 +204,7 @@ public class ResourceHandlerModule {
|
|||
addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
|
||||
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
|
||||
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
|
||||
addHandlerIfNotNull(handlerList, getcGroupsCpuResourceHandler(conf));
|
||||
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
|
||||
resourceHandlerChain = new ResourceHandlerChain(handlerList);
|
||||
}
|
||||
|
||||
|
|
|
@ -27,9 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
|
|||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
|
||||
|
||||
|
@ -48,19 +45,11 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
throws ContainerExecutionException {
|
||||
PrivilegedOperationExecutor privilegedOperationExecutor =
|
||||
PrivilegedOperationExecutor.getInstance(conf);
|
||||
CGroupsHandler cGroupsHandler;
|
||||
try {
|
||||
cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(conf);
|
||||
} catch (ResourceHandlerException e) {
|
||||
LOG.error("Unable to get cgroups handle.");
|
||||
throw new ContainerExecutionException(e);
|
||||
}
|
||||
|
||||
defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime(
|
||||
privilegedOperationExecutor);
|
||||
defaultLinuxContainerRuntime.initialize(conf);
|
||||
dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime(
|
||||
privilegedOperationExecutor, cGroupsHandler);
|
||||
privilegedOperationExecutor);
|
||||
dockerLinuxContainerRuntime.initialize(conf);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
@ -36,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
|
|||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
|
||||
|
@ -88,10 +90,25 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
return type != null && type.equals("docker");
|
||||
}
|
||||
|
||||
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
|
||||
privilegedOperationExecutor) {
|
||||
this(privilegedOperationExecutor, ResourceHandlerModule
|
||||
.getCGroupsHandler());
|
||||
}
|
||||
|
||||
//A constructor with an injected cGroupsHandler primarily used for testing.
|
||||
@VisibleForTesting
|
||||
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
|
||||
privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
|
||||
this.privilegedOperationExecutor = privilegedOperationExecutor;
|
||||
this.cGroupsHandler = cGroupsHandler;
|
||||
|
||||
if (cGroupsHandler == null) {
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("cGroupsHandler is null - cgroups not in use.");
|
||||
}
|
||||
} else {
|
||||
this.cGroupsHandler = cGroupsHandler;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -113,6 +130,14 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
|||
public void addCGroupParentIfRequired(String resourcesOptions,
|
||||
String containerIdStr, DockerRunCommand runCommand)
|
||||
throws ContainerExecutionException {
|
||||
if (cGroupsHandler == null) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to"
|
||||
+ " do.");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (resourcesOptions.equals(
|
||||
(PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation
|
||||
.CGROUP_ARG_NO_TASKS))) {
|
||||
|
|
|
@ -429,5 +429,20 @@ public class TestDockerContainerRuntime {
|
|||
//--cgroup-parent should be added for the containerId in question
|
||||
String expectedPath = "/" + hierarchy + "/" + containerIdStr;
|
||||
Mockito.verify(command).setCGroupParent(expectedPath);
|
||||
|
||||
//create a runtime with a 'null' cgroups handler - i.e., no
|
||||
// cgroup-based resource handlers are in use.
|
||||
|
||||
runtime = new DockerLinuxContainerRuntime
|
||||
(mockExecutor, null);
|
||||
runtime.initialize(conf);
|
||||
|
||||
runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr,
|
||||
command);
|
||||
runtime.addCGroupParentIfRequired(resourceOptionsCpu, containerIdStr,
|
||||
command);
|
||||
|
||||
//no --cgroup-parent should be added in either case
|
||||
Mockito.verifyZeroInteractions(command);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue