YARN-4762. Fixed CGroupsHandler's creation and usage to avoid NodeManagers crashing when LinuxContainerExecutor is enabled. (Sidharta Seethana via vinodkv)
(cherry picked from commit b2661765a5)
This commit is contained in:
parent
135ceb6c7b
commit
da9f39b107
|
@ -63,7 +63,7 @@ public class ResourceHandlerModule {
|
||||||
/**
|
/**
|
||||||
* Returns an initialized, thread-safe CGroupsHandler instance.
|
* Returns an initialized, thread-safe CGroupsHandler instance.
|
||||||
*/
|
*/
|
||||||
public static CGroupsHandler getCGroupsHandler(Configuration conf)
|
private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
|
||||||
throws ResourceHandlerException {
|
throws ResourceHandlerException {
|
||||||
if (cGroupsHandler == null) {
|
if (cGroupsHandler == null) {
|
||||||
synchronized (CGroupsHandler.class) {
|
synchronized (CGroupsHandler.class) {
|
||||||
|
@ -77,7 +77,17 @@ public class ResourceHandlerModule {
|
||||||
return cGroupsHandler;
|
return cGroupsHandler;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CGroupsCpuResourceHandlerImpl getcGroupsCpuResourceHandler(
|
/**
|
||||||
|
* Returns a (possibly null) reference to a cGroupsHandler. This handler is
|
||||||
|
* non-null only if one or more of the known cgroups-based resource
|
||||||
|
* handlers are in use and have been initialized.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public static CGroupsHandler getCGroupsHandler() {
|
||||||
|
return cGroupsHandler;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
|
||||||
Configuration conf) throws ResourceHandlerException {
|
Configuration conf) throws ResourceHandlerException {
|
||||||
boolean cgroupsCpuEnabled =
|
boolean cgroupsCpuEnabled =
|
||||||
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
|
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
|
||||||
|
@ -92,7 +102,8 @@ public class ResourceHandlerModule {
|
||||||
if (cGroupsCpuResourceHandler == null) {
|
if (cGroupsCpuResourceHandler == null) {
|
||||||
LOG.debug("Creating new cgroups cpu handler");
|
LOG.debug("Creating new cgroups cpu handler");
|
||||||
cGroupsCpuResourceHandler =
|
cGroupsCpuResourceHandler =
|
||||||
new CGroupsCpuResourceHandlerImpl(getCGroupsHandler(conf));
|
new CGroupsCpuResourceHandlerImpl(
|
||||||
|
getInitializedCGroupsHandler(conf));
|
||||||
return cGroupsCpuResourceHandler;
|
return cGroupsCpuResourceHandler;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -112,7 +123,7 @@ public class ResourceHandlerModule {
|
||||||
LOG.debug("Creating new traffic control bandwidth handler");
|
LOG.debug("Creating new traffic control bandwidth handler");
|
||||||
trafficControlBandwidthHandler = new
|
trafficControlBandwidthHandler = new
|
||||||
TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
|
TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
|
||||||
.getInstance(conf), getCGroupsHandler(conf),
|
.getInstance(conf), getInitializedCGroupsHandler(conf),
|
||||||
new TrafficController(conf, PrivilegedOperationExecutor
|
new TrafficController(conf, PrivilegedOperationExecutor
|
||||||
.getInstance(conf)));
|
.getInstance(conf)));
|
||||||
}
|
}
|
||||||
|
@ -147,7 +158,8 @@ public class ResourceHandlerModule {
|
||||||
if (cGroupsBlkioResourceHandler == null) {
|
if (cGroupsBlkioResourceHandler == null) {
|
||||||
LOG.debug("Creating new cgroups blkio handler");
|
LOG.debug("Creating new cgroups blkio handler");
|
||||||
cGroupsBlkioResourceHandler =
|
cGroupsBlkioResourceHandler =
|
||||||
new CGroupsBlkioResourceHandlerImpl(getCGroupsHandler(conf));
|
new CGroupsBlkioResourceHandlerImpl(
|
||||||
|
getInitializedCGroupsHandler(conf));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,7 +182,8 @@ public class ResourceHandlerModule {
|
||||||
synchronized (MemoryResourceHandler.class) {
|
synchronized (MemoryResourceHandler.class) {
|
||||||
if (cGroupsMemoryResourceHandler == null) {
|
if (cGroupsMemoryResourceHandler == null) {
|
||||||
cGroupsMemoryResourceHandler =
|
cGroupsMemoryResourceHandler =
|
||||||
new CGroupsMemoryResourceHandlerImpl(getCGroupsHandler(conf));
|
new CGroupsMemoryResourceHandlerImpl(
|
||||||
|
getInitializedCGroupsHandler(conf));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -191,7 +204,7 @@ public class ResourceHandlerModule {
|
||||||
addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
|
addHandlerIfNotNull(handlerList, getOutboundBandwidthResourceHandler(conf));
|
||||||
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
|
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
|
||||||
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
|
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
|
||||||
addHandlerIfNotNull(handlerList, getcGroupsCpuResourceHandler(conf));
|
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
|
||||||
resourceHandlerChain = new ResourceHandlerChain(handlerList);
|
resourceHandlerChain = new ResourceHandlerChain(handlerList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,9 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
|
||||||
|
|
||||||
|
@ -48,19 +45,11 @@ public class DelegatingLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
throws ContainerExecutionException {
|
throws ContainerExecutionException {
|
||||||
PrivilegedOperationExecutor privilegedOperationExecutor =
|
PrivilegedOperationExecutor privilegedOperationExecutor =
|
||||||
PrivilegedOperationExecutor.getInstance(conf);
|
PrivilegedOperationExecutor.getInstance(conf);
|
||||||
CGroupsHandler cGroupsHandler;
|
|
||||||
try {
|
|
||||||
cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(conf);
|
|
||||||
} catch (ResourceHandlerException e) {
|
|
||||||
LOG.error("Unable to get cgroups handle.");
|
|
||||||
throw new ContainerExecutionException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime(
|
defaultLinuxContainerRuntime = new DefaultLinuxContainerRuntime(
|
||||||
privilegedOperationExecutor);
|
privilegedOperationExecutor);
|
||||||
defaultLinuxContainerRuntime.initialize(conf);
|
defaultLinuxContainerRuntime.initialize(conf);
|
||||||
dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime(
|
dockerLinuxContainerRuntime = new DockerLinuxContainerRuntime(
|
||||||
privilegedOperationExecutor, cGroupsHandler);
|
privilegedOperationExecutor);
|
||||||
dockerLinuxContainerRuntime.initialize(conf);
|
dockerLinuxContainerRuntime.initialize(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
@ -36,6 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerClient;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
|
||||||
|
@ -88,10 +90,25 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
return type != null && type.equals("docker");
|
return type != null && type.equals("docker");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
|
||||||
|
privilegedOperationExecutor) {
|
||||||
|
this(privilegedOperationExecutor, ResourceHandlerModule
|
||||||
|
.getCGroupsHandler());
|
||||||
|
}
|
||||||
|
|
||||||
|
//A constructor with an injected cGroupsHandler primarily used for testing.
|
||||||
|
@VisibleForTesting
|
||||||
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
|
public DockerLinuxContainerRuntime(PrivilegedOperationExecutor
|
||||||
privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
|
privilegedOperationExecutor, CGroupsHandler cGroupsHandler) {
|
||||||
this.privilegedOperationExecutor = privilegedOperationExecutor;
|
this.privilegedOperationExecutor = privilegedOperationExecutor;
|
||||||
this.cGroupsHandler = cGroupsHandler;
|
|
||||||
|
if (cGroupsHandler == null) {
|
||||||
|
if (LOG.isInfoEnabled()) {
|
||||||
|
LOG.info("cGroupsHandler is null - cgroups not in use.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this.cGroupsHandler = cGroupsHandler;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -113,6 +130,14 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
|
||||||
public void addCGroupParentIfRequired(String resourcesOptions,
|
public void addCGroupParentIfRequired(String resourcesOptions,
|
||||||
String containerIdStr, DockerRunCommand runCommand)
|
String containerIdStr, DockerRunCommand runCommand)
|
||||||
throws ContainerExecutionException {
|
throws ContainerExecutionException {
|
||||||
|
if (cGroupsHandler == null) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("cGroupsHandler is null. cgroups are not in use. nothing to"
|
||||||
|
+ " do.");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (resourcesOptions.equals(
|
if (resourcesOptions.equals(
|
||||||
(PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation
|
(PrivilegedOperation.CGROUP_ARG_PREFIX + PrivilegedOperation
|
||||||
.CGROUP_ARG_NO_TASKS))) {
|
.CGROUP_ARG_NO_TASKS))) {
|
||||||
|
|
|
@ -429,5 +429,20 @@ public class TestDockerContainerRuntime {
|
||||||
//--cgroup-parent should be added for the containerId in question
|
//--cgroup-parent should be added for the containerId in question
|
||||||
String expectedPath = "/" + hierarchy + "/" + containerIdStr;
|
String expectedPath = "/" + hierarchy + "/" + containerIdStr;
|
||||||
Mockito.verify(command).setCGroupParent(expectedPath);
|
Mockito.verify(command).setCGroupParent(expectedPath);
|
||||||
|
|
||||||
|
//create a runtime with a 'null' cgroups handler - i.e. no
|
||||||
|
// cgroup-based resource handlers are in use.
|
||||||
|
|
||||||
|
runtime = new DockerLinuxContainerRuntime
|
||||||
|
(mockExecutor, null);
|
||||||
|
runtime.initialize(conf);
|
||||||
|
|
||||||
|
runtime.addCGroupParentIfRequired(resourceOptionsNone, containerIdStr,
|
||||||
|
command);
|
||||||
|
runtime.addCGroupParentIfRequired(resourceOptionsCpu, containerIdStr,
|
||||||
|
command);
|
||||||
|
|
||||||
|
//no --cgroup-parent should be added in either case
|
||||||
|
Mockito.verifyZeroInteractions(command);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue