YARN-7064. Use cgroup to get container resource utilization. (Miklos Szegedi via Haibo Chen)
This commit is contained in:
parent
6eef3d7f1a
commit
649ef7ac33
|
@ -99,7 +99,7 @@ public class CpuTimeTracker {
|
||||||
public void updateElapsedJiffies(BigInteger elapsedJiffies, long newTime) {
|
public void updateElapsedJiffies(BigInteger elapsedJiffies, long newTime) {
|
||||||
BigInteger newValue = elapsedJiffies.multiply(jiffyLengthInMillis);
|
BigInteger newValue = elapsedJiffies.multiply(jiffyLengthInMillis);
|
||||||
cumulativeCpuTime = newValue.compareTo(cumulativeCpuTime) >= 0 ?
|
cumulativeCpuTime = newValue.compareTo(cumulativeCpuTime) >= 0 ?
|
||||||
newValue : cumulativeCpuTime;
|
newValue : cumulativeCpuTime;
|
||||||
sampleTime = newTime;
|
sampleTime = newTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1357,22 +1357,20 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final String NM_MEMORY_RESOURCE_PREFIX = NM_PREFIX
|
public static final String NM_MEMORY_RESOURCE_PREFIX = NM_PREFIX
|
||||||
+ "resource.memory.";
|
+ "resource.memory.";
|
||||||
|
|
||||||
@Private
|
|
||||||
public static final String NM_MEMORY_RESOURCE_ENABLED =
|
public static final String NM_MEMORY_RESOURCE_ENABLED =
|
||||||
NM_MEMORY_RESOURCE_PREFIX + "enabled";
|
NM_MEMORY_RESOURCE_PREFIX + "enabled";
|
||||||
@Private
|
|
||||||
public static final boolean DEFAULT_NM_MEMORY_RESOURCE_ENABLED = false;
|
public static final boolean DEFAULT_NM_MEMORY_RESOURCE_ENABLED = false;
|
||||||
|
|
||||||
@Private
|
public static final String NM_MEMORY_RESOURCE_ENFORCED =
|
||||||
|
NM_MEMORY_RESOURCE_PREFIX + "enforced";
|
||||||
|
public static final boolean DEFAULT_NM_MEMORY_RESOURCE_ENFORCED = true;
|
||||||
|
|
||||||
public static final String NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS =
|
public static final String NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS =
|
||||||
NM_MEMORY_RESOURCE_PREFIX + "cgroups.swappiness";
|
NM_MEMORY_RESOURCE_PREFIX + "cgroups.swappiness";
|
||||||
@Private
|
|
||||||
public static final int DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS = 0;
|
public static final int DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS = 0;
|
||||||
|
|
||||||
@Private
|
|
||||||
public static final String NM_MEMORY_RESOURCE_CGROUPS_SOFT_LIMIT_PERCENTAGE =
|
public static final String NM_MEMORY_RESOURCE_CGROUPS_SOFT_LIMIT_PERCENTAGE =
|
||||||
NM_MEMORY_RESOURCE_PREFIX + "cgroups.soft-limit-percentage";
|
NM_MEMORY_RESOURCE_PREFIX + "cgroups.soft-limit-percentage";
|
||||||
@Private
|
|
||||||
public static final float
|
public static final float
|
||||||
DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SOFT_LIMIT_PERCENTAGE =
|
DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SOFT_LIMIT_PERCENTAGE =
|
||||||
90.0f;
|
90.0f;
|
||||||
|
|
|
@ -158,8 +158,6 @@ public class TestYarnConfigurationFields extends TestConfigurationFieldsBase {
|
||||||
.add(YarnConfiguration.NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT);
|
.add(YarnConfiguration.NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT);
|
||||||
configurationPrefixToSkipCompare
|
configurationPrefixToSkipCompare
|
||||||
.add(YarnConfiguration.NM_DISK_RESOURCE_ENABLED);
|
.add(YarnConfiguration.NM_DISK_RESOURCE_ENABLED);
|
||||||
configurationPrefixToSkipCompare
|
|
||||||
.add(YarnConfiguration.NM_MEMORY_RESOURCE_PREFIX);
|
|
||||||
configurationPrefixToSkipCompare
|
configurationPrefixToSkipCompare
|
||||||
.add(YarnConfiguration.NM_CPU_RESOURCE_ENABLED);
|
.add(YarnConfiguration.NM_CPU_RESOURCE_ENABLED);
|
||||||
configurationPrefixToSkipCompare.add(
|
configurationPrefixToSkipCompare.add(
|
||||||
|
|
|
@ -468,6 +468,9 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
|
||||||
@Override
|
@Override
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
BigInteger processTotalJiffies = getTotalProcessJiffies();
|
BigInteger processTotalJiffies = getTotalProcessJiffies();
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Process " + pid + " jiffies:" + processTotalJiffies);
|
||||||
|
}
|
||||||
cpuTimeTracker.updateElapsedJiffies(processTotalJiffies,
|
cpuTimeTracker.updateElapsedJiffies(processTotalJiffies,
|
||||||
clock.getTime());
|
clock.getTime());
|
||||||
return cpuTimeTracker.getCpuTrackerUsagePercent();
|
return cpuTimeTracker.getCpuTrackerUsagePercent();
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Evolving;
|
import org.apache.hadoop.classification.InterfaceStability.Evolving;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interface class to obtain process resource usage
|
* Interface class to obtain process resource usage
|
||||||
|
@ -50,6 +51,13 @@ public abstract class ResourceCalculatorProcessTree extends Configured {
|
||||||
public ResourceCalculatorProcessTree(String root) {
|
public ResourceCalculatorProcessTree(String root) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the object.
|
||||||
|
* @throws YarnException Throws an exception on error.
|
||||||
|
*/
|
||||||
|
public void initialize() throws YarnException {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update the process-tree with latest state.
|
* Update the process-tree with latest state.
|
||||||
*
|
*
|
||||||
|
@ -168,6 +176,7 @@ public abstract class ResourceCalculatorProcessTree extends Configured {
|
||||||
Constructor <? extends ResourceCalculatorProcessTree> c = clazz.getConstructor(String.class);
|
Constructor <? extends ResourceCalculatorProcessTree> c = clazz.getConstructor(String.class);
|
||||||
ResourceCalculatorProcessTree rctree = c.newInstance(pid);
|
ResourceCalculatorProcessTree rctree = c.newInstance(pid);
|
||||||
rctree.setConf(conf);
|
rctree.setConf(conf);
|
||||||
|
rctree.initialize();
|
||||||
return rctree;
|
return rctree;
|
||||||
} catch(Exception e) {
|
} catch(Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
|
@ -1308,6 +1308,37 @@
|
||||||
<value>-1</value>
|
<value>-1</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Whether YARN CGroups memory tracking is enabled.</description>
|
||||||
|
<name>yarn.nodemanager.resource.memory.enabled</name>
|
||||||
|
<value>false</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Whether YARN CGroups strict memory enforcement is enabled.
|
||||||
|
</description>
|
||||||
|
<name>yarn.nodemanager.resource.memory.enforced</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>If memory limit is enforced, this the percentage of soft limit
|
||||||
|
compared to the memory assigned to the container. If there is memory
|
||||||
|
pressure container memory usage will be pushed back to its soft limit
|
||||||
|
by swapping out memory.
|
||||||
|
</description>
|
||||||
|
<name>yarn.nodemanager.resource.memory.cgroups.soft-limit-percentage</name>
|
||||||
|
<value>90.0</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Container swappiness is the likelihood a page will be swapped
|
||||||
|
out compared to be kept in memory. Value is between 0-100.
|
||||||
|
</description>
|
||||||
|
<name>yarn.nodemanager.resource.memory.cgroups.swappiness</name>
|
||||||
|
<value>0</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>Whether physical memory limits will be enforced for
|
<description>Whether physical memory limits will be enforced for
|
||||||
containers.</description>
|
containers.</description>
|
||||||
|
@ -1622,7 +1653,8 @@
|
||||||
or be allowed to consume spare resources if they need them. For example, turning the
|
or be allowed to consume spare resources if they need them. For example, turning the
|
||||||
flag on will restrict apps to use only their share of CPU, even if the node has spare
|
flag on will restrict apps to use only their share of CPU, even if the node has spare
|
||||||
CPU cycles. The default value is false i.e. use available resources. Please note that
|
CPU cycles. The default value is false i.e. use available resources. Please note that
|
||||||
turning this flag on may reduce job throughput on the cluster.</description>
|
turning this flag on may reduce job throughput on the cluster. This setting does
|
||||||
|
not apply to other subsystems like memory.</description>
|
||||||
<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
|
<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
|
@ -54,7 +54,7 @@ public interface CGroupsHandler {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
String getName() {
|
public String getName() {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,6 +112,13 @@ public interface CGroupsHandler {
|
||||||
void deleteCGroup(CGroupController controller, String cGroupId) throws
|
void deleteCGroup(CGroupController controller, String cGroupId) throws
|
||||||
ResourceHandlerException;
|
ResourceHandlerException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the absolute path to the specified cgroup controller.
|
||||||
|
* @param controller - controller type for the cgroup
|
||||||
|
* @return the root of the controller.
|
||||||
|
*/
|
||||||
|
String getControllerPath(CGroupController controller);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the relative path for the cgroup, independent of a controller, for a
|
* Gets the relative path for the cgroup, independent of a controller, for a
|
||||||
* given cgroup id.
|
* given cgroup id.
|
||||||
|
|
|
@ -125,7 +125,8 @@ class CGroupsHandlerImpl implements CGroupsHandler {
|
||||||
initializeControllerPaths();
|
initializeControllerPaths();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getControllerPath(CGroupController controller) {
|
@Override
|
||||||
|
public String getControllerPath(CGroupController controller) {
|
||||||
try {
|
try {
|
||||||
rwLock.readLock().lock();
|
rwLock.readLock().lock();
|
||||||
return controllerPaths.get(controller);
|
return controllerPaths.get(controller);
|
||||||
|
|
|
@ -52,6 +52,7 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
|
||||||
private static final int OPPORTUNISTIC_SOFT_LIMIT = 0;
|
private static final int OPPORTUNISTIC_SOFT_LIMIT = 0;
|
||||||
|
|
||||||
private CGroupsHandler cGroupsHandler;
|
private CGroupsHandler cGroupsHandler;
|
||||||
|
private boolean enforce = true;
|
||||||
private int swappiness = 0;
|
private int swappiness = 0;
|
||||||
// multiplier to set the soft limit - value should be between 0 and 1
|
// multiplier to set the soft limit - value should be between 0 and 1
|
||||||
private float softLimit = 0.0f;
|
private float softLimit = 0.0f;
|
||||||
|
@ -79,6 +80,9 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
|
||||||
throw new ResourceHandlerException(msg);
|
throw new ResourceHandlerException(msg);
|
||||||
}
|
}
|
||||||
this.cGroupsHandler.initializeCGroupController(MEMORY);
|
this.cGroupsHandler.initializeCGroupController(MEMORY);
|
||||||
|
enforce = conf.getBoolean(
|
||||||
|
YarnConfiguration.NM_MEMORY_RESOURCE_ENFORCED,
|
||||||
|
YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_ENFORCED);
|
||||||
swappiness = conf
|
swappiness = conf
|
||||||
.getInt(YarnConfiguration.NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS,
|
.getInt(YarnConfiguration.NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS,
|
||||||
YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS);
|
YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS);
|
||||||
|
@ -124,31 +128,33 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
|
||||||
(long) (container.getResource().getMemorySize() * this.softLimit);
|
(long) (container.getResource().getMemorySize() * this.softLimit);
|
||||||
long containerHardLimit = container.getResource().getMemorySize();
|
long containerHardLimit = container.getResource().getMemorySize();
|
||||||
cGroupsHandler.createCGroup(MEMORY, cgroupId);
|
cGroupsHandler.createCGroup(MEMORY, cgroupId);
|
||||||
try {
|
if (enforce) {
|
||||||
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
try {
|
||||||
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES,
|
|
||||||
String.valueOf(containerHardLimit) + "M");
|
|
||||||
ContainerTokenIdentifier id = container.getContainerTokenIdentifier();
|
|
||||||
if (id != null && id.getExecutionType() ==
|
|
||||||
ExecutionType.OPPORTUNISTIC) {
|
|
||||||
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
||||||
CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
|
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES,
|
||||||
String.valueOf(OPPORTUNISTIC_SOFT_LIMIT) + "M");
|
String.valueOf(containerHardLimit) + "M");
|
||||||
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
ContainerTokenIdentifier id = container.getContainerTokenIdentifier();
|
||||||
CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
|
if (id != null && id.getExecutionType() ==
|
||||||
String.valueOf(OPPORTUNISTIC_SWAPPINESS));
|
ExecutionType.OPPORTUNISTIC) {
|
||||||
} else {
|
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
||||||
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
|
||||||
CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
|
String.valueOf(OPPORTUNISTIC_SOFT_LIMIT) + "M");
|
||||||
String.valueOf(containerSoftLimit) + "M");
|
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
||||||
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
|
||||||
CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
|
String.valueOf(OPPORTUNISTIC_SWAPPINESS));
|
||||||
String.valueOf(swappiness));
|
} else {
|
||||||
|
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
||||||
|
CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
|
||||||
|
String.valueOf(containerSoftLimit) + "M");
|
||||||
|
cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
|
||||||
|
CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
|
||||||
|
String.valueOf(swappiness));
|
||||||
|
}
|
||||||
|
} catch (ResourceHandlerException re) {
|
||||||
|
cGroupsHandler.deleteCGroup(MEMORY, cgroupId);
|
||||||
|
LOG.warn("Could not update cgroup for container", re);
|
||||||
|
throw re;
|
||||||
}
|
}
|
||||||
} catch (ResourceHandlerException re) {
|
|
||||||
cGroupsHandler.deleteCGroup(MEMORY, cgroupId);
|
|
||||||
LOG.warn("Could not update cgroup for container", re);
|
|
||||||
throw re;
|
|
||||||
}
|
}
|
||||||
List<PrivilegedOperation> ret = new ArrayList<>();
|
List<PrivilegedOperation> ret = new ArrayList<>();
|
||||||
ret.add(new PrivilegedOperation(
|
ret.add(new PrivilegedOperation(
|
||||||
|
|
|
@ -0,0 +1,357 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.util.CpuTimeTracker;
|
||||||
|
import org.apache.hadoop.util.Shell;
|
||||||
|
import org.apache.hadoop.util.SysInfoLinux;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.util.Clock;
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
|
import org.apache.hadoop.yarn.util.SystemClock;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.math.BigInteger;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.function.Function;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A cgroups file-system based Resource calculator without the process tree
|
||||||
|
* features.
|
||||||
|
*
|
||||||
|
* CGroups has its limitations. It can only be enabled, if both CPU and memory
|
||||||
|
* cgroups are enabled with yarn.nodemanager.resource.cpu.enabled and
|
||||||
|
* yarn.nodemanager.resource.memory.enabled respectively. This means that
|
||||||
|
* memory limits are enforced by default. You can turn this off and keep
|
||||||
|
* memory reporting only with yarn.nodemanager.resource.memory.enforced.
|
||||||
|
*
|
||||||
|
* Another limitation is virtual memory measurement. CGroups does not have the
|
||||||
|
* ability to measure virtual memory usage. This includes memory reserved but
|
||||||
|
* not used. CGroups measures used memory as sa sum of
|
||||||
|
* physical memory and swap usage. This will be returned in the virtual
|
||||||
|
* memory counters.
|
||||||
|
* If the real virtual memory is required please use the legacy procfs based
|
||||||
|
* resource calculator or CombinedResourceCalculator.
|
||||||
|
*/
|
||||||
|
public class CGroupsResourceCalculator extends ResourceCalculatorProcessTree {
|
||||||
|
enum Result {
|
||||||
|
Continue,
|
||||||
|
Exit
|
||||||
|
}
|
||||||
|
protected static final Log LOG = LogFactory
|
||||||
|
.getLog(CGroupsResourceCalculator.class);
|
||||||
|
private static final String PROCFS = "/proc";
|
||||||
|
static final String CGROUP = "cgroup";
|
||||||
|
static final String CPU_STAT = "cpuacct.stat";
|
||||||
|
static final String MEM_STAT = "memory.usage_in_bytes";
|
||||||
|
static final String MEMSW_STAT = "memory.memsw.usage_in_bytes";
|
||||||
|
private static final String USER = "user ";
|
||||||
|
private static final String SYSTEM = "system ";
|
||||||
|
|
||||||
|
private static final Pattern CGROUP_FILE_FORMAT = Pattern.compile(
|
||||||
|
"^(\\d+):([^:]+):/(.*)$");
|
||||||
|
private final String procfsDir;
|
||||||
|
private CGroupsHandler cGroupsHandler;
|
||||||
|
|
||||||
|
private String pid;
|
||||||
|
private File cpuStat;
|
||||||
|
private File memStat;
|
||||||
|
private File memswStat;
|
||||||
|
|
||||||
|
private BigInteger processTotalJiffies;
|
||||||
|
private long processPhysicalMemory;
|
||||||
|
private long processVirtualMemory;
|
||||||
|
|
||||||
|
private final long jiffyLengthMs;
|
||||||
|
private final CpuTimeTracker cpuTimeTracker;
|
||||||
|
private Clock clock;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create resource calculator for all Yarn containers.
|
||||||
|
*/
|
||||||
|
public CGroupsResourceCalculator()
|
||||||
|
throws YarnException {
|
||||||
|
this(null, PROCFS, ResourceHandlerModule.getCGroupsHandler(),
|
||||||
|
SystemClock.getInstance(), SysInfoLinux.JIFFY_LENGTH_IN_MILLIS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create resource calculator for the container that has the specified pid.
|
||||||
|
* @param pid A pid from the cgroup or null for all containers
|
||||||
|
*/
|
||||||
|
public CGroupsResourceCalculator(String pid) {
|
||||||
|
this(pid, PROCFS, ResourceHandlerModule.getCGroupsHandler(),
|
||||||
|
SystemClock.getInstance(), SysInfoLinux.JIFFY_LENGTH_IN_MILLIS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create resource calculator for testing.
|
||||||
|
* @param pid A pid from the cgroup or null for all containers
|
||||||
|
* @param procfsDir Path to /proc or a mock /proc directory
|
||||||
|
* @param cGroupsHandler Initialized cgroups handler object
|
||||||
|
* @param clock A clock object
|
||||||
|
* @param jiffyLengthMs0 Jiffy length in milliseconds
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
CGroupsResourceCalculator(String pid, String procfsDir,
|
||||||
|
CGroupsHandler cGroupsHandler,
|
||||||
|
Clock clock,
|
||||||
|
long jiffyLengthMs0) {
|
||||||
|
super(pid);
|
||||||
|
this.procfsDir = procfsDir;
|
||||||
|
this.cGroupsHandler = cGroupsHandler;
|
||||||
|
this.pid = pid != null && pid.equals("0") ? "1" : pid;
|
||||||
|
this.jiffyLengthMs = jiffyLengthMs0;
|
||||||
|
this.cpuTimeTracker =
|
||||||
|
new CpuTimeTracker(this.jiffyLengthMs);
|
||||||
|
this.clock = clock;
|
||||||
|
this.processTotalJiffies = BigInteger.ZERO;
|
||||||
|
this.processPhysicalMemory = UNAVAILABLE;
|
||||||
|
this.processVirtualMemory = UNAVAILABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() throws YarnException {
|
||||||
|
if (!CGroupsResourceCalculator.isAvailable()) {
|
||||||
|
throw new YarnException("CGroupsResourceCalculator is not available");
|
||||||
|
}
|
||||||
|
setCGroupFilePaths();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getCpuUsagePercent() {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Process " + pid + " jiffies:" + processTotalJiffies);
|
||||||
|
}
|
||||||
|
return cpuTimeTracker.getCpuTrackerUsagePercent();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getCumulativeCpuTime() {
|
||||||
|
if (jiffyLengthMs < 0) {
|
||||||
|
return UNAVAILABLE;
|
||||||
|
}
|
||||||
|
return processTotalJiffies.longValue() * jiffyLengthMs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getRssMemorySize(int olderThanAge) {
|
||||||
|
if (olderThanAge > 1) {
|
||||||
|
return UNAVAILABLE;
|
||||||
|
}
|
||||||
|
return processPhysicalMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getVirtualMemorySize(int olderThanAge) {
|
||||||
|
if (olderThanAge > 1) {
|
||||||
|
return UNAVAILABLE;
|
||||||
|
}
|
||||||
|
return processVirtualMemory;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateProcessTree() {
|
||||||
|
try {
|
||||||
|
this.processTotalJiffies = readTotalProcessJiffies();
|
||||||
|
cpuTimeTracker.updateElapsedJiffies(processTotalJiffies,
|
||||||
|
clock.getTime());
|
||||||
|
} catch (YarnException e) {
|
||||||
|
LOG.warn("Failed to parse " + pid, e);
|
||||||
|
}
|
||||||
|
processPhysicalMemory = getMemorySize(memStat);
|
||||||
|
if (memswStat.exists()) {
|
||||||
|
processVirtualMemory = getMemorySize(memswStat);
|
||||||
|
} else if(LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Swap cgroups monitoring is not compiled into the kernel " +
|
||||||
|
memswStat.getAbsolutePath().toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getProcessTreeDump() {
|
||||||
|
// We do not have a process tree in cgroups return just the pid for tracking
|
||||||
|
return pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean checkPidPgrpidForMatch() {
|
||||||
|
// We do not have a process tree in cgroups returning default ok
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if the CGroupsResourceCalculator is available on this system.
|
||||||
|
* This assumes that Linux container executor is already initialized.
|
||||||
|
*
|
||||||
|
* @return true if CGroupsResourceCalculator is available. False otherwise.
|
||||||
|
*/
|
||||||
|
public static boolean isAvailable() {
|
||||||
|
try {
|
||||||
|
if (!Shell.LINUX) {
|
||||||
|
LOG.info("CGroupsResourceCalculator currently is supported only on "
|
||||||
|
+ "Linux.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (ResourceHandlerModule.getCGroupsHandler() == null ||
|
||||||
|
ResourceHandlerModule.getCpuResourceHandler() == null ||
|
||||||
|
ResourceHandlerModule.getMemoryResourceHandler() == null) {
|
||||||
|
LOG.info("CGroupsResourceCalculator requires enabling CGroups" +
|
||||||
|
"cpu and memory");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (SecurityException se) {
|
||||||
|
LOG.warn("Failed to get Operating System name. " + se);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getMemorySize(File cgroupUsageFile) {
|
||||||
|
long[] mem = new long[1];
|
||||||
|
try {
|
||||||
|
processFile(cgroupUsageFile, (String line) -> {
|
||||||
|
mem[0] = Long.parseLong(line);
|
||||||
|
return Result.Exit;
|
||||||
|
});
|
||||||
|
return mem[0];
|
||||||
|
} catch (YarnException e) {
|
||||||
|
LOG.warn("Failed to parse cgroups " + memswStat, e);
|
||||||
|
}
|
||||||
|
return UNAVAILABLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigInteger readTotalProcessJiffies() throws YarnException {
|
||||||
|
final BigInteger[] totalCPUTimeJiffies = new BigInteger[1];
|
||||||
|
totalCPUTimeJiffies[0] = BigInteger.ZERO;
|
||||||
|
processFile(cpuStat, (String line) -> {
|
||||||
|
if (line.startsWith(USER)) {
|
||||||
|
totalCPUTimeJiffies[0] = totalCPUTimeJiffies[0].add(
|
||||||
|
new BigInteger(line.substring(USER.length())));
|
||||||
|
}
|
||||||
|
if (line.startsWith(SYSTEM)) {
|
||||||
|
totalCPUTimeJiffies[0] = totalCPUTimeJiffies[0].add(
|
||||||
|
new BigInteger(line.substring(SYSTEM.length())));
|
||||||
|
}
|
||||||
|
return Result.Continue;
|
||||||
|
});
|
||||||
|
return totalCPUTimeJiffies[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getCGroupRelativePath(
|
||||||
|
CGroupsHandler.CGroupController controller)
|
||||||
|
throws YarnException {
|
||||||
|
if (pid == null) {
|
||||||
|
return cGroupsHandler.getRelativePathForCGroup("");
|
||||||
|
} else {
|
||||||
|
return getCGroupRelativePathForPid(controller);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getCGroupRelativePathForPid(
|
||||||
|
CGroupsHandler.CGroupController controller)
|
||||||
|
throws YarnException {
|
||||||
|
File pidCgroupFile = new File(new File(procfsDir, pid), CGROUP);
|
||||||
|
String[] result = new String[1];
|
||||||
|
processFile(pidCgroupFile, (String line)->{
|
||||||
|
Matcher m = CGROUP_FILE_FORMAT.matcher(line);
|
||||||
|
boolean mat = m.find();
|
||||||
|
if (mat) {
|
||||||
|
if (m.group(2).contains(controller.getName())) {
|
||||||
|
// Instead of returning the full path we compose it
|
||||||
|
// based on the last item as the container id
|
||||||
|
// This helps to avoid confusion within a privileged Docker container
|
||||||
|
// where the path is referred in /proc/<pid>/cgroup as
|
||||||
|
// /docker/<dcontainerid>/hadoop-yarn/<containerid>
|
||||||
|
// but it is /hadoop-yarn/<containerid> in the cgroups hierarchy
|
||||||
|
String cgroupPath = m.group(3);
|
||||||
|
|
||||||
|
if (cgroupPath != null) {
|
||||||
|
String cgroup =
|
||||||
|
new File(cgroupPath).toPath().getFileName().toString();
|
||||||
|
result[0] = cGroupsHandler.getRelativePathForCGroup(cgroup);
|
||||||
|
} else {
|
||||||
|
LOG.warn("Invalid cgroup path for " + pidCgroupFile);
|
||||||
|
}
|
||||||
|
return Result.Exit;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG.warn(
|
||||||
|
"Unexpected: cgroup file is not in the expected format"
|
||||||
|
+ " for process with pid " + pid);
|
||||||
|
}
|
||||||
|
return Result.Continue;
|
||||||
|
});
|
||||||
|
if (result[0] == null) {
|
||||||
|
throw new YarnException(controller.getName() + " CGroup for pid " + pid +
|
||||||
|
" not found " + pidCgroupFile);
|
||||||
|
}
|
||||||
|
return result[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processFile(File file, Function<String, Result> processLine)
|
||||||
|
throws YarnException {
|
||||||
|
// Read "procfsDir/<pid>/stat" file - typically /proc/<pid>/stat
|
||||||
|
try (InputStreamReader fReader = new InputStreamReader(
|
||||||
|
new FileInputStream(file), Charset.forName("UTF-8"))) {
|
||||||
|
try (BufferedReader in = new BufferedReader(fReader)) {
|
||||||
|
try {
|
||||||
|
String str;
|
||||||
|
while ((str = in.readLine()) != null) {
|
||||||
|
Result result = processLine.apply(str);
|
||||||
|
if (result == Result.Exit) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException io) {
|
||||||
|
throw new YarnException("Error reading the stream " + io, io);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException f) {
|
||||||
|
throw new YarnException("The process vanished in the interim " + pid, f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void setCGroupFilePaths() throws YarnException {
|
||||||
|
if (cGroupsHandler == null) {
|
||||||
|
throw new YarnException("CGroups handler is not initialized");
|
||||||
|
}
|
||||||
|
File cpuDir = new File(
|
||||||
|
cGroupsHandler.getControllerPath(
|
||||||
|
CGroupsHandler.CGroupController.CPUACCT),
|
||||||
|
getCGroupRelativePath(CGroupsHandler.CGroupController.CPUACCT));
|
||||||
|
File memDir = new File(
|
||||||
|
cGroupsHandler.getControllerPath(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY),
|
||||||
|
getCGroupRelativePath(CGroupsHandler.CGroupController.MEMORY));
|
||||||
|
cpuStat = new File(cpuDir, CPU_STAT);
|
||||||
|
memStat = new File(memDir, MEM_STAT);
|
||||||
|
memswStat = new File(memDir, MEMSW_STAT);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,108 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree;
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CombinedResourceCalculator is a resource calculator that uses cgroups but
|
||||||
|
* it is backward compatible with procfs in terms of virtual memory usage.
|
||||||
|
*/
|
||||||
|
public class CombinedResourceCalculator extends ResourceCalculatorProcessTree {
|
||||||
|
protected static final Log LOG = LogFactory
|
||||||
|
.getLog(CombinedResourceCalculator.class);
|
||||||
|
private ProcfsBasedProcessTree procfs;
|
||||||
|
private CGroupsResourceCalculator cgroup;
|
||||||
|
|
||||||
|
public CombinedResourceCalculator(String pid) {
|
||||||
|
super(pid);
|
||||||
|
procfs = new ProcfsBasedProcessTree(pid);
|
||||||
|
cgroup = new CGroupsResourceCalculator(pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() throws YarnException {
|
||||||
|
procfs.initialize();
|
||||||
|
cgroup.initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateProcessTree() {
|
||||||
|
procfs.updateProcessTree();
|
||||||
|
cgroup.updateProcessTree();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getProcessTreeDump() {
|
||||||
|
return procfs.getProcessTreeDump();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getCpuUsagePercent() {
|
||||||
|
float cgroupUsage = cgroup.getCpuUsagePercent();
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
float procfsUsage = procfs.getCpuUsagePercent();
|
||||||
|
LOG.debug("CPU Comparison:" + procfsUsage + " " + cgroupUsage);
|
||||||
|
LOG.debug("Jiffy Comparison:" +
|
||||||
|
procfs.getCumulativeCpuTime() + " " +
|
||||||
|
cgroup.getCumulativeCpuTime());
|
||||||
|
}
|
||||||
|
|
||||||
|
return cgroupUsage;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean checkPidPgrpidForMatch() {
|
||||||
|
return procfs.checkPidPgrpidForMatch();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getCumulativeCpuTime() {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("CPU Comparison:" +
|
||||||
|
procfs.getCumulativeCpuTime() + " " +
|
||||||
|
cgroup.getCumulativeCpuTime());
|
||||||
|
}
|
||||||
|
return cgroup.getCumulativeCpuTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getRssMemorySize(int olderThanAge) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("MEM Comparison:" +
|
||||||
|
procfs.getRssMemorySize(olderThanAge) + " " +
|
||||||
|
cgroup.getRssMemorySize(olderThanAge));
|
||||||
|
}
|
||||||
|
return cgroup.getRssMemorySize(olderThanAge);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getVirtualMemorySize(int olderThanAge) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("VMEM Comparison:" +
|
||||||
|
procfs.getVirtualMemorySize(olderThanAge) + " " +
|
||||||
|
cgroup.getVirtualMemorySize(olderThanAge));
|
||||||
|
}
|
||||||
|
return procfs.getVirtualMemorySize(olderThanAge);
|
||||||
|
}
|
||||||
|
}
|
|
@ -25,7 +25,6 @@ import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
|
||||||
|
@ -101,7 +100,27 @@ public class ResourceHandlerModule {
|
||||||
return cGroupsHandler;
|
return cGroupsHandler;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
|
public static NetworkPacketTaggingHandlerImpl
|
||||||
|
getNetworkResourceHandler() {
|
||||||
|
return networkPacketTaggingHandlerImpl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DiskResourceHandler
|
||||||
|
getDiskResourceHandler() {
|
||||||
|
return cGroupsBlkioResourceHandler;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static MemoryResourceHandler
|
||||||
|
getMemoryResourceHandler() {
|
||||||
|
return cGroupsMemoryResourceHandler;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CpuResourceHandler
|
||||||
|
getCpuResourceHandler() {
|
||||||
|
return cGroupsCpuResourceHandler;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static CGroupsCpuResourceHandlerImpl initCGroupsCpuResourceHandler(
|
||||||
Configuration conf) throws ResourceHandlerException {
|
Configuration conf) throws ResourceHandlerException {
|
||||||
boolean cgroupsCpuEnabled =
|
boolean cgroupsCpuEnabled =
|
||||||
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
|
conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
|
||||||
|
@ -150,7 +169,7 @@ public class ResourceHandlerModule {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ResourceHandler getNetworkResourceHandler(Configuration conf)
|
public static ResourceHandler initNetworkResourceHandler(Configuration conf)
|
||||||
throws ResourceHandlerException {
|
throws ResourceHandlerException {
|
||||||
boolean useNetworkTagHandler = conf.getBoolean(
|
boolean useNetworkTagHandler = conf.getBoolean(
|
||||||
YarnConfiguration.NM_NETWORK_TAG_HANDLER_ENABLED,
|
YarnConfiguration.NM_NETWORK_TAG_HANDLER_ENABLED,
|
||||||
|
@ -181,12 +200,12 @@ public class ResourceHandlerModule {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OutboundBandwidthResourceHandler
|
public static OutboundBandwidthResourceHandler
|
||||||
getOutboundBandwidthResourceHandler(Configuration conf)
|
initOutboundBandwidthResourceHandler(Configuration conf)
|
||||||
throws ResourceHandlerException {
|
throws ResourceHandlerException {
|
||||||
return getTrafficControlBandwidthHandler(conf);
|
return getTrafficControlBandwidthHandler(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static DiskResourceHandler getDiskResourceHandler(Configuration conf)
|
public static DiskResourceHandler initDiskResourceHandler(Configuration conf)
|
||||||
throws ResourceHandlerException {
|
throws ResourceHandlerException {
|
||||||
if (conf.getBoolean(YarnConfiguration.NM_DISK_RESOURCE_ENABLED,
|
if (conf.getBoolean(YarnConfiguration.NM_DISK_RESOURCE_ENABLED,
|
||||||
YarnConfiguration.DEFAULT_NM_DISK_RESOURCE_ENABLED)) {
|
YarnConfiguration.DEFAULT_NM_DISK_RESOURCE_ENABLED)) {
|
||||||
|
@ -210,7 +229,7 @@ public class ResourceHandlerModule {
|
||||||
return cGroupsBlkioResourceHandler;
|
return cGroupsBlkioResourceHandler;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MemoryResourceHandler getMemoryResourceHandler(
|
public static MemoryResourceHandler initMemoryResourceHandler(
|
||||||
Configuration conf) throws ResourceHandlerException {
|
Configuration conf) throws ResourceHandlerException {
|
||||||
if (conf.getBoolean(YarnConfiguration.NM_MEMORY_RESOURCE_ENABLED,
|
if (conf.getBoolean(YarnConfiguration.NM_MEMORY_RESOURCE_ENABLED,
|
||||||
YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_ENABLED)) {
|
YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_ENABLED)) {
|
||||||
|
@ -246,10 +265,14 @@ public class ResourceHandlerModule {
|
||||||
throws ResourceHandlerException {
|
throws ResourceHandlerException {
|
||||||
ArrayList<ResourceHandler> handlerList = new ArrayList<>();
|
ArrayList<ResourceHandler> handlerList = new ArrayList<>();
|
||||||
|
|
||||||
addHandlerIfNotNull(handlerList, getNetworkResourceHandler(conf));
|
addHandlerIfNotNull(handlerList,
|
||||||
addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
|
initNetworkResourceHandler(conf));
|
||||||
addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
|
addHandlerIfNotNull(handlerList,
|
||||||
addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
|
initDiskResourceHandler(conf));
|
||||||
|
addHandlerIfNotNull(handlerList,
|
||||||
|
initMemoryResourceHandler(conf));
|
||||||
|
addHandlerIfNotNull(handlerList,
|
||||||
|
initCGroupsCpuResourceHandler(conf));
|
||||||
addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
|
addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
|
||||||
resourceHandlerChain = new ResourceHandlerChain(handlerList);
|
resourceHandlerChain = new ResourceHandlerChain(handlerList);
|
||||||
}
|
}
|
||||||
|
|
|
@ -215,15 +215,25 @@ public class ContainersMonitorImpl extends AbstractService implements
|
||||||
YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED);
|
YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the best process tree calculator.
|
||||||
|
* @param pId container process id
|
||||||
|
* @return process tree calculator
|
||||||
|
*/
|
||||||
|
private ResourceCalculatorProcessTree
|
||||||
|
getResourceCalculatorProcessTree(String pId) {
|
||||||
|
return ResourceCalculatorProcessTree.
|
||||||
|
getResourceCalculatorProcessTree(
|
||||||
|
pId, processTreeClass, conf);
|
||||||
|
}
|
||||||
|
|
||||||
private boolean isResourceCalculatorAvailable() {
|
private boolean isResourceCalculatorAvailable() {
|
||||||
if (resourceCalculatorPlugin == null) {
|
if (resourceCalculatorPlugin == null) {
|
||||||
LOG.info("ResourceCalculatorPlugin is unavailable on this system. " + this
|
LOG.info("ResourceCalculatorPlugin is unavailable on this system. " + this
|
||||||
.getClass().getName() + " is disabled.");
|
.getClass().getName() + " is disabled.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (ResourceCalculatorProcessTree
|
if (getResourceCalculatorProcessTree("0") == null) {
|
||||||
.getResourceCalculatorProcessTree("0", processTreeClass, conf)
|
|
||||||
== null) {
|
|
||||||
LOG.info("ResourceCalculatorProcessTree is unavailable on this system. "
|
LOG.info("ResourceCalculatorProcessTree is unavailable on this system. "
|
||||||
+ this.getClass().getName() + " is disabled.");
|
+ this.getClass().getName() + " is disabled.");
|
||||||
return false;
|
return false;
|
||||||
|
@ -535,9 +545,7 @@ public class ContainersMonitorImpl extends AbstractService implements
|
||||||
LOG.debug("Tracking ProcessTree " + pId + " for the first time");
|
LOG.debug("Tracking ProcessTree " + pId + " for the first time");
|
||||||
}
|
}
|
||||||
ResourceCalculatorProcessTree pt =
|
ResourceCalculatorProcessTree pt =
|
||||||
ResourceCalculatorProcessTree.
|
getResourceCalculatorProcessTree(pId);
|
||||||
getResourceCalculatorProcessTree(
|
|
||||||
pId, processTreeClass, conf);
|
|
||||||
ptInfo.setPid(pId);
|
ptInfo.setPid(pId);
|
||||||
ptInfo.setProcessTree(pt);
|
ptInfo.setProcessTree(pt);
|
||||||
|
|
||||||
|
@ -599,11 +607,14 @@ public class ContainersMonitorImpl extends AbstractService implements
|
||||||
long pmemLimit = ptInfo.getPmemLimit();
|
long pmemLimit = ptInfo.getPmemLimit();
|
||||||
if (AUDITLOG.isDebugEnabled()) {
|
if (AUDITLOG.isDebugEnabled()) {
|
||||||
AUDITLOG.debug(String.format(
|
AUDITLOG.debug(String.format(
|
||||||
"Memory usage of ProcessTree %s for container-id %s: ",
|
"Resource usage of ProcessTree %s for container-id %s:" +
|
||||||
pId, containerId.toString()) +
|
" %s CPU:%f CPU/core:%f",
|
||||||
formatUsageString(
|
pId, containerId.toString(),
|
||||||
currentVmemUsage, vmemLimit,
|
formatUsageString(
|
||||||
currentPmemUsage, pmemLimit));
|
currentVmemUsage, vmemLimit,
|
||||||
|
currentPmemUsage, pmemLimit),
|
||||||
|
cpuUsagePercentPerCore,
|
||||||
|
cpuUsageTotalCoresPercentage));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add resource utilization for this container
|
// Add resource utilization for this container
|
||||||
|
|
|
@ -148,6 +148,51 @@ public class TestCGroupsMemoryResourceHandlerImpl {
|
||||||
args.get(0));
|
args.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPreStartNonEnforced() throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_MEMORY_RESOURCE_ENFORCED, false);
|
||||||
|
cGroupsMemoryResourceHandler.bootstrap(conf);
|
||||||
|
String id = "container_01_01";
|
||||||
|
String path = "test-path/" + id;
|
||||||
|
ContainerId mockContainerId = mock(ContainerId.class);
|
||||||
|
when(mockContainerId.toString()).thenReturn(id);
|
||||||
|
Container mockContainer = mock(Container.class);
|
||||||
|
when(mockContainer.getContainerId()).thenReturn(mockContainerId);
|
||||||
|
when(mockCGroupsHandler
|
||||||
|
.getPathForCGroupTasks(CGroupsHandler.CGroupController.MEMORY, id))
|
||||||
|
.thenReturn(path);
|
||||||
|
int memory = 1024;
|
||||||
|
when(mockContainer.getResource())
|
||||||
|
.thenReturn(Resource.newInstance(memory, 1));
|
||||||
|
List<PrivilegedOperation> ret =
|
||||||
|
cGroupsMemoryResourceHandler.preStart(mockContainer);
|
||||||
|
verify(mockCGroupsHandler, times(1))
|
||||||
|
.createCGroup(CGroupsHandler.CGroupController.MEMORY, id);
|
||||||
|
verify(mockCGroupsHandler, times(0))
|
||||||
|
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
|
||||||
|
CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES,
|
||||||
|
String.valueOf(memory) + "M");
|
||||||
|
verify(mockCGroupsHandler, times(0))
|
||||||
|
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
|
||||||
|
CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
|
||||||
|
String.valueOf((int) (memory * 0.9)) + "M");
|
||||||
|
verify(mockCGroupsHandler, times(0))
|
||||||
|
.updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
|
||||||
|
CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS, String.valueOf(0));
|
||||||
|
Assert.assertNotNull(ret);
|
||||||
|
Assert.assertEquals(1, ret.size());
|
||||||
|
PrivilegedOperation op = ret.get(0);
|
||||||
|
Assert.assertEquals(PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
|
||||||
|
op.getOperationType());
|
||||||
|
List<String> args = op.getArguments();
|
||||||
|
Assert.assertEquals(1, args.size());
|
||||||
|
Assert.assertEquals(PrivilegedOperation.CGROUP_ARG_PREFIX + path,
|
||||||
|
args.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testReacquireContainer() throws Exception {
|
public void testReacquireContainer() throws Exception {
|
||||||
ContainerId containerIdMock = mock(ContainerId.class);
|
ContainerId containerIdMock = mock(ContainerId.class);
|
||||||
|
|
|
@ -0,0 +1,274 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.util.ControlledClock;
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit test for CGroupsResourceCalculator.
|
||||||
|
*/
|
||||||
|
public class TestCGroupsResourceCalculator {
|
||||||
|
|
||||||
|
private ControlledClock clock = new ControlledClock();
|
||||||
|
private CGroupsHandler cGroupsHandler = mock(CGroupsHandler.class);
|
||||||
|
private String basePath = "/tmp/" + this.getClass().getName();
|
||||||
|
|
||||||
|
public TestCGroupsResourceCalculator() {
|
||||||
|
when(cGroupsHandler.getRelativePathForCGroup("container_1"))
|
||||||
|
.thenReturn("/yarn/container_1");
|
||||||
|
when(cGroupsHandler.getRelativePathForCGroup("")).thenReturn("/yarn/");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = YarnException.class)
|
||||||
|
public void testPidNotFound() throws Exception {
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
"1234", ".", cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
Assert.assertEquals("Expected exception", null, calculator);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = YarnException.class)
|
||||||
|
public void testNoMemoryCGgroupMount() throws Exception {
|
||||||
|
File procfs = new File(basePath + "/1234");
|
||||||
|
Assert.assertTrue("Setup error", procfs.mkdirs());
|
||||||
|
try {
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(procfs, CGroupsResourceCalculator.CGROUP),
|
||||||
|
"7:devices:/yarn/container_1\n" +
|
||||||
|
"6:cpuacct,cpu:/yarn/container_1\n" +
|
||||||
|
"5:pids:/yarn/container_1\n");
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
"1234", basePath,
|
||||||
|
cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
Assert.assertEquals("Expected exception", null, calculator);
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteDirectory(new File(basePath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCGgroupNotFound() throws Exception {
|
||||||
|
File procfs = new File(basePath + "/1234");
|
||||||
|
Assert.assertTrue("Setup error", procfs.mkdirs());
|
||||||
|
try {
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(procfs, CGroupsResourceCalculator.CGROUP),
|
||||||
|
"7:devices:/yarn/container_1\n" +
|
||||||
|
"6:cpuacct,cpu:/yarn/container_1\n" +
|
||||||
|
"5:pids:/yarn/container_1\n" +
|
||||||
|
"4:memory:/yarn/container_1\n");
|
||||||
|
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
"1234", basePath,
|
||||||
|
cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
Assert.assertEquals("cgroups should be missing",
|
||||||
|
(long)ResourceCalculatorProcessTree.UNAVAILABLE,
|
||||||
|
calculator.getRssMemorySize(0));
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteDirectory(new File(basePath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCPUParsing() throws Exception {
|
||||||
|
File cgcpuacctDir =
|
||||||
|
new File(basePath + "/cgcpuacct");
|
||||||
|
File cgcpuacctContainerDir =
|
||||||
|
new File(cgcpuacctDir, "/yarn/container_1");
|
||||||
|
File procfs = new File(basePath + "/1234");
|
||||||
|
when(cGroupsHandler.getControllerPath(
|
||||||
|
CGroupsHandler.CGroupController.CPUACCT)).
|
||||||
|
thenReturn(cgcpuacctDir.getAbsolutePath());
|
||||||
|
Assert.assertTrue("Setup error", procfs.mkdirs());
|
||||||
|
Assert.assertTrue("Setup error", cgcpuacctContainerDir.mkdirs());
|
||||||
|
try {
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(procfs, CGroupsResourceCalculator.CGROUP),
|
||||||
|
"7:devices:/yarn/container_1\n" +
|
||||||
|
"6:cpuacct,cpu:/yarn/container_1\n" +
|
||||||
|
"5:pids:/yarn/container_1\n" +
|
||||||
|
"4:memory:/yarn/container_1\n");
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(cgcpuacctContainerDir, CGroupsResourceCalculator.CPU_STAT),
|
||||||
|
"Can you handle this?\n" +
|
||||||
|
"user 5415\n" +
|
||||||
|
"system 3632");
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
"1234", basePath,
|
||||||
|
cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
Assert.assertEquals("Incorrect CPU usage",
|
||||||
|
90470,
|
||||||
|
calculator.getCumulativeCpuTime());
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteDirectory(new File(basePath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMemoryParsing() throws Exception {
|
||||||
|
File cgcpuacctDir =
|
||||||
|
new File(basePath + "/cgcpuacct");
|
||||||
|
File cgcpuacctContainerDir =
|
||||||
|
new File(cgcpuacctDir, "/yarn/container_1");
|
||||||
|
File cgmemoryDir =
|
||||||
|
new File(basePath + "/memory");
|
||||||
|
File cgMemoryContainerDir =
|
||||||
|
new File(cgmemoryDir, "/yarn/container_1");
|
||||||
|
File procfs = new File(basePath + "/1234");
|
||||||
|
when(cGroupsHandler.getControllerPath(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY)).
|
||||||
|
thenReturn(cgmemoryDir.getAbsolutePath());
|
||||||
|
Assert.assertTrue("Setup error", procfs.mkdirs());
|
||||||
|
Assert.assertTrue("Setup error", cgcpuacctContainerDir.mkdirs());
|
||||||
|
Assert.assertTrue("Setup error", cgMemoryContainerDir.mkdirs());
|
||||||
|
try {
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(procfs, CGroupsResourceCalculator.CGROUP),
|
||||||
|
"6:cpuacct,cpu:/yarn/container_1\n" +
|
||||||
|
"4:memory:/yarn/container_1\n");
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(cgMemoryContainerDir, CGroupsResourceCalculator.MEM_STAT),
|
||||||
|
"418496512\n");
|
||||||
|
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
"1234", basePath,
|
||||||
|
cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
// Test the case where memsw is not available (Ubuntu)
|
||||||
|
Assert.assertEquals("Incorrect memory usage",
|
||||||
|
418496512,
|
||||||
|
calculator.getRssMemorySize());
|
||||||
|
Assert.assertEquals("Incorrect swap usage",
|
||||||
|
(long)ResourceCalculatorProcessTree.UNAVAILABLE,
|
||||||
|
calculator.getVirtualMemorySize());
|
||||||
|
|
||||||
|
// Test the case where memsw is available
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(cgMemoryContainerDir, CGroupsResourceCalculator.MEMSW_STAT),
|
||||||
|
"418496513\n");
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
Assert.assertEquals("Incorrect swap usage",
|
||||||
|
418496513,
|
||||||
|
calculator.getVirtualMemorySize());
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteDirectory(new File(basePath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCPUParsingRoot() throws Exception {
|
||||||
|
File cgcpuacctDir =
|
||||||
|
new File(basePath + "/cgcpuacct");
|
||||||
|
File cgcpuacctRootDir =
|
||||||
|
new File(cgcpuacctDir, "/yarn");
|
||||||
|
when(cGroupsHandler.getControllerPath(
|
||||||
|
CGroupsHandler.CGroupController.CPUACCT)).
|
||||||
|
thenReturn(cgcpuacctDir.getAbsolutePath());
|
||||||
|
Assert.assertTrue("Setup error", cgcpuacctRootDir.mkdirs());
|
||||||
|
try {
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(cgcpuacctRootDir, CGroupsResourceCalculator.CPU_STAT),
|
||||||
|
"user 5415\n" +
|
||||||
|
"system 3632");
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
null, basePath,
|
||||||
|
cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
Assert.assertEquals("Incorrect CPU usage",
|
||||||
|
90470,
|
||||||
|
calculator.getCumulativeCpuTime());
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteDirectory(new File(basePath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMemoryParsingRoot() throws Exception {
|
||||||
|
File cgcpuacctDir =
|
||||||
|
new File(basePath + "/cgcpuacct");
|
||||||
|
File cgcpuacctRootDir =
|
||||||
|
new File(cgcpuacctDir, "/yarn");
|
||||||
|
File cgmemoryDir =
|
||||||
|
new File(basePath + "/memory");
|
||||||
|
File cgMemoryRootDir =
|
||||||
|
new File(cgmemoryDir, "/yarn");
|
||||||
|
File procfs = new File(basePath + "/1234");
|
||||||
|
when(cGroupsHandler.getControllerPath(
|
||||||
|
CGroupsHandler.CGroupController.MEMORY)).
|
||||||
|
thenReturn(cgmemoryDir.getAbsolutePath());
|
||||||
|
Assert.assertTrue("Setup error", procfs.mkdirs());
|
||||||
|
Assert.assertTrue("Setup error", cgcpuacctRootDir.mkdirs());
|
||||||
|
Assert.assertTrue("Setup error", cgMemoryRootDir.mkdirs());
|
||||||
|
try {
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(cgMemoryRootDir, CGroupsResourceCalculator.MEM_STAT),
|
||||||
|
"418496512\n");
|
||||||
|
|
||||||
|
CGroupsResourceCalculator calculator =
|
||||||
|
new CGroupsResourceCalculator(
|
||||||
|
null, basePath,
|
||||||
|
cGroupsHandler, clock, 10);
|
||||||
|
calculator.setCGroupFilePaths();
|
||||||
|
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
|
||||||
|
// Test the case where memsw is not available (Ubuntu)
|
||||||
|
Assert.assertEquals("Incorrect memory usage",
|
||||||
|
418496512,
|
||||||
|
calculator.getRssMemorySize());
|
||||||
|
Assert.assertEquals("Incorrect swap usage",
|
||||||
|
(long)ResourceCalculatorProcessTree.UNAVAILABLE,
|
||||||
|
calculator.getVirtualMemorySize());
|
||||||
|
|
||||||
|
// Test the case where memsw is available
|
||||||
|
FileUtils.writeStringToFile(
|
||||||
|
new File(cgMemoryRootDir, CGroupsResourceCalculator.MEMSW_STAT),
|
||||||
|
"418496513\n");
|
||||||
|
calculator.updateProcessTree();
|
||||||
|
Assert.assertEquals("Incorrect swap usage",
|
||||||
|
418496513,
|
||||||
|
calculator.getVirtualMemorySize());
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteDirectory(new File(basePath));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,227 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.SystemUtils;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.Context;
|
||||||
|
import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree;
|
||||||
|
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
|
||||||
|
import org.junit.*;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Functional test for CGroupsResourceCalculator to compare two resource
|
||||||
|
* calculators. It is OS dependent.
|
||||||
|
* Ignored in automated tests due to flakiness by design.
|
||||||
|
*/
|
||||||
|
public class TestCompareResourceCalculators {
|
||||||
|
private Process target = null;
|
||||||
|
private String cgroup = null;
|
||||||
|
private String cgroupCPU = null;
|
||||||
|
private String cgroupMemory = null;
|
||||||
|
public static final long SHMEM_KB = 100 * 1024;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws IOException, YarnException {
|
||||||
|
Assume.assumeTrue(SystemUtils.IS_OS_LINUX);
|
||||||
|
|
||||||
|
YarnConfiguration conf = new YarnConfiguration();
|
||||||
|
conf.set(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_HIERARCHY,
|
||||||
|
"TestCompareResourceCalculators");
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT, false);
|
||||||
|
conf.setStrings(YarnConfiguration.NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH,
|
||||||
|
"/sys/fs/cgroup");
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED, true);
|
||||||
|
ResourceHandlerChain module = null;
|
||||||
|
try {
|
||||||
|
module = ResourceHandlerModule.getConfiguredResourceHandlerChain(conf,
|
||||||
|
mock(Context.class));
|
||||||
|
} catch (ResourceHandlerException e) {
|
||||||
|
throw new YarnException("Cannot access cgroups", e);
|
||||||
|
}
|
||||||
|
Assume.assumeNotNull(module);
|
||||||
|
Assume.assumeNotNull(
|
||||||
|
ResourceHandlerModule.getCGroupsHandler()
|
||||||
|
.getControllerPath(CGroupsHandler.CGroupController.CPU));
|
||||||
|
Assume.assumeNotNull(
|
||||||
|
ResourceHandlerModule.getCGroupsHandler()
|
||||||
|
.getControllerPath(CGroupsHandler.CGroupController.MEMORY));
|
||||||
|
|
||||||
|
Random random = new Random(System.currentTimeMillis());
|
||||||
|
cgroup = Long.toString(random.nextLong());
|
||||||
|
cgroupCPU = ResourceHandlerModule.getCGroupsHandler()
|
||||||
|
.getPathForCGroup(CGroupsHandler.CGroupController.CPU, cgroup);
|
||||||
|
cgroupMemory = ResourceHandlerModule.getCGroupsHandler()
|
||||||
|
.getPathForCGroup(CGroupsHandler.CGroupController.MEMORY, cgroup);
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws YarnException {
|
||||||
|
stopTestProcess();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Ignored in automated tests due to flakiness by design
|
||||||
|
@Ignore
|
||||||
|
@Test
|
||||||
|
public void testCompareResults()
|
||||||
|
throws YarnException, InterruptedException, IOException {
|
||||||
|
|
||||||
|
startTestProcess();
|
||||||
|
|
||||||
|
ProcfsBasedProcessTree legacyCalculator =
|
||||||
|
new ProcfsBasedProcessTree(Long.toString(getPid()));
|
||||||
|
CGroupsResourceCalculator cgroupsCalculator =
|
||||||
|
new CGroupsResourceCalculator(Long.toString(getPid()));
|
||||||
|
cgroupsCalculator.setCGroupFilePaths();
|
||||||
|
|
||||||
|
for (int i = 0; i < 5; ++i) {
|
||||||
|
Thread.sleep(3000);
|
||||||
|
compareMetrics(legacyCalculator, cgroupsCalculator);
|
||||||
|
}
|
||||||
|
|
||||||
|
stopTestProcess();
|
||||||
|
|
||||||
|
ensureCleanedUp(legacyCalculator, cgroupsCalculator);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ensureCleanedUp(
|
||||||
|
ResourceCalculatorProcessTree metric1,
|
||||||
|
ResourceCalculatorProcessTree metric2) {
|
||||||
|
metric1.updateProcessTree();
|
||||||
|
metric2.updateProcessTree();
|
||||||
|
long pmem1 = metric1.getRssMemorySize(0);
|
||||||
|
long pmem2 = metric2.getRssMemorySize(0);
|
||||||
|
System.out.println(pmem1 + " " + pmem2);
|
||||||
|
Assert.assertTrue("pmem should be invalid " + pmem1 + " " + pmem2,
|
||||||
|
pmem1 == ResourceCalculatorProcessTree.UNAVAILABLE &&
|
||||||
|
pmem2 == ResourceCalculatorProcessTree.UNAVAILABLE);
|
||||||
|
long vmem1 = metric1.getRssMemorySize(0);
|
||||||
|
long vmem2 = metric2.getRssMemorySize(0);
|
||||||
|
System.out.println(vmem1 + " " + vmem2);
|
||||||
|
Assert.assertTrue("vmem Error outside range " + vmem1 + " " + vmem2,
|
||||||
|
vmem1 == ResourceCalculatorProcessTree.UNAVAILABLE &&
|
||||||
|
vmem2 == ResourceCalculatorProcessTree.UNAVAILABLE);
|
||||||
|
float cpu1 = metric1.getCpuUsagePercent();
|
||||||
|
float cpu2 = metric2.getCpuUsagePercent();
|
||||||
|
// TODO ProcfsBasedProcessTree may report negative on process exit
|
||||||
|
Assert.assertTrue("CPU% Error outside range " + cpu1 + " " + cpu2,
|
||||||
|
cpu1 == 0 && cpu2 == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void compareMetrics(
|
||||||
|
ResourceCalculatorProcessTree metric1,
|
||||||
|
ResourceCalculatorProcessTree metric2) {
|
||||||
|
metric1.updateProcessTree();
|
||||||
|
metric2.updateProcessTree();
|
||||||
|
long pmem1 = metric1.getRssMemorySize(0);
|
||||||
|
long pmem2 = metric2.getRssMemorySize(0);
|
||||||
|
// TODO The calculation is different and cgroup
|
||||||
|
// can report a small amount after process stop
|
||||||
|
// This is not an issue since the cgroup is deleted
|
||||||
|
System.out.println(pmem1 + " " + (pmem2 - SHMEM_KB * 1024));
|
||||||
|
Assert.assertTrue("pmem Error outside range " + pmem1 + " " + pmem2,
|
||||||
|
Math.abs(pmem1 - (pmem2 - SHMEM_KB * 1024)) < 5000000);
|
||||||
|
long vmem1 = metric1.getRssMemorySize(0);
|
||||||
|
long vmem2 = metric2.getRssMemorySize(0);
|
||||||
|
System.out.println(vmem1 + " " + (vmem2 - SHMEM_KB * 1024));
|
||||||
|
// TODO The calculation is different and cgroup
|
||||||
|
// can report a small amount after process stop
|
||||||
|
// This is not an issue since the cgroup is deleted
|
||||||
|
Assert.assertTrue("vmem Error outside range " + vmem1 + " " + vmem2,
|
||||||
|
Math.abs(vmem1 - (vmem2 - SHMEM_KB * 1024)) < 5000000);
|
||||||
|
float cpu1 = metric1.getCpuUsagePercent();
|
||||||
|
float cpu2 = metric2.getCpuUsagePercent();
|
||||||
|
if (cpu1 > 0) {
|
||||||
|
// TODO ProcfsBasedProcessTree may report negative on process exit
|
||||||
|
Assert.assertTrue("CPU% Error outside range " + cpu1 + " " + cpu2,
|
||||||
|
Math.abs(cpu2 - cpu1) < 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void startTestProcess() throws IOException {
|
||||||
|
ProcessBuilder builder = new ProcessBuilder();
|
||||||
|
String script =
|
||||||
|
"mkdir -p " + cgroupCPU + ";" +
|
||||||
|
"echo $$ >" + cgroupCPU + "/tasks;" +
|
||||||
|
"mkdir -p " + cgroupMemory + ";" +
|
||||||
|
"echo $$ >" + cgroupMemory + "/tasks;" +
|
||||||
|
"dd if=/dev/zero of=/dev/shm/" +
|
||||||
|
cgroup + " bs=1k count=" + SHMEM_KB + ";" +
|
||||||
|
"dd if=/dev/zero of=/dev/null bs=1k &" +
|
||||||
|
"echo $! >/tmp/\" + cgroup + \".pid;" +
|
||||||
|
//"echo while [ -f /tmp/" + cgroup + ".pid ]; do sleep 1; done;" +
|
||||||
|
"sleep 10000;" +
|
||||||
|
"echo kill $(jobs -p);";
|
||||||
|
builder.command("bash", "-c", script);
|
||||||
|
builder.redirectError(new File("/tmp/a.txt"));
|
||||||
|
builder.redirectOutput(new File("/tmp/b.txt"));
|
||||||
|
target = builder.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void stopTestProcess() throws YarnException {
|
||||||
|
if (target != null) {
|
||||||
|
target.destroyForcibly();
|
||||||
|
target = null;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
ProcessBuilder builder = new ProcessBuilder();
|
||||||
|
String script =
|
||||||
|
"rm -f /dev/shm/" + cgroup + ";" +
|
||||||
|
"cat " + cgroupCPU + "/tasks | xargs kill;" +
|
||||||
|
"rm -f /tmp/" + cgroup + ".pid;" +
|
||||||
|
"sleep 4;" +
|
||||||
|
"rmdir " + cgroupCPU + ";" +
|
||||||
|
"rmdir " + cgroupMemory + ";";
|
||||||
|
builder.command("bash", "-c", script);
|
||||||
|
Process cleanup = builder.start();
|
||||||
|
cleanup.waitFor();
|
||||||
|
} catch (IOException|InterruptedException e) {
|
||||||
|
throw new YarnException("Could not clean up", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getPid() throws YarnException {
|
||||||
|
Class processClass = target.getClass();
|
||||||
|
if (processClass.getName().equals("java.lang.UNIXProcess")) {
|
||||||
|
try {
|
||||||
|
Field pidField = processClass.getDeclaredField("pid");
|
||||||
|
pidField.setAccessible(true);
|
||||||
|
long pid = pidField.getLong(target);
|
||||||
|
pidField.setAccessible(false);
|
||||||
|
return pid;
|
||||||
|
} catch (NoSuchFieldException|IllegalAccessException e) {
|
||||||
|
throw new YarnException("Reflection error", e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new YarnException("Not Unix " + processClass.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -36,8 +36,8 @@ import static org.mockito.Mockito.mock;
|
||||||
public class TestResourceHandlerModule {
|
public class TestResourceHandlerModule {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(TestResourceHandlerModule.class);
|
LoggerFactory.getLogger(TestResourceHandlerModule.class);
|
||||||
Configuration emptyConf;
|
private Configuration emptyConf;
|
||||||
Configuration networkEnabledConf;
|
private Configuration networkEnabledConf;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws Exception {
|
public void setup() throws Exception {
|
||||||
|
@ -55,23 +55,28 @@ public class TestResourceHandlerModule {
|
||||||
//This resourceHandler should be non-null only if network as a resource
|
//This resourceHandler should be non-null only if network as a resource
|
||||||
//is explicitly enabled
|
//is explicitly enabled
|
||||||
OutboundBandwidthResourceHandler resourceHandler = ResourceHandlerModule
|
OutboundBandwidthResourceHandler resourceHandler = ResourceHandlerModule
|
||||||
.getOutboundBandwidthResourceHandler(emptyConf);
|
.initOutboundBandwidthResourceHandler(emptyConf);
|
||||||
Assert.assertNull(resourceHandler);
|
Assert.assertNull(resourceHandler);
|
||||||
|
|
||||||
//When network as a resource is enabled this should be non-null
|
//When network as a resource is enabled this should be non-null
|
||||||
resourceHandler = ResourceHandlerModule
|
resourceHandler = ResourceHandlerModule
|
||||||
.getOutboundBandwidthResourceHandler(networkEnabledConf);
|
.initOutboundBandwidthResourceHandler(networkEnabledConf);
|
||||||
Assert.assertNotNull(resourceHandler);
|
Assert.assertNotNull(resourceHandler);
|
||||||
|
|
||||||
//Ensure that outbound bandwidth resource handler is present in the chain
|
//Ensure that outbound bandwidth resource handler is present in the chain
|
||||||
ResourceHandlerChain resourceHandlerChain = ResourceHandlerModule
|
ResourceHandlerChain resourceHandlerChain = ResourceHandlerModule
|
||||||
.getConfiguredResourceHandlerChain(networkEnabledConf, mock(Context.class));
|
.getConfiguredResourceHandlerChain(networkEnabledConf,
|
||||||
List<ResourceHandler> resourceHandlers = resourceHandlerChain
|
mock(Context.class));
|
||||||
.getResourceHandlerList();
|
if (resourceHandlerChain != null) {
|
||||||
//Exactly one resource handler in chain
|
List<ResourceHandler> resourceHandlers = resourceHandlerChain
|
||||||
Assert.assertEquals(resourceHandlers.size(), 1);
|
.getResourceHandlerList();
|
||||||
//Same instance is expected to be in the chain.
|
//Exactly one resource handler in chain
|
||||||
Assert.assertTrue(resourceHandlers.get(0) == resourceHandler);
|
Assert.assertEquals(resourceHandlers.size(), 1);
|
||||||
|
//Same instance is expected to be in the chain.
|
||||||
|
Assert.assertTrue(resourceHandlers.get(0) == resourceHandler);
|
||||||
|
} else {
|
||||||
|
Assert.fail("Null returned");
|
||||||
|
}
|
||||||
} catch (ResourceHandlerException e) {
|
} catch (ResourceHandlerException e) {
|
||||||
Assert.fail("Unexpected ResourceHandlerException: " + e);
|
Assert.fail("Unexpected ResourceHandlerException: " + e);
|
||||||
}
|
}
|
||||||
|
@ -81,23 +86,27 @@ public class TestResourceHandlerModule {
|
||||||
public void testDiskResourceHandler() throws Exception {
|
public void testDiskResourceHandler() throws Exception {
|
||||||
|
|
||||||
DiskResourceHandler handler =
|
DiskResourceHandler handler =
|
||||||
ResourceHandlerModule.getDiskResourceHandler(emptyConf);
|
ResourceHandlerModule.initDiskResourceHandler(emptyConf);
|
||||||
Assert.assertNull(handler);
|
Assert.assertNull(handler);
|
||||||
|
|
||||||
Configuration diskConf = new YarnConfiguration();
|
Configuration diskConf = new YarnConfiguration();
|
||||||
diskConf.setBoolean(YarnConfiguration.NM_DISK_RESOURCE_ENABLED, true);
|
diskConf.setBoolean(YarnConfiguration.NM_DISK_RESOURCE_ENABLED, true);
|
||||||
|
|
||||||
handler = ResourceHandlerModule.getDiskResourceHandler(diskConf);
|
handler = ResourceHandlerModule.initDiskResourceHandler(diskConf);
|
||||||
Assert.assertNotNull(handler);
|
Assert.assertNotNull(handler);
|
||||||
|
|
||||||
ResourceHandlerChain resourceHandlerChain =
|
ResourceHandlerChain resourceHandlerChain =
|
||||||
ResourceHandlerModule.getConfiguredResourceHandlerChain(diskConf,
|
ResourceHandlerModule.getConfiguredResourceHandlerChain(diskConf,
|
||||||
mock(Context.class));
|
mock(Context.class));
|
||||||
List<ResourceHandler> resourceHandlers =
|
if (resourceHandlerChain != null) {
|
||||||
resourceHandlerChain.getResourceHandlerList();
|
List<ResourceHandler> resourceHandlers =
|
||||||
// Exactly one resource handler in chain
|
resourceHandlerChain.getResourceHandlerList();
|
||||||
Assert.assertEquals(resourceHandlers.size(), 1);
|
// Exactly one resource handler in chain
|
||||||
// Same instance is expected to be in the chain.
|
Assert.assertEquals(resourceHandlers.size(), 1);
|
||||||
Assert.assertTrue(resourceHandlers.get(0) == handler);
|
// Same instance is expected to be in the chain.
|
||||||
|
Assert.assertTrue(resourceHandlers.get(0) == handler);
|
||||||
|
} else {
|
||||||
|
Assert.fail("Null returned");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue