svn merge -c 1382072 FIXES: YARN-57. Allow process-tree based resource calculation et al. to be pluggable to support it on multiple platforms. Contributed by Radim Kolar.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1391094 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Joseph Evans 2012-09-27 16:38:54 +00:00
parent 1e1591788d
commit aa94aea3c2
9 changed files with 288 additions and 160 deletions

View File

@ -87,6 +87,9 @@ Release 0.23.4 - UNRELEASED
YARN-108. FSDownload can create cache directories with the wrong
permissions (Jason Lowe via bobby)
YARN-57. Allow process-tree based resource calculation et al. to be
pluggable to support it on multiple platforms. (Radim Kolar via acmurthy)
OPTIMIZATIONS
BUG FIXES

View File

@ -428,6 +428,9 @@ public class YarnConfiguration extends Configuration {
/** Class that calculates containers current resource utilization.*/
public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
NM_PREFIX + "container-monitor.resource-calculator.class";
/** Class that calculates process tree resource utilization.*/
public static final String NM_CONTAINER_MON_PROCESS_TREE =
NM_PREFIX + "container-monitor.process-tree.class";
/**
* Enable/Disable disks' health checker. Default is true.

View File

@ -91,7 +91,7 @@ public class LinuxResourceCalculatorPlugin extends ResourceCalculatorPlugin {
private float cpuUsage = UNAVAILABLE;
private long sampleTime = UNAVAILABLE;
private long lastSampleTime = UNAVAILABLE;
private ProcfsBasedProcessTree pTree = null;
private ResourceCalculatorProcessTree pTree = null;
boolean readMemInfoFile = false;
boolean readCpuInfoFile = false;

View File

@ -44,7 +44,7 @@ import org.apache.hadoop.util.StringUtils;
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class ProcfsBasedProcessTree {
public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
static final Log LOG = LogFactory
.getLog(ProcfsBasedProcessTree.class);
@ -96,17 +96,12 @@ public class ProcfsBasedProcessTree {
private String pid = deadPid;
static private Pattern numberPattern = Pattern.compile("[1-9][0-9]*");
private Long cpuTime = 0L;
private boolean setsidUsed = false;
protected Map<String, ProcessInfo> processTree =
new HashMap<String, ProcessInfo>();
public ProcfsBasedProcessTree(String pid) {
this(pid, false);
}
public ProcfsBasedProcessTree(String pid, boolean setsidUsed) {
this(pid, setsidUsed, PROCFS);
this(pid, PROCFS);
}
/**
@ -116,13 +111,10 @@ public class ProcfsBasedProcessTree {
* the root of the proc file system can be adjusted.
*
* @param pid root of the process tree
* @param setsidUsed true, if setsid was used for the root pid
* @param procfsDir the root of a proc file system - only used for testing.
*/
public ProcfsBasedProcessTree(String pid, boolean setsidUsed,
String procfsDir) {
public ProcfsBasedProcessTree(String pid, String procfsDir) {
this.pid = getValidPID(pid);
this.setsidUsed = setsidUsed;
this.procfsDir = procfsDir;
}
@ -152,7 +144,8 @@ public class ProcfsBasedProcessTree {
*
* @return the process-tree with latest state.
*/
public ProcfsBasedProcessTree getProcessTree() {
@Override
public ResourceCalculatorProcessTree getProcessTree() {
if (!pid.equals(deadPid)) {
// Get the list of processes
List<String> processList = getProcessList();
@ -226,6 +219,7 @@ public class ProcfsBasedProcessTree {
/** Verify that the given process id is same as its process group id.
* @return true if the process id matches else return false.
*/
@Override
public boolean checkPidPgrpidForMatch() {
return checkPidPgrpidForMatch(pid, PROCFS);
}
@ -256,6 +250,7 @@ public class ProcfsBasedProcessTree {
* @return a string concatenating the dump of information of all the processes
* in the process-tree
*/
@Override
public String getProcessTreeDump() {
StringBuilder ret = new StringBuilder();
// The header.
@ -273,29 +268,6 @@ public class ProcfsBasedProcessTree {
return ret.toString();
}
/**
* Get the cumulative virtual memory used by all the processes in the
* process-tree.
*
* @return cumulative virtual memory used by the process-tree in bytes.
*/
public long getCumulativeVmem() {
// include all processes.. all processes will be older than 0.
return getCumulativeVmem(0);
}
/**
* Get the cumulative resident set size (rss) memory used by all the processes
* in the process-tree.
*
* @return cumulative rss memory used by the process-tree in bytes. return 0
* if it cannot be calculated
*/
public long getCumulativeRssmem() {
// include all processes.. all processes will be older than 0.
return getCumulativeRssmem(0);
}
/**
* Get the cumulative virtual memory used by all the processes in the
* process-tree that are older than the passed in age.
@ -305,6 +277,7 @@ public class ProcfsBasedProcessTree {
* @return cumulative virtual memory used by the process-tree in bytes,
* for processes older than this age.
*/
@Override
public long getCumulativeVmem(int olderThanAge) {
long total = 0;
for (ProcessInfo p : processTree.values()) {
@ -325,6 +298,7 @@ public class ProcfsBasedProcessTree {
* for processes older than this age. return 0 if it cannot be
* calculated
*/
@Override
public long getCumulativeRssmem(int olderThanAge) {
if (PAGE_SIZE < 0) {
return 0;
@ -345,6 +319,7 @@ public class ProcfsBasedProcessTree {
* @return cumulative CPU time in millisecond since the process-tree created
* return 0 if it cannot be calculated
*/
@Override
public long getCumulativeCpuTime() {
if (JIFFY_LENGTH_IN_MILLIS < 0) {
return 0;
@ -352,7 +327,7 @@ public class ProcfsBasedProcessTree {
long incJiffies = 0;
for (ProcessInfo p : processTree.values()) {
if (p != null) {
incJiffies += p.dtime;
incJiffies += p.getDtime();
}
}
cpuTime += incJiffies * JIFFY_LENGTH_IN_MILLIS;
@ -454,6 +429,7 @@ public class ProcfsBasedProcessTree {
* Returns a string printing PIDs of process present in the
* ProcfsBasedProcessTree. Output format : [pid pid ..]
*/
@Override
public String toString() {
StringBuffer pTree = new StringBuffer("[ ");
for (String p : processTree.keySet()) {
@ -540,13 +516,6 @@ public class ProcfsBasedProcessTree {
return age;
}
public boolean isParent(ProcessInfo p) {
if (pid.equals(p.getPpid())) {
return true;
}
return false;
}
public void updateProcessInfo(String name, String ppid, Integer pgrpId,
Integer sessionId, Long utime, BigInteger stime, Long vmem, Long rssmem) {
this.name = name;

View File

@ -133,13 +133,14 @@ public abstract class ResourceCalculatorPlugin extends Configured {
}
/**
* Get the ResourceCalculatorPlugin from the class name and configure it. If
* Create the ResourceCalculatorPlugin from the class name and configure it. If
* class name is null, this method will try and return a memory calculator
* plugin available for this system.
*
* @param clazz class-name
* @param clazz ResourceCalculator plugin class-name
* @param conf configure the plugin with this.
* @return ResourceCalculatorPlugin
* @return ResourceCalculatorPlugin or null if ResourceCalculatorPlugin is not
* available for current system
*/
public static ResourceCalculatorPlugin getResourceCalculatorPlugin(
Class<? extends ResourceCalculatorPlugin> clazz, Configuration conf) {

View File

@ -0,0 +1,145 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
/**
* Interface class to obtain process resource usage
*
*/
public abstract class ResourceCalculatorProcessTree {
static final Log LOG = LogFactory
.getLog(ResourceCalculatorProcessTree.class);
/**
* Get the process-tree with latest state. If the root-process is not alive,
* an empty tree will be returned.
*
* Each call to this function should increment the age of the running
* processes that already exist in the process tree. Age is used other API's
* of the interface.
*
* @return the process-tree with latest state.
*/
public abstract ResourceCalculatorProcessTree getProcessTree();
/**
* Get a dump of the process-tree.
*
* @return a string concatenating the dump of information of all the processes
* in the process-tree
*/
public abstract String getProcessTreeDump();
/**
* Get the cumulative virtual memory used by all the processes in the
* process-tree.
*
* @return cumulative virtual memory used by the process-tree in bytes.
*/
public long getCumulativeVmem() {
return getCumulativeVmem(0);
}
/**
* Get the cumulative resident set size (rss) memory used by all the processes
* in the process-tree.
*
* @return cumulative rss memory used by the process-tree in bytes. return 0
* if it cannot be calculated
*/
public long getCumulativeRssmem() {
return getCumulativeRssmem(0);
}
/**
* Get the cumulative virtual memory used by all the processes in the
* process-tree that are older than the passed in age.
*
* @param olderThanAge processes above this age are included in the
* memory addition
* @return cumulative virtual memory used by the process-tree in bytes,
* for processes older than this age.
*/
public abstract long getCumulativeVmem(int olderThanAge);
/**
* Get the cumulative resident set size (rss) memory used by all the processes
* in the process-tree that are older than the passed in age.
*
* @param olderThanAge processes above this age are included in the
* memory addition
* @return cumulative rss memory used by the process-tree in bytes,
* for processes older than this age. return 0 if it cannot be
* calculated
*/
public abstract long getCumulativeRssmem(int olderThanAge);
/**
* Get the CPU time in millisecond used by all the processes in the
* process-tree since the process-tree created
*
* @return cumulative CPU time in millisecond since the process-tree created
* return 0 if it cannot be calculated
*/
public abstract long getCumulativeCpuTime();
/** Verify that the tree process id is same as its process group id.
* @return true if the process id matches else return false.
*/
public abstract boolean checkPidPgrpidForMatch();
/**
* Create the ResourceCalculatorProcessTree rooted to specified process
* from the class name and configure it. If class name is null, this method
* will try and return a process tree plugin available for this system.
*
* @param pid process pid of the root of the process tree
* @param clazz class-name
* @param conf configure the plugin with this.
*
* @return ResourceCalculatorProcessTree or null if ResourceCalculatorPluginTree
* is not available for this system.
*/
public static ResourceCalculatorProcessTree getResourceCalculatorProcessTree(
String pid, Class<? extends ResourceCalculatorProcessTree> clazz, Configuration conf) {
if (clazz != null) {
return ReflectionUtils.newInstance(clazz, conf);
}
// No class given, try a os specific class
try {
String osName = System.getProperty("os.name");
if (osName.startsWith("Linux")) {
return new ProcfsBasedProcessTree(pid);
}
} catch (SecurityException se) {
// Failed to get Operating System name.
return null;
}
// Not supported on this system.
return null;
}
}

View File

@ -161,7 +161,7 @@ public class TestProcfsBasedProcessTree {
String pid = getRogueTaskPID();
LOG.info("Root process pid: " + pid);
ProcfsBasedProcessTree p = createProcessTree(pid);
p = p.getProcessTree(); // initialize
p.getProcessTree(); // initialize
LOG.info("ProcessTree: " + p.toString());
File leaf = new File(lowestDescendant);
@ -174,7 +174,7 @@ public class TestProcfsBasedProcessTree {
}
}
p = p.getProcessTree(); // reconstruct
p.getProcessTree(); // reconstruct
LOG.info("ProcessTree: " + p.toString());
// Get the process-tree dump
@ -213,7 +213,7 @@ public class TestProcfsBasedProcessTree {
}
// ProcessTree is gone now. Any further calls should be sane.
p = p.getProcessTree();
p.getProcessTree();
Assert.assertFalse("ProcessTree must have been gone", isAlive(pid));
Assert.assertTrue("Cumulative vmem for the gone-process is "
+ p.getCumulativeVmem() + " . It should be zero.", p
@ -222,13 +222,11 @@ public class TestProcfsBasedProcessTree {
}
protected ProcfsBasedProcessTree createProcessTree(String pid) {
return new ProcfsBasedProcessTree(pid,
isSetsidAvailable());
return new ProcfsBasedProcessTree(pid);
}
protected ProcfsBasedProcessTree createProcessTree(String pid,
boolean setsidUsed, String procfsRootDir) {
return new ProcfsBasedProcessTree(pid, setsidUsed, procfsRootDir);
protected ProcfsBasedProcessTree createProcessTree(String pid, String procfsRootDir) {
return new ProcfsBasedProcessTree(pid, procfsRootDir);
}
protected void destroyProcessTree(String pid) throws IOException {
@ -358,7 +356,7 @@ public class TestProcfsBasedProcessTree {
// crank up the process tree class.
ProcfsBasedProcessTree processTree =
createProcessTree("100", true, procfsRootDir.getAbsolutePath());
createProcessTree("100", procfsRootDir.getAbsolutePath());
// build the process tree.
processTree.getProcessTree();
@ -431,7 +429,7 @@ public class TestProcfsBasedProcessTree {
// crank up the process tree class.
ProcfsBasedProcessTree processTree =
createProcessTree("100", true, procfsRootDir.getAbsolutePath());
createProcessTree("100", procfsRootDir.getAbsolutePath());
// build the process tree.
processTree.getProcessTree();
@ -522,8 +520,7 @@ public class TestProcfsBasedProcessTree {
setupProcfsRootDir(procfsRootDir);
// crank up the process tree class.
ProcfsBasedProcessTree processTree =
createProcessTree(pid, true, procfsRootDir.getAbsolutePath());
createProcessTree(pid, procfsRootDir.getAbsolutePath());
// Let us not create stat file for pid 100.
Assert.assertTrue(ProcfsBasedProcessTree.checkPidPgrpidForMatch(
@ -578,7 +575,7 @@ public class TestProcfsBasedProcessTree {
writeCmdLineFiles(procfsRootDir, pids, cmdLines);
ProcfsBasedProcessTree processTree = createProcessTree(
"100", true, procfsRootDir.getAbsolutePath());
"100", procfsRootDir.getAbsolutePath());
// build the process tree.
processTree.getProcessTree();

View File

@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
import org.apache.hadoop.yarn.service.AbstractService;
import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree;
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
import com.google.common.base.Preconditions;
@ -60,6 +60,8 @@ public class ContainersMonitorImpl extends AbstractService implements
private final Dispatcher eventDispatcher;
private final Context context;
private ResourceCalculatorPlugin resourceCalculatorPlugin;
private Configuration conf;
private Class<? extends ResourceCalculatorProcessTree> processTreeClass;
private long maxVmemAllottedForContainers = DISABLED_MEMORY_LIMIT;
private long maxPmemAllottedForContainers = DISABLED_MEMORY_LIMIT;
@ -96,6 +98,11 @@ public class ContainersMonitorImpl extends AbstractService implements
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);
LOG.info(" Using ResourceCalculatorPlugin : "
+ this.resourceCalculatorPlugin);
processTreeClass = conf.getClass(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE, null,
ResourceCalculatorProcessTree.class);
this.conf = conf;
LOG.info(" Using ResourceCalculatorProcessTree : "
+ this.processTreeClass);
long totalPhysicalMemoryOnNM = DISABLED_MEMORY_LIMIT;
if (this.resourceCalculatorPlugin != null) {
@ -157,12 +164,16 @@ public class ContainersMonitorImpl extends AbstractService implements
}
private boolean isEnabled() {
if (!ProcfsBasedProcessTree.isAvailable()) {
LOG.info("ProcessTree implementation is missing on this system. "
if (resourceCalculatorPlugin == null) {
LOG.info("ResourceCalculatorPlugin is unavailable on this system. "
+ this.getClass().getName() + " is disabled.");
return false;
}
if (ResourceCalculatorProcessTree.getResourceCalculatorProcessTree("0", processTreeClass, conf) == null) {
LOG.info("ResourceCalculatorProcessTree is unavailable on this system. "
+ this.getClass().getName() + " is disabled.");
return false;
}
if (!(isPhysicalMemoryCheckEnabled() || isVirtualMemoryCheckEnabled())) {
LOG.info("Neither virutal-memory nor physical-memory monitoring is " +
"needed. Not running the monitor-thread");
@ -196,12 +207,12 @@ public class ContainersMonitorImpl extends AbstractService implements
private static class ProcessTreeInfo {
private ContainerId containerId;
private String pid;
private ProcfsBasedProcessTree pTree;
private ResourceCalculatorProcessTree pTree;
private long vmemLimit;
private long pmemLimit;
public ProcessTreeInfo(ContainerId containerId, String pid,
ProcfsBasedProcessTree pTree, long vmemLimit, long pmemLimit) {
ResourceCalculatorProcessTree pTree, long vmemLimit, long pmemLimit) {
this.containerId = containerId;
this.pid = pid;
this.pTree = pTree;
@ -221,11 +232,11 @@ public class ContainersMonitorImpl extends AbstractService implements
this.pid = pid;
}
public ProcfsBasedProcessTree getProcessTree() {
public ResourceCalculatorProcessTree getProcessTree() {
return this.pTree;
}
public void setProcessTree(ProcfsBasedProcessTree pTree) {
public void setProcessTree(ResourceCalculatorProcessTree pTree) {
this.pTree = pTree;
}
@ -295,7 +306,7 @@ public class ContainersMonitorImpl extends AbstractService implements
}
// method provided just for easy testing purposes
boolean isProcessTreeOverLimit(ProcfsBasedProcessTree pTree,
boolean isProcessTreeOverLimit(ResourceCalculatorProcessTree pTree,
String containerId, long limit) {
long currentMemUsage = pTree.getCumulativeVmem();
// as processes begin with an age 1, we want to see if there are processes
@ -370,9 +381,8 @@ public class ContainersMonitorImpl extends AbstractService implements
LOG.debug("Tracking ProcessTree " + pId
+ " for the first time");
ProcfsBasedProcessTree pt =
new ProcfsBasedProcessTree(pId,
ContainerExecutor.isSetsidAvailable);
ResourceCalculatorProcessTree pt =
ResourceCalculatorProcessTree.getResourceCalculatorProcessTree(pId, processTreeClass, conf);
ptInfo.setPid(pId);
ptInfo.setProcessTree(pt);
}
@ -385,7 +395,7 @@ public class ContainersMonitorImpl extends AbstractService implements
LOG.debug("Constructing ProcessTree for : PID = " + pId
+ " ContainerId = " + containerId);
ProcfsBasedProcessTree pTree = ptInfo.getProcessTree();
ResourceCalculatorProcessTree pTree = ptInfo.getProcessTree();
pTree = pTree.getProcessTree(); // get the updated process-tree
ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of
// updated state
@ -471,7 +481,7 @@ public class ContainersMonitorImpl extends AbstractService implements
private String formatErrorMessage(String memTypeExceeded,
long currentVmemUsage, long vmemLimit,
long currentPmemUsage, long pmemLimit,
String pId, ContainerId containerId, ProcfsBasedProcessTree pTree) {
String pId, ContainerId containerId, ResourceCalculatorProcessTree pTree) {
return
String.format("Container [pid=%s,containerID=%s] is running beyond %s memory limits. ",
pId, containerId, memTypeExceeded) +

View File

@ -132,7 +132,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
// tree rooted at 100 is over limit immediately, as it is
// twice over the mem limit.
ProcfsBasedProcessTree pTree = new ProcfsBasedProcessTree(
"100", true,
"100",
procfsRootDir.getAbsolutePath());
pTree.getProcessTree();
assertTrue("tree rooted at 100 should be over limit " +
@ -140,7 +140,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
// the tree rooted at 200 is initially below limit.
pTree = new ProcfsBasedProcessTree("200", true,
pTree = new ProcfsBasedProcessTree("200",
procfsRootDir.getAbsolutePath());
pTree.getProcessTree();
assertFalse("tree rooted at 200 shouldn't be over limit " +
@ -154,7 +154,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
test.isProcessTreeOverLimit(pTree, "dummyId", limit));
// the tree rooted at 600 is never over limit.
pTree = new ProcfsBasedProcessTree("600", true,
pTree = new ProcfsBasedProcessTree("600",
procfsRootDir.getAbsolutePath());
pTree.getProcessTree();
assertFalse("tree rooted at 600 should never be over limit.",