YARN-5776. Checkstyle: MonitoringThread.Run method length is too long (miklos.szegedi@cloudera.com via rkanter)

This commit is contained in:
Robert Kanter 2016-10-27 14:36:27 -07:00
parent dd4ed6a587
commit 9449519a25
1 changed files with 279 additions and 181 deletions

View File

@ -48,10 +48,14 @@ import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
/**
* Monitors containers collecting resource usage and preempting the container
* if it exceeds its limits.
*/
public class ContainersMonitorImpl extends AbstractService implements public class ContainersMonitorImpl extends AbstractService implements
ContainersMonitor { ContainersMonitor {
final static Log LOG = LogFactory private final static Log LOG = LogFactory
.getLog(ContainersMonitorImpl.class); .getLog(ContainersMonitorImpl.class);
private long monitoringInterval; private long monitoringInterval;
@ -66,7 +70,7 @@ public class ContainersMonitorImpl extends AbstractService implements
private final ContainerExecutor containerExecutor; private final ContainerExecutor containerExecutor;
private final Dispatcher eventDispatcher; private final Dispatcher eventDispatcher;
protected final Context context; private final Context context;
private ResourceCalculatorPlugin resourceCalculatorPlugin; private ResourceCalculatorPlugin resourceCalculatorPlugin;
private Configuration conf; private Configuration conf;
private static float vmemRatio; private static float vmemRatio;
@ -84,15 +88,18 @@ public class ContainersMonitorImpl extends AbstractService implements
private static final long UNKNOWN_MEMORY_LIMIT = -1L; private static final long UNKNOWN_MEMORY_LIMIT = -1L;
private int nodeCpuPercentageForYARN; private int nodeCpuPercentageForYARN;
/**
* Type of container metric.
*/
@Private @Private
public static enum ContainerMetric { public enum ContainerMetric {
CPU, MEMORY CPU, MEMORY
} }
private ResourceUtilization containersUtilization; private ResourceUtilization containersUtilization;
// Tracks the aggregated allocation of the currently allocated containers // Tracks the aggregated allocation of the currently allocated containers
// when queuing of containers at the NMs is enabled. // when queuing of containers at the NMs is enabled.
private ResourceUtilization containersAllocation; private final ResourceUtilization containersAllocation;
private volatile boolean stopped = false; private volatile boolean stopped = false;
@ -111,44 +118,47 @@ public class ContainersMonitorImpl extends AbstractService implements
} }
@Override @Override
protected void serviceInit(Configuration conf) throws Exception { protected void serviceInit(Configuration myConf) throws Exception {
this.conf = myConf;
this.monitoringInterval = this.monitoringInterval =
conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, this.conf.getLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS,
conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS, this.conf.getLong(YarnConfiguration.NM_RESOURCE_MON_INTERVAL_MS,
YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS)); YarnConfiguration.DEFAULT_NM_RESOURCE_MON_INTERVAL_MS));
Class<? extends ResourceCalculatorPlugin> clazz = Class<? extends ResourceCalculatorPlugin> clazz =
conf.getClass(YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, this.conf.getClass(YarnConfiguration
conf.getClass( .NM_CONTAINER_MON_RESOURCE_CALCULATOR,
this.conf.getClass(
YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null, YarnConfiguration.NM_MON_RESOURCE_CALCULATOR, null,
ResourceCalculatorPlugin.class), ResourceCalculatorPlugin.class),
ResourceCalculatorPlugin.class); ResourceCalculatorPlugin.class);
this.resourceCalculatorPlugin = this.resourceCalculatorPlugin =
ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf); ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, this.conf);
LOG.info(" Using ResourceCalculatorPlugin : " LOG.info(" Using ResourceCalculatorPlugin : "
+ this.resourceCalculatorPlugin); + this.resourceCalculatorPlugin);
processTreeClass = conf.getClass(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE, null, processTreeClass = this.conf.getClass(
YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE, null,
ResourceCalculatorProcessTree.class); ResourceCalculatorProcessTree.class);
this.conf = conf;
LOG.info(" Using ResourceCalculatorProcessTree : " LOG.info(" Using ResourceCalculatorProcessTree : "
+ this.processTreeClass); + this.processTreeClass);
this.containerMetricsEnabled = this.containerMetricsEnabled =
conf.getBoolean(YarnConfiguration.NM_CONTAINER_METRICS_ENABLE, this.conf.getBoolean(YarnConfiguration.NM_CONTAINER_METRICS_ENABLE,
YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_ENABLE); YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_ENABLE);
this.containerMetricsPeriodMs = this.containerMetricsPeriodMs =
conf.getLong(YarnConfiguration.NM_CONTAINER_METRICS_PERIOD_MS, this.conf.getLong(YarnConfiguration.NM_CONTAINER_METRICS_PERIOD_MS,
YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_PERIOD_MS); YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_PERIOD_MS);
this.containerMetricsUnregisterDelayMs = conf.getLong( this.containerMetricsUnregisterDelayMs = this.conf.getLong(
YarnConfiguration.NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS, YarnConfiguration.NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS,
YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS); YarnConfiguration.DEFAULT_NM_CONTAINER_METRICS_UNREGISTER_DELAY_MS);
long configuredPMemForContainers = long configuredPMemForContainers =
NodeManagerHardwareUtils.getContainerMemoryMB( NodeManagerHardwareUtils.getContainerMemoryMB(
this.resourceCalculatorPlugin, conf) * 1024 * 1024L; this.resourceCalculatorPlugin, this.conf) * 1024 * 1024L;
long configuredVCoresForContainers = long configuredVCoresForContainers =
NodeManagerHardwareUtils.getVCores(this.resourceCalculatorPlugin, conf); NodeManagerHardwareUtils.getVCores(this.resourceCalculatorPlugin,
this.conf);
// Setting these irrespective of whether checks are enabled. Required in // Setting these irrespective of whether checks are enabled. Required in
// the UI. // the UI.
@ -157,16 +167,18 @@ public class ContainersMonitorImpl extends AbstractService implements
this.maxVCoresAllottedForContainers = configuredVCoresForContainers; this.maxVCoresAllottedForContainers = configuredVCoresForContainers;
// ///////// Virtual memory configuration ////// // ///////// Virtual memory configuration //////
vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, vmemRatio = this.conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
Preconditions.checkArgument(vmemRatio > 0.99f, Preconditions.checkArgument(vmemRatio > 0.99f,
YarnConfiguration.NM_VMEM_PMEM_RATIO + " should be at least 1.0"); YarnConfiguration.NM_VMEM_PMEM_RATIO + " should be at least 1.0");
this.maxVmemAllottedForContainers = this.maxVmemAllottedForContainers =
(long) (vmemRatio * configuredPMemForContainers); (long) (vmemRatio * configuredPMemForContainers);
pmemCheckEnabled = conf.getBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, pmemCheckEnabled = this.conf.getBoolean(
YarnConfiguration.NM_PMEM_CHECK_ENABLED,
YarnConfiguration.DEFAULT_NM_PMEM_CHECK_ENABLED); YarnConfiguration.DEFAULT_NM_PMEM_CHECK_ENABLED);
vmemCheckEnabled = conf.getBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, vmemCheckEnabled = this.conf.getBoolean(
YarnConfiguration.NM_VMEM_CHECK_ENABLED,
YarnConfiguration.DEFAULT_NM_VMEM_CHECK_ENABLED); YarnConfiguration.DEFAULT_NM_VMEM_CHECK_ENABLED);
LOG.info("Physical memory check enabled: " + pmemCheckEnabled); LOG.info("Physical memory check enabled: " + pmemCheckEnabled);
LOG.info("Virtual memory check enabled: " + vmemCheckEnabled); LOG.info("Virtual memory check enabled: " + vmemCheckEnabled);
@ -175,7 +187,7 @@ public class ContainersMonitorImpl extends AbstractService implements
LOG.info("ContainersMonitor enabled: " + containersMonitorEnabled); LOG.info("ContainersMonitor enabled: " + containersMonitorEnabled);
nodeCpuPercentageForYARN = nodeCpuPercentageForYARN =
NodeManagerHardwareUtils.getNodeCpuPercentage(conf); NodeManagerHardwareUtils.getNodeCpuPercentage(this.conf);
if (pmemCheckEnabled) { if (pmemCheckEnabled) {
// Logging if actual pmem cannot be determined. // Logging if actual pmem cannot be determined.
@ -201,7 +213,7 @@ public class ContainersMonitorImpl extends AbstractService implements
1) + "). Thrashing might happen."); 1) + "). Thrashing might happen.");
} }
} }
super.serviceInit(conf); super.serviceInit(this.conf);
} }
private boolean isContainerMonitorEnabled() { private boolean isContainerMonitorEnabled() {
@ -241,12 +253,15 @@ public class ContainersMonitorImpl extends AbstractService implements
try { try {
this.monitoringThread.join(); this.monitoringThread.join();
} catch (InterruptedException e) { } catch (InterruptedException e) {
; LOG.info("ContainersMonitorImpl monitoring thread interrupted");
} }
} }
super.serviceStop(); super.serviceStop();
} }
/**
* Encapsulates resource requirements of a process and its tree.
*/
public static class ProcessTreeInfo { public static class ProcessTreeInfo {
private ContainerId containerId; private ContainerId containerId;
private String pid; private String pid;
@ -278,49 +293,49 @@ public class ContainersMonitorImpl extends AbstractService implements
this.pid = pid; this.pid = pid;
} }
public ResourceCalculatorProcessTree getProcessTree() { ResourceCalculatorProcessTree getProcessTree() {
return this.pTree; return this.pTree;
} }
public void setProcessTree(ResourceCalculatorProcessTree pTree) { void setProcessTree(ResourceCalculatorProcessTree mypTree) {
this.pTree = pTree; this.pTree = mypTree;
} }
/** /**
* @return Virtual memory limit for the process tree in bytes * @return Virtual memory limit for the process tree in bytes
*/ */
public synchronized long getVmemLimit() { synchronized long getVmemLimit() {
return this.vmemLimit; return this.vmemLimit;
} }
/** /**
* @return Physical memory limit for the process tree in bytes * @return Physical memory limit for the process tree in bytes
*/ */
public synchronized long getPmemLimit() { synchronized long getPmemLimit() {
return this.pmemLimit; return this.pmemLimit;
} }
/** /**
* @return Number of cpu vcores assigned * @return Number of cpu vcores assigned
*/ */
public synchronized int getCpuVcores() { synchronized int getCpuVcores() {
return this.cpuVcores; return this.cpuVcores;
} }
/** /**
* Set resource limit for enforcement * Set resource limit for enforcement.
* @param pmemLimit * @param myPmemLimit
* Physical memory limit for the process tree in bytes * Physical memory limit for the process tree in bytes
* @param vmemLimit * @param myVmemLimit
* Virtual memory limit for the process tree in bytes * Virtual memory limit for the process tree in bytes
* @param cpuVcores * @param myCpuVcores
* Number of cpu vcores assigned * Number of cpu vcores assigned
*/ */
public synchronized void setResourceLimit( synchronized void setResourceLimit(
long pmemLimit, long vmemLimit, int cpuVcores) { long myPmemLimit, long myVmemLimit, int myCpuVcores) {
this.pmemLimit = pmemLimit; this.pmemLimit = myPmemLimit;
this.vmemLimit = vmemLimit; this.vmemLimit = myVmemLimit;
this.cpuVcores = cpuVcores; this.cpuVcores = myCpuVcores;
} }
} }
@ -354,7 +369,7 @@ public class ContainersMonitorImpl extends AbstractService implements
* or if processes in the tree, older than this thread's monitoring * or if processes in the tree, older than this thread's monitoring
* interval, exceed the memory limit. False, otherwise. * interval, exceed the memory limit. False, otherwise.
*/ */
boolean isProcessTreeOverLimit(String containerId, private boolean isProcessTreeOverLimit(String containerId,
long currentMemUsage, long currentMemUsage,
long curMemUsageOfAgedProcesses, long curMemUsageOfAgedProcesses,
long vmemLimit) { long vmemLimit) {
@ -388,7 +403,7 @@ public class ContainersMonitorImpl extends AbstractService implements
} }
private class MonitoringThread extends Thread { private class MonitoringThread extends Thread {
public MonitoringThread() { MonitoringThread() {
super("Container Monitor"); super("Container Monitor");
} }
@ -425,43 +440,8 @@ public class ContainersMonitorImpl extends AbstractService implements
try { try {
String pId = ptInfo.getPID(); String pId = ptInfo.getPID();
// Initialize any uninitialized processTrees // Initialize uninitialized process trees
if (pId == null) { initializeProcessTrees(entry);
// get pid from ContainerId
pId = containerExecutor.getProcessId(ptInfo.getContainerId());
if (pId != null) {
// pId will be null, either if the container is not spawned yet
// or if the container's pid is removed from ContainerExecutor
LOG.debug("Tracking ProcessTree " + pId
+ " for the first time");
ResourceCalculatorProcessTree pt =
ResourceCalculatorProcessTree.
getResourceCalculatorProcessTree(
pId, processTreeClass, conf);
ptInfo.setPid(pId);
ptInfo.setProcessTree(pt);
if (containerMetricsEnabled) {
ContainerMetrics usageMetrics = ContainerMetrics
.forContainer(containerId, containerMetricsPeriodMs,
containerMetricsUnregisterDelayMs);
usageMetrics.recordProcessId(pId);
}
Container container = context.getContainers().get(containerId);
String[] ipAndHost = containerExecutor.getIpAndHost(container);
if (ipAndHost != null && ipAndHost[0] != null
&& ipAndHost[1] != null) {
container.setIpAndHost(ipAndHost);
LOG.info(containerId + "'s ip = " + ipAndHost[0]
+ ", and hostname = " + ipAndHost[1]);
} else {
LOG.info("Can not get both ip and hostname: " + Arrays
.toString(ipAndHost));
}
}
}
// End of initializing any uninitialized processTrees
if (pId == null || !isResourceCalculatorAvailable()) { if (pId == null || !isResourceCalculatorAvailable()) {
continue; // processTree cannot be tracked continue; // processTree cannot be tracked
@ -487,74 +467,11 @@ public class ContainersMonitorImpl extends AbstractService implements
continue; continue;
} }
float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore / recordUsage(containerId, pId, pTree, ptInfo, currentVmemUsage,
resourceCalculatorPlugin.getNumProcessors(); currentPmemUsage, trackedContainersUtilization);
// Multiply by 1000 to avoid losing data when converting to int checkLimit(containerId, pId, pTree, ptInfo,
int milliVcoresUsed = (int) (cpuUsageTotalCoresPercentage * 1000 currentVmemUsage, currentPmemUsage);
* maxVCoresAllottedForContainers /nodeCpuPercentageForYARN);
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
if (LOG.isDebugEnabled()) {
LOG.debug(String.format(
"Memory usage of ProcessTree %s for container-id %s: ",
pId, containerId.toString()) +
formatUsageString(
currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit));
}
// Add resource utilization for this container
trackedContainersUtilization.addTo(
(int) (currentPmemUsage >> 20),
(int) (currentVmemUsage >> 20),
milliVcoresUsed / 1000.0f);
// Add usage to container metrics
if (containerMetricsEnabled) {
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs,
containerMetricsUnregisterDelayMs).recordMemoryUsage(
(int) (currentPmemUsage >> 20));
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs,
containerMetricsUnregisterDelayMs).recordCpuUsage
((int)cpuUsagePercentPerCore, milliVcoresUsed);
}
boolean isMemoryOverLimit = false;
String msg = "";
int containerExitStatus = ContainerExitStatus.INVALID;
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit,
pId, containerId, pTree);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit,
pId, containerId, pTree);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
// Accounting the total memory in usage for all containers // Accounting the total memory in usage for all containers
vmemUsageByAllContainers += currentVmemUsage; vmemUsageByAllContainers += currentVmemUsage;
@ -563,32 +480,8 @@ public class ContainersMonitorImpl extends AbstractService implements
cpuUsagePercentPerCoreByAllContainers += cpuUsagePercentPerCore; cpuUsagePercentPerCoreByAllContainers += cpuUsagePercentPerCore;
cpuUsageTotalCoresByAllContainers += cpuUsagePercentPerCore; cpuUsageTotalCoresByAllContainers += cpuUsagePercentPerCore;
if (isMemoryOverLimit) { reportResourceUsage(containerId, currentPmemUsage,
// Virtual or physical memory over limit. Fail the container and cpuUsagePercentPerCore);
// remove
// the corresponding process tree
LOG.warn(msg);
// warn if not a leader
if (!pTree.checkPidPgrpidForMatch()) {
LOG.error("Killed container process with PID " + pId
+ " but it is not a process group leader.");
}
// kill the container
eventDispatcher.getEventHandler().handle(
new ContainerKillEvent(containerId,
containerExitStatus, msg));
trackingContainers.remove(containerId);
LOG.info("Removed ProcessTree with root " + pId);
}
ContainerImpl container =
(ContainerImpl) context.getContainers().get(containerId);
NMTimelinePublisher nmMetricsPublisher =
container.getNMTimelinePublisher();
if (nmMetricsPublisher != null) {
nmMetricsPublisher.reportContainerResourceUsage(container,
currentPmemUsage, cpuUsagePercentPerCore);
}
} catch (Exception e) { } catch (Exception e) {
// Log the exception and proceed to the next container. // Log the exception and proceed to the next container.
LOG.warn("Uncaught exception in ContainersMonitorImpl " LOG.warn("Uncaught exception in ContainersMonitorImpl "
@ -617,21 +510,226 @@ public class ContainersMonitorImpl extends AbstractService implements
} }
} }
private String formatErrorMessage(String memTypeExceeded, /**
long currentVmemUsage, long vmemLimit, * Initialize any uninitialized processTrees.
long currentPmemUsage, long pmemLimit, * @param entry process tree entry to fill in
String pId, ContainerId containerId, ResourceCalculatorProcessTree pTree) { */
return private void initializeProcessTrees(
String.format("Container [pid=%s,containerID=%s] is running beyond %s memory limits. ", Entry<ContainerId, ProcessTreeInfo> entry) {
pId, containerId, memTypeExceeded) + ContainerId containerId = entry.getKey();
"Current usage: " + ProcessTreeInfo ptInfo = entry.getValue();
String pId = ptInfo.getPID();
// Initialize any uninitialized processTrees
if (pId == null) {
// get pid from ContainerId
pId = containerExecutor.getProcessId(ptInfo.getContainerId());
if (pId != null) {
// pId will be null, either if the container is not spawned yet
// or if the container's pid is removed from ContainerExecutor
LOG.debug("Tracking ProcessTree " + pId
+ " for the first time");
ResourceCalculatorProcessTree pt =
ResourceCalculatorProcessTree.
getResourceCalculatorProcessTree(
pId, processTreeClass, conf);
ptInfo.setPid(pId);
ptInfo.setProcessTree(pt);
if (containerMetricsEnabled) {
ContainerMetrics usageMetrics = ContainerMetrics
.forContainer(containerId, containerMetricsPeriodMs,
containerMetricsUnregisterDelayMs);
usageMetrics.recordProcessId(pId);
}
Container container = context.getContainers().get(containerId);
String[] ipAndHost = containerExecutor.getIpAndHost(container);
if (ipAndHost != null && ipAndHost[0] != null
&& ipAndHost[1] != null) {
container.setIpAndHost(ipAndHost);
LOG.info(containerId + "'s ip = " + ipAndHost[0]
+ ", and hostname = " + ipAndHost[1]);
} else {
LOG.info("Can not get both ip and hostname: " + Arrays
.toString(ipAndHost));
}
}
}
// End of initializing any uninitialized processTrees
}
/**
* Record usage metrics.
* @param containerId container id
* @param pId process id
* @param pTree valid process tree entry with CPU measurement
* @param ptInfo process tree info with limit information
* @param currentVmemUsage virtual memory measurement
* @param currentPmemUsage physical memory measurement
* @param trackedContainersUtilization utilization tracker to update
*/
private void recordUsage(ContainerId containerId, String pId,
ResourceCalculatorProcessTree pTree,
ProcessTreeInfo ptInfo,
long currentVmemUsage, long currentPmemUsage,
ResourceUtilization trackedContainersUtilization) {
float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
resourceCalculatorPlugin.getNumProcessors();
// Multiply by 1000 to avoid losing data when converting to int
int milliVcoresUsed = (int) (cpuUsageTotalCoresPercentage * 1000
* maxVCoresAllottedForContainers /nodeCpuPercentageForYARN);
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
if (LOG.isDebugEnabled()) {
LOG.debug(String.format(
"Memory usage of ProcessTree %s for container-id %s: ",
pId, containerId.toString()) +
formatUsageString(
currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit));
}
// Add resource utilization for this container
trackedContainersUtilization.addTo(
(int) (currentPmemUsage >> 20),
(int) (currentVmemUsage >> 20),
milliVcoresUsed / 1000.0f);
// Add usage to container metrics
if (containerMetricsEnabled) {
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs,
containerMetricsUnregisterDelayMs).recordMemoryUsage(
(int) (currentPmemUsage >> 20));
ContainerMetrics.forContainer(
containerId, containerMetricsPeriodMs,
containerMetricsUnregisterDelayMs).recordCpuUsage((int)
cpuUsagePercentPerCore, milliVcoresUsed);
}
}
/**
* Check resource limits and take actions if the limit is exceeded.
* @param containerId container id
* @param pId process id
* @param pTree valid process tree entry with CPU measurement
* @param ptInfo process tree info with limit information
* @param currentVmemUsage virtual memory measurement
* @param currentPmemUsage physical memory measurement
*/
@SuppressWarnings("unchecked")
private void checkLimit(ContainerId containerId, String pId,
ResourceCalculatorProcessTree pTree,
ProcessTreeInfo ptInfo,
long currentVmemUsage,
long currentPmemUsage) {
boolean isMemoryOverLimit = false;
long vmemLimit = ptInfo.getVmemLimit();
long pmemLimit = ptInfo.getPmemLimit();
// as processes begin with an age 1, we want to see if there
// are processes more than 1 iteration old.
long curMemUsageOfAgedProcesses = pTree.getVirtualMemorySize(1);
long curRssMemUsageOfAgedProcesses = pTree.getRssMemorySize(1);
String msg = "";
int containerExitStatus = ContainerExitStatus.INVALID;
if (isVmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("virtual",
formatUsageString(currentVmemUsage, vmemLimit, formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit) + currentPmemUsage, pmemLimit),
pId, containerId, pTree);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
} else if (isPmemCheckEnabled()
&& isProcessTreeOverLimit(containerId.toString(),
currentPmemUsage, curRssMemUsageOfAgedProcesses,
pmemLimit)) {
// Container (the root process) is still alive and overflowing
// memory.
// Dump the process-tree and then clean it up.
msg = formatErrorMessage("physical",
formatUsageString(currentVmemUsage, vmemLimit,
currentPmemUsage, pmemLimit),
pId, containerId, pTree);
isMemoryOverLimit = true;
containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
}
if (isMemoryOverLimit) {
// Virtual or physical memory over limit. Fail the container and
// remove
// the corresponding process tree
LOG.warn(msg);
// warn if not a leader
if (!pTree.checkPidPgrpidForMatch()) {
LOG.error("Killed container process with PID " + pId
+ " but it is not a process group leader.");
}
// kill the container
eventDispatcher.getEventHandler().handle(
new ContainerKillEvent(containerId,
containerExitStatus, msg));
trackingContainers.remove(containerId);
LOG.info("Removed ProcessTree with root " + pId);
}
}
/**
* Report usage metrics to the timeline service.
* @param containerId container id
* @param currentPmemUsage physical memory measurement
* @param cpuUsagePercentPerCore CPU usage
*/
private void reportResourceUsage(ContainerId containerId,
long currentPmemUsage, float cpuUsagePercentPerCore) {
ContainerImpl container =
(ContainerImpl) context.getContainers().get(containerId);
NMTimelinePublisher nmMetricsPublisher =
container.getNMTimelinePublisher();
if (nmMetricsPublisher != null) {
nmMetricsPublisher.reportContainerResourceUsage(container,
currentPmemUsage, cpuUsagePercentPerCore);
}
}
/**
* Format string when memory limit has been exceeded.
* @param memTypeExceeded type of memory
* @param usageString general memory usage information string
* @param pId process id
* @param containerId container id
* @param pTree process tree to dump full resource utilization graph
* @return formatted resource usage information
*/
private String formatErrorMessage(String memTypeExceeded,
String usageString, String pId, ContainerId containerId,
ResourceCalculatorProcessTree pTree) {
return
String.format("Container [pid=%s,containerID=%s] is " +
"running beyond %s memory limits. ",
pId, containerId, memTypeExceeded) +
"Current usage: " + usageString +
". Killing container.\n" + ". Killing container.\n" +
"Dump of the process-tree for " + containerId + " :\n" + "Dump of the process-tree for " + containerId + " :\n" +
pTree.getProcessTreeDump(); pTree.getProcessTreeDump();
} }
/**
* Format memory usage string for reporting.
* @param currentVmemUsage virtual memory usage
* @param vmemLimit virtual memory limit
* @param currentPmemUsage physical memory usage
* @param pmemLimit physical memory limit
* @return formatted memory information
*/
private String formatUsageString(long currentVmemUsage, long vmemLimit, private String formatUsageString(long currentVmemUsage, long vmemLimit,
long currentPmemUsage, long pmemLimit) { long currentPmemUsage, long pmemLimit) {
return String.format("%sB of %sB physical memory used; " + return String.format("%sB of %sB physical memory used; " +
@ -746,7 +844,7 @@ public class ContainersMonitorImpl extends AbstractService implements
return this.containersUtilization; return this.containersUtilization;
} }
public void setContainersUtilization(ResourceUtilization utilization) { private void setContainersUtilization(ResourceUtilization utilization) {
this.containersUtilization = utilization; this.containersUtilization = utilization;
} }
@ -858,7 +956,7 @@ public class ContainersMonitorImpl extends AbstractService implements
} }
} }
protected void onChangeMonitoringContainerResource( private void onChangeMonitoringContainerResource(
ContainersMonitorEvent monitoringEvent, ContainerId containerId) { ContainersMonitorEvent monitoringEvent, ContainerId containerId) {
ChangeMonitoringContainerResourceEvent changeEvent = ChangeMonitoringContainerResourceEvent changeEvent =
(ChangeMonitoringContainerResourceEvent) monitoringEvent; (ChangeMonitoringContainerResourceEvent) monitoringEvent;
@ -878,14 +976,14 @@ public class ContainersMonitorImpl extends AbstractService implements
changeContainerResource(containerId, changeEvent.getResource()); changeContainerResource(containerId, changeEvent.getResource());
} }
protected void onStopMonitoringContainer( private void onStopMonitoringContainer(
ContainersMonitorEvent monitoringEvent, ContainerId containerId) { ContainersMonitorEvent monitoringEvent, ContainerId containerId) {
LOG.info("Stopping resource-monitoring for " + containerId); LOG.info("Stopping resource-monitoring for " + containerId);
updateContainerMetrics(monitoringEvent); updateContainerMetrics(monitoringEvent);
trackingContainers.remove(containerId); trackingContainers.remove(containerId);
} }
protected void onStartMonitoringContainer( private void onStartMonitoringContainer(
ContainersMonitorEvent monitoringEvent, ContainerId containerId) { ContainersMonitorEvent monitoringEvent, ContainerId containerId) {
ContainerStartMonitoringEvent startEvent = ContainerStartMonitoringEvent startEvent =
(ContainerStartMonitoringEvent) monitoringEvent; (ContainerStartMonitoringEvent) monitoringEvent;