YARN-4308. ContainersAggregated CPU resource utilization reports negative usage in first few heartbeats. Contributed by Sunil G

This commit is contained in:
Naganarasimha 2016-06-09 05:41:09 +05:30
parent ae047655f4
commit 1500a0a300
7 changed files with 163 additions and 3 deletions

View File

@ -467,6 +467,14 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
return totalStime.add(BigInteger.valueOf(totalUtime));
}
/**
* Get the CPU usage by all the processes in the process-tree in Unix.
* Note: UNAVAILABLE is returned when CPU usage is not
* available. Returning any other error code is NOT advised.
*
* @return percentage CPU usage since the process-tree was created,
* {@link #UNAVAILABLE} if CPU usage cannot be calculated or is not available.
*/
@Override
public float getCpuUsagePercent() {
BigInteger processTotalJiffies = getTotalProcessJiffies();

View File

@ -187,9 +187,11 @@ public abstract class ResourceCalculatorProcessTree extends Configured {
* Get the CPU usage by all the processes in the process-tree based on
* average between samples as a ratio of overall CPU cycles similar to top.
* Thus, if 2 out of 4 cores are used this should return 200.0.
* Note: UNAVAILABLE is returned when CPU usage is not
* available. Returning any other error code is NOT advised.
*
* @return percentage CPU usage since the process-tree was created,
* {@link #UNAVAILABLE} if it cannot be calculated.
* {@link #UNAVAILABLE} if CPU usage cannot be calculated or is not available.
*/
public float getCpuUsagePercent() {
return UNAVAILABLE;

View File

@ -268,6 +268,14 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
return BigInteger.valueOf(totalMs);
}
/**
* Get the CPU usage by all the processes in the process-tree in Windows.
* Note: UNAVAILABLE is returned when CPU usage is not
* available. Returning any other error code is NOT advised.
*
* @return percentage CPU usage since the process-tree was created,
* {@link #UNAVAILABLE} if CPU usage cannot be calculated or is not available.
*/
@Override
public float getCpuUsagePercent() {
BigInteger processTotalMs = getTotalProcessMs();

View File

@ -455,6 +455,15 @@ public class ContainersMonitorImpl extends AbstractService implements
// cpuUsagePercentPerCore should be 300% and
// cpuUsageTotalCoresPercentage should be 50%
float cpuUsagePercentPerCore = pTree.getCpuUsagePercent();
if (cpuUsagePercentPerCore < 0) {
// CPU usage is not available likely because the container just
// started. Let us skip this turn and consider this container
// in the next iteration.
LOG.info("Skipping monitoring container " + containerId
+ " since CPU usage is not yet available.");
continue;
}
float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore /
resourceCalculatorPlugin.getNumProcessors();

View File

@ -0,0 +1,70 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
/**
 * Test double for {@link ResourceCalculatorProcessTree} that only simulates
 * CPU usage reporting. The very first CPU usage query answers with
 * {@link ResourceCalculatorProcessTree#UNAVAILABLE}; every later query
 * answers with a fixed non-zero percentage.
 */
public class MockCPUResourceCalculatorProcessTree
    extends ResourceCalculatorProcessTree {

  /** CPU usage reported by every query after the first one. */
  private static final float SUBSEQUENT_CPU_PERCENT = 50;

  /** Becomes true once {@link #getCpuUsagePercent()} has been queried. */
  private boolean firstQueryAnswered;

  /**
   * Creates the mock process tree for the specified root process.
   *
   * @param root identifier of the root process, passed to the superclass
   */
  public MockCPUResourceCalculatorProcessTree(String root) {
    super(root);
  }

  /** Does nothing; this mock keeps no process-tree state to refresh. */
  @Override
  public void updateProcessTree() {
  }

  /**
   * @return always an empty dump
   */
  @Override
  public String getProcessTreeDump() {
    return "";
  }

  /**
   * @return always 0
   */
  @Override
  public long getCumulativeCpuTime() {
    return 0;
  }

  /**
   * @return always true
   */
  @Override
  public boolean checkPidPgrpidForMatch() {
    return true;
  }

  /**
   * Reports the simulated CPU usage.
   *
   * @return {@link ResourceCalculatorProcessTree#UNAVAILABLE} on the first
   *         call, and a fixed value of 50 on every call after that
   */
  @Override
  public float getCpuUsagePercent() {
    if (firstQueryAnswered) {
      // A non-zero value so tests can tell it apart from the default 0.
      return SUBSEQUENT_CPU_PERCENT;
    }
    // Only the first query reports that CPU usage is unavailable.
    firstQueryAnswered = true;
    return ResourceCalculatorProcessTree.UNAVAILABLE;
  }
}

View File

@ -54,4 +54,9 @@ public class MockResourceCalculatorProcessTree extends ResourceCalculatorProcess
public long getRssMemorySize() {
return this.rssMemorySize;
}
/**
 * Reports a constant CPU usage instead of the inherited default.
 *
 * @return always 0
 */
@Override
public float getCpuUsagePercent() {
  return 0.0f;
}
}

View File

@ -27,8 +27,8 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
@ -43,19 +43,21 @@ import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
import org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
public class TestContainersMonitorResourceChange {
static final Logger LOG = Logger
.getLogger(TestContainersMonitorResourceChange.class);
private ContainersMonitorImpl containersMonitor;
private MockExecutor executor;
private Configuration conf;
@ -63,6 +65,8 @@ public class TestContainersMonitorResourceChange {
private Context context;
private MockContainerEventHandler containerEventHandler;
static final int WAIT_MS_PER_LOOP = 20; // 20 milli seconds
private static class MockExecutor extends ContainerExecutor {
@Override
public void init() throws IOException {
@ -232,6 +236,60 @@ public class TestContainersMonitorResourceChange {
containersMonitor.stop();
}
/**
 * Checks the aggregated containers utilization when the process tree cannot
 * report CPU usage on its first query: the utilization must still be the
 * zero default right after the container is registered, and must become
 * non-zero once the process tree starts returning a real value.
 *
 * @throws Exception if the monitor fails to initialize or start, or the
 *         wait for the utilization change is interrupted
 */
@Test
public void testContainersCPUResourceForDefaultValue() throws Exception {
  Configuration newConf = new Configuration(conf);
  // Set the container monitor interval to 20 ms.
  newConf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
  // Use the mock process tree that reports -1 for the first CPU usage
  // query and 50 for all later ones.
  newConf.set(YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
      MockCPUResourceCalculatorProcessTree.class.getCanonicalName());

  containersMonitor = createContainersMonitor(executor, dispatcher, context);
  containersMonitor.init(newConf);
  containersMonitor.start();
  try {
    // create container 1
    containersMonitor.handle(new ContainerStartMonitoringEvent(
        getContainerId(1), 2100L, 1000L, 1, 0, 0));

    // Verify the container utilization value.
    // Since MockCPUResourceCalculatorProcessTree will return a -1 as CPU
    // utilization, containersUtilization will not be calculated and hence
    // it will be 0.
    assertEquals(
        "Resource utilization must be default with MonitorThread's first run",
        0, containersMonitor.getContainersUtilization()
            .compareTo(ResourceUtilization.newInstance(0, 0, 0.0f)));

    // Verify the container utilization value. Since at least one round is
    // done, we can expect a non-zero value for container utilization as
    // MockCPUResourceCalculatorProcessTree#getCpuUsagePercent will
    // return 50.
    waitForContainerResourceUtilizationChange(containersMonitor, 100);
  } finally {
    // Stop the monitor even when an assertion above fails, so it does not
    // keep running after this test.
    containersMonitor.stop();
  }
}
/**
 * Polls the monitor until its aggregated containers utilization differs
 * from the all-zero default or the timeout elapses, then asserts that the
 * utilization is no longer the default.
 *
 * @param containersMonitor monitor whose utilization is polled
 * @param timeoutMsecs maximum time to keep polling, in milliseconds;
 *        polling sleeps WAIT_MS_PER_LOOP milliseconds between checks
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
public static void waitForContainerResourceUtilizationChange(
    ContainersMonitorImpl containersMonitor, int timeoutMsecs)
    throws InterruptedException {
  // Baseline to compare against; built once instead of on every poll.
  final ResourceUtilization zeroUtilization =
      ResourceUtilization.newInstance(0, 0, 0.0f);
  int timeWaiting = 0;
  while (0 == containersMonitor.getContainersUtilization()
      .compareTo(zeroUtilization) && timeWaiting < timeoutMsecs) {
    LOG.info(
        "Monitor thread is waiting for resource utilization change.");
    Thread.sleep(WAIT_MS_PER_LOOP);
    timeWaiting += WAIT_MS_PER_LOOP;
  }
  // Re-read the utilization: the loop may have ended because of the timeout.
  assertTrue("Resource utilization is not changed from second run onwards",
      0 != containersMonitor.getContainersUtilization()
          .compareTo(zeroUtilization));
}
private ContainersMonitorImpl createContainersMonitor(
ContainerExecutor containerExecutor, AsyncDispatcher dispatcher,
Context context) {