YARN-3458. CPU resource monitoring in Windows. Contributed by Inigo Goiri.

(cherry picked from commit 114b590955)
(cherry picked from commit fdfcffb3d1)
This commit is contained in:
cnauroth 2015-12-21 16:59:09 -08:00
parent e507ed1f90
commit eb5f46e34f
3 changed files with 68 additions and 10 deletions

View File

@ -206,6 +206,8 @@ Release 2.8.0 - UNRELEASED
YARN-3226. UI changes for decommissioning node. (Sunil G via YARN-3226. UI changes for decommissioning node. (Sunil G via
junping_du) junping_du)
YARN-3458. CPU resource monitoring in Windows. (Inigo Goiri via cnauroth)
IMPROVEMENTS IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before YARN-644. Basic null check is not performed on passed in arguments before

View File

@ -19,12 +19,14 @@
package org.apache.hadoop.yarn.util; package org.apache.hadoop.yarn.util;
import java.io.IOException; import java.io.IOException;
import java.math.BigInteger;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.util.CpuTimeTracker;
import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
@ -49,6 +51,11 @@ static class ProcessInfo {
private Map<String, ProcessInfo> processTree = private Map<String, ProcessInfo> processTree =
new HashMap<String, ProcessInfo>(); new HashMap<String, ProcessInfo>();
/** Track CPU utilization. */
private final CpuTimeTracker cpuTimeTracker;
/** Clock to account for CPU utilization. */
private Clock clock;
public static boolean isAvailable() { public static boolean isAvailable() {
if (Shell.WINDOWS) { if (Shell.WINDOWS) {
if (!Shell.hasWinutilsPath()) { if (!Shell.hasWinutilsPath()) {
@ -71,9 +78,25 @@ public static boolean isAvailable() {
return false; return false;
} }
public WindowsBasedProcessTree(String pid) { /**
* Create a monitor for a Windows process tree.
* @param pid Identifier of the job object.
*/
public WindowsBasedProcessTree(final String pid) {
this(pid, new SystemClock());
}
/**
* Create a monitor for a Windows process tree.
* @param pid Identifier of the job object.
* @param pClock Clock to keep track of time for CPU utilization.
*/
public WindowsBasedProcessTree(final String pid, final Clock pClock) {
super(pid); super(pid);
taskProcessId = pid; this.taskProcessId = pid;
this.clock = pClock;
// Instead of jiffies, Windows uses milliseconds directly; 1ms = 1 jiffy
this.cpuTimeTracker = new CpuTimeTracker(1L);
} }
// helper method to override while testing // helper method to override while testing
@ -231,9 +254,27 @@ public long getCumulativeCpuTime() {
return cpuTimeMs; return cpuTimeMs;
} }
/**
* Get the number of used ms for all the processes under the monitored job
* object.
* @return Total consumed milliseconds by all processes in the job object.
*/
private BigInteger getTotalProcessMs() {
long totalMs = 0;
for (ProcessInfo p : processTree.values()) {
if (p != null) {
totalMs += p.cpuTimeMs;
}
}
return BigInteger.valueOf(totalMs);
}
@Override @Override
public float getCpuUsagePercent() { public float getCpuUsagePercent() {
return UNAVAILABLE; BigInteger processTotalMs = getTotalProcessMs();
cpuTimeTracker.updateElapsedJiffies(processTotalMs, clock.getTime());
return cpuTimeTracker.getCpuTrackerUsagePercent();
} }
} }

View File

@ -21,8 +21,9 @@
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell;
import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
public class TestWindowsBasedProcessTree { public class TestWindowsBasedProcessTree {
@ -31,8 +32,9 @@ public class TestWindowsBasedProcessTree {
class WindowsBasedProcessTreeTester extends WindowsBasedProcessTree { class WindowsBasedProcessTreeTester extends WindowsBasedProcessTree {
String infoStr = null; String infoStr = null;
public WindowsBasedProcessTreeTester(String pid) {
super(pid); public WindowsBasedProcessTreeTester(String pid, Clock clock) {
super(pid, clock);
} }
@Override @Override
String getAllProcessInfoFromShell() { String getAllProcessInfoFromShell() {
@ -49,8 +51,11 @@ public void tree() {
} }
assertTrue("WindowsBasedProcessTree should be available on Windows", assertTrue("WindowsBasedProcessTree should be available on Windows",
WindowsBasedProcessTree.isAvailable()); WindowsBasedProcessTree.isAvailable());
ControlledClock testClock = new ControlledClock(new SystemClock());
long elapsedTimeBetweenUpdatesMsec = 0;
testClock.setTime(elapsedTimeBetweenUpdatesMsec);
WindowsBasedProcessTreeTester pTree = new WindowsBasedProcessTreeTester("-1"); WindowsBasedProcessTreeTester pTree = new WindowsBasedProcessTreeTester("-1", testClock);
pTree.infoStr = "3524,1024,1024,500\r\n2844,1024,1024,500\r\n"; pTree.infoStr = "3524,1024,1024,500\r\n2844,1024,1024,500\r\n";
pTree.updateProcessTree(); pTree.updateProcessTree();
assertTrue(pTree.getVirtualMemorySize() == 2048); assertTrue(pTree.getVirtualMemorySize() == 2048);
@ -63,8 +68,11 @@ public void tree() {
assertTrue(pTree.getRssMemorySize(0) == 2048); assertTrue(pTree.getRssMemorySize(0) == 2048);
assertTrue(pTree.getCumulativeRssmem(0) == 2048); assertTrue(pTree.getCumulativeRssmem(0) == 2048);
assertTrue(pTree.getCumulativeCpuTime() == 1000); assertTrue(pTree.getCumulativeCpuTime() == 1000);
assertTrue(pTree.getCpuUsagePercent() == ResourceCalculatorProcessTree.UNAVAILABLE);
pTree.infoStr = "3524,1024,1024,1000\r\n2844,1024,1024,1000\r\n1234,1024,1024,1000\r\n"; pTree.infoStr = "3524,1024,1024,1000\r\n2844,1024,1024,1000\r\n1234,1024,1024,1000\r\n";
elapsedTimeBetweenUpdatesMsec = 1000;
testClock.setTime(elapsedTimeBetweenUpdatesMsec);
pTree.updateProcessTree(); pTree.updateProcessTree();
assertTrue(pTree.getVirtualMemorySize() == 3072); assertTrue(pTree.getVirtualMemorySize() == 3072);
assertTrue(pTree.getCumulativeVmem() == 3072); assertTrue(pTree.getCumulativeVmem() == 3072);
@ -75,8 +83,13 @@ public void tree() {
assertTrue(pTree.getRssMemorySize(1) == 2048); assertTrue(pTree.getRssMemorySize(1) == 2048);
assertTrue(pTree.getCumulativeRssmem(1) == 2048); assertTrue(pTree.getCumulativeRssmem(1) == 2048);
assertTrue(pTree.getCumulativeCpuTime() == 3000); assertTrue(pTree.getCumulativeCpuTime() == 3000);
assertTrue(pTree.getCpuUsagePercent() == 200);
Assert.assertEquals("Percent CPU time is not correct",
pTree.getCpuUsagePercent(), 200, 0.01);
pTree.infoStr = "3524,1024,1024,1500\r\n2844,1024,1024,1500\r\n"; pTree.infoStr = "3524,1024,1024,1500\r\n2844,1024,1024,1500\r\n";
elapsedTimeBetweenUpdatesMsec = 2000;
testClock.setTime(elapsedTimeBetweenUpdatesMsec);
pTree.updateProcessTree(); pTree.updateProcessTree();
assertTrue(pTree.getVirtualMemorySize() == 2048); assertTrue(pTree.getVirtualMemorySize() == 2048);
assertTrue(pTree.getCumulativeVmem() == 2048); assertTrue(pTree.getCumulativeVmem() == 2048);
@ -87,5 +100,7 @@ public void tree() {
assertTrue(pTree.getRssMemorySize(2) == 2048); assertTrue(pTree.getRssMemorySize(2) == 2048);
assertTrue(pTree.getCumulativeRssmem(2) == 2048); assertTrue(pTree.getCumulativeRssmem(2) == 2048);
assertTrue(pTree.getCumulativeCpuTime() == 4000); assertTrue(pTree.getCumulativeCpuTime() == 4000);
Assert.assertEquals("Percent CPU time is not correct",
pTree.getCpuUsagePercent(), 0, 0.01);
} }
} }