YARN-3458. CPU resource monitoring in Windows. Contributed by Inigo Goiri.
(cherry picked from commit 114b590955
)
This commit is contained in:
parent
daf5fae369
commit
fdfcffb3d1
|
@ -228,6 +228,8 @@ Release 2.8.0 - UNRELEASED
|
||||||
YARN-3226. UI changes for decommissioning node. (Sunil G via
|
YARN-3226. UI changes for decommissioning node. (Sunil G via
|
||||||
junping_du)
|
junping_du)
|
||||||
|
|
||||||
|
YARN-3458. CPU resource monitoring in Windows. (Inigo Goiri via cnauroth)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-644. Basic null check is not performed on passed in arguments before
|
YARN-644. Basic null check is not performed on passed in arguments before
|
||||||
|
|
|
@ -19,12 +19,14 @@
|
||||||
package org.apache.hadoop.yarn.util;
|
package org.apache.hadoop.yarn.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.math.BigInteger;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
|
import org.apache.hadoop.util.CpuTimeTracker;
|
||||||
import org.apache.hadoop.util.Shell;
|
import org.apache.hadoop.util.Shell;
|
||||||
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
|
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
@ -48,7 +50,12 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
|
||||||
private long cpuTimeMs = UNAVAILABLE;
|
private long cpuTimeMs = UNAVAILABLE;
|
||||||
private Map<String, ProcessInfo> processTree =
|
private Map<String, ProcessInfo> processTree =
|
||||||
new HashMap<String, ProcessInfo>();
|
new HashMap<String, ProcessInfo>();
|
||||||
|
|
||||||
|
/** Track CPU utilization. */
|
||||||
|
private final CpuTimeTracker cpuTimeTracker;
|
||||||
|
/** Clock to account for CPU utilization. */
|
||||||
|
private Clock clock;
|
||||||
|
|
||||||
public static boolean isAvailable() {
|
public static boolean isAvailable() {
|
||||||
if (Shell.WINDOWS) {
|
if (Shell.WINDOWS) {
|
||||||
if (!Shell.hasWinutilsPath()) {
|
if (!Shell.hasWinutilsPath()) {
|
||||||
|
@ -71,9 +78,25 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public WindowsBasedProcessTree(String pid) {
|
/**
|
||||||
|
* Create a monitor for a Windows process tree.
|
||||||
|
* @param pid Identifier of the job object.
|
||||||
|
*/
|
||||||
|
public WindowsBasedProcessTree(final String pid) {
|
||||||
|
this(pid, new SystemClock());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a monitor for a Windows process tree.
|
||||||
|
* @param pid Identifier of the job object.
|
||||||
|
* @param pClock Clock to keep track of time for CPU utilization.
|
||||||
|
*/
|
||||||
|
public WindowsBasedProcessTree(final String pid, final Clock pClock) {
|
||||||
super(pid);
|
super(pid);
|
||||||
taskProcessId = pid;
|
this.taskProcessId = pid;
|
||||||
|
this.clock = pClock;
|
||||||
|
// Instead of jiffies, Windows uses milliseconds directly; 1ms = 1 jiffy
|
||||||
|
this.cpuTimeTracker = new CpuTimeTracker(1L);
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper method to override while testing
|
// helper method to override while testing
|
||||||
|
@ -213,7 +236,7 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
|
||||||
}
|
}
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
public long getCumulativeRssmem(int olderThanAge) {
|
public long getCumulativeRssmem(int olderThanAge) {
|
||||||
|
@ -231,9 +254,27 @@ public class WindowsBasedProcessTree extends ResourceCalculatorProcessTree {
|
||||||
return cpuTimeMs;
|
return cpuTimeMs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of used ms for all the processes under the monitored job
|
||||||
|
* object.
|
||||||
|
* @return Total consumed milliseconds by all processes in the job object.
|
||||||
|
*/
|
||||||
|
private BigInteger getTotalProcessMs() {
|
||||||
|
long totalMs = 0;
|
||||||
|
for (ProcessInfo p : processTree.values()) {
|
||||||
|
if (p != null) {
|
||||||
|
totalMs += p.cpuTimeMs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return BigInteger.valueOf(totalMs);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float getCpuUsagePercent() {
|
public float getCpuUsagePercent() {
|
||||||
return UNAVAILABLE;
|
BigInteger processTotalMs = getTotalProcessMs();
|
||||||
|
cpuTimeTracker.updateElapsedJiffies(processTotalMs, clock.getTime());
|
||||||
|
|
||||||
|
return cpuTimeTracker.getCpuTrackerUsagePercent();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,8 +21,9 @@ package org.apache.hadoop.yarn.util;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.util.Shell;
|
import org.apache.hadoop.util.Shell;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
public class TestWindowsBasedProcessTree {
|
public class TestWindowsBasedProcessTree {
|
||||||
|
@ -31,8 +32,9 @@ public class TestWindowsBasedProcessTree {
|
||||||
|
|
||||||
class WindowsBasedProcessTreeTester extends WindowsBasedProcessTree {
|
class WindowsBasedProcessTreeTester extends WindowsBasedProcessTree {
|
||||||
String infoStr = null;
|
String infoStr = null;
|
||||||
public WindowsBasedProcessTreeTester(String pid) {
|
|
||||||
super(pid);
|
public WindowsBasedProcessTreeTester(String pid, Clock clock) {
|
||||||
|
super(pid, clock);
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
String getAllProcessInfoFromShell() {
|
String getAllProcessInfoFromShell() {
|
||||||
|
@ -49,8 +51,11 @@ public class TestWindowsBasedProcessTree {
|
||||||
}
|
}
|
||||||
assertTrue("WindowsBasedProcessTree should be available on Windows",
|
assertTrue("WindowsBasedProcessTree should be available on Windows",
|
||||||
WindowsBasedProcessTree.isAvailable());
|
WindowsBasedProcessTree.isAvailable());
|
||||||
|
ControlledClock testClock = new ControlledClock(new SystemClock());
|
||||||
WindowsBasedProcessTreeTester pTree = new WindowsBasedProcessTreeTester("-1");
|
long elapsedTimeBetweenUpdatesMsec = 0;
|
||||||
|
testClock.setTime(elapsedTimeBetweenUpdatesMsec);
|
||||||
|
|
||||||
|
WindowsBasedProcessTreeTester pTree = new WindowsBasedProcessTreeTester("-1", testClock);
|
||||||
pTree.infoStr = "3524,1024,1024,500\r\n2844,1024,1024,500\r\n";
|
pTree.infoStr = "3524,1024,1024,500\r\n2844,1024,1024,500\r\n";
|
||||||
pTree.updateProcessTree();
|
pTree.updateProcessTree();
|
||||||
assertTrue(pTree.getVirtualMemorySize() == 2048);
|
assertTrue(pTree.getVirtualMemorySize() == 2048);
|
||||||
|
@ -63,8 +68,11 @@ public class TestWindowsBasedProcessTree {
|
||||||
assertTrue(pTree.getRssMemorySize(0) == 2048);
|
assertTrue(pTree.getRssMemorySize(0) == 2048);
|
||||||
assertTrue(pTree.getCumulativeRssmem(0) == 2048);
|
assertTrue(pTree.getCumulativeRssmem(0) == 2048);
|
||||||
assertTrue(pTree.getCumulativeCpuTime() == 1000);
|
assertTrue(pTree.getCumulativeCpuTime() == 1000);
|
||||||
|
assertTrue(pTree.getCpuUsagePercent() == ResourceCalculatorProcessTree.UNAVAILABLE);
|
||||||
|
|
||||||
pTree.infoStr = "3524,1024,1024,1000\r\n2844,1024,1024,1000\r\n1234,1024,1024,1000\r\n";
|
pTree.infoStr = "3524,1024,1024,1000\r\n2844,1024,1024,1000\r\n1234,1024,1024,1000\r\n";
|
||||||
|
elapsedTimeBetweenUpdatesMsec = 1000;
|
||||||
|
testClock.setTime(elapsedTimeBetweenUpdatesMsec);
|
||||||
pTree.updateProcessTree();
|
pTree.updateProcessTree();
|
||||||
assertTrue(pTree.getVirtualMemorySize() == 3072);
|
assertTrue(pTree.getVirtualMemorySize() == 3072);
|
||||||
assertTrue(pTree.getCumulativeVmem() == 3072);
|
assertTrue(pTree.getCumulativeVmem() == 3072);
|
||||||
|
@ -75,8 +83,13 @@ public class TestWindowsBasedProcessTree {
|
||||||
assertTrue(pTree.getRssMemorySize(1) == 2048);
|
assertTrue(pTree.getRssMemorySize(1) == 2048);
|
||||||
assertTrue(pTree.getCumulativeRssmem(1) == 2048);
|
assertTrue(pTree.getCumulativeRssmem(1) == 2048);
|
||||||
assertTrue(pTree.getCumulativeCpuTime() == 3000);
|
assertTrue(pTree.getCumulativeCpuTime() == 3000);
|
||||||
|
assertTrue(pTree.getCpuUsagePercent() == 200);
|
||||||
|
Assert.assertEquals("Percent CPU time is not correct",
|
||||||
|
pTree.getCpuUsagePercent(), 200, 0.01);
|
||||||
|
|
||||||
pTree.infoStr = "3524,1024,1024,1500\r\n2844,1024,1024,1500\r\n";
|
pTree.infoStr = "3524,1024,1024,1500\r\n2844,1024,1024,1500\r\n";
|
||||||
|
elapsedTimeBetweenUpdatesMsec = 2000;
|
||||||
|
testClock.setTime(elapsedTimeBetweenUpdatesMsec);
|
||||||
pTree.updateProcessTree();
|
pTree.updateProcessTree();
|
||||||
assertTrue(pTree.getVirtualMemorySize() == 2048);
|
assertTrue(pTree.getVirtualMemorySize() == 2048);
|
||||||
assertTrue(pTree.getCumulativeVmem() == 2048);
|
assertTrue(pTree.getCumulativeVmem() == 2048);
|
||||||
|
@ -87,5 +100,7 @@ public class TestWindowsBasedProcessTree {
|
||||||
assertTrue(pTree.getRssMemorySize(2) == 2048);
|
assertTrue(pTree.getRssMemorySize(2) == 2048);
|
||||||
assertTrue(pTree.getCumulativeRssmem(2) == 2048);
|
assertTrue(pTree.getCumulativeRssmem(2) == 2048);
|
||||||
assertTrue(pTree.getCumulativeCpuTime() == 4000);
|
assertTrue(pTree.getCumulativeCpuTime() == 4000);
|
||||||
|
Assert.assertEquals("Percent CPU time is not correct",
|
||||||
|
pTree.getCpuUsagePercent(), 0, 0.01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue