HADOOP-12356. Fix computing CPU usage statistics on Windows. (Inigo Goiri via wangda)

(cherry picked from commit 89d1fd5dac)

Conflicts:
	hadoop-common-project/hadoop-common/CHANGES.txt
This commit is contained in:
Wangda Tan 2016-01-19 21:26:38 +08:00
parent 914a8fff56
commit 5dc2e78c97
11 changed files with 124 additions and 25 deletions

View File

@ -939,6 +939,9 @@ Release 2.8.0 - UNRELEASED
HADOOP-12689. S3 filesystem operations stopped working correctly HADOOP-12689. S3 filesystem operations stopped working correctly
(Matt Paduano via raviprak) (Matt Paduano via raviprak)
HADOOP-12356. Fix computing CPU usage statistics on Windows.
(Inigo Goiri via wangda)
Release 2.7.3 - UNRELEASED Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -104,9 +104,16 @@ public abstract class SysInfo {
/** /**
* Obtain the CPU usage % of the machine. Return -1 if it is unavailable * Obtain the CPU usage % of the machine. Return -1 if it is unavailable
* *
* @return CPU usage as a percentage of available cycles. * @return CPU usage as a percentage (from 0 to 100) of available cycles.
*/ */
public abstract float getCpuUsage(); public abstract float getCpuUsagePercentage();
/**
* Obtain the number of VCores used. Return -1 if it is unavailable
*
* @return Number of VCores used (from 0 to #VCores).
*/
public abstract float getNumVCoresUsed();
/** /**
* Obtain the aggregated number of bytes read over the network. * Obtain the aggregated number of bytes read over the network.

View File

@ -608,7 +608,7 @@ public class SysInfoLinux extends SysInfo {
/** {@inheritDoc} */ /** {@inheritDoc} */
@Override @Override
public float getCpuUsage() { public float getCpuUsagePercentage() {
readProcStatFile(); readProcStatFile();
float overallCpuUsage = cpuTimeTracker.getCpuTrackerUsagePercent(); float overallCpuUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
if (overallCpuUsage != CpuTimeTracker.UNAVAILABLE) { if (overallCpuUsage != CpuTimeTracker.UNAVAILABLE) {
@ -617,6 +617,17 @@ public class SysInfoLinux extends SysInfo {
return overallCpuUsage; return overallCpuUsage;
} }
/**
 * {@inheritDoc}
 *
 * Calls readProcStatFile() before sampling, then converts the usage
 * percentage reported by the CPU time tracker into a number of VCores by
 * dividing it by 100. If the tracker reports CpuTimeTracker.UNAVAILABLE,
 * that sentinel is returned unchanged.
 */
@Override
public float getNumVCoresUsed() {
readProcStatFile();
float overallVCoresUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
if (overallVCoresUsage != CpuTimeTracker.UNAVAILABLE) {
// 100% of usage corresponds to one fully used VCore.
overallVCoresUsage = overallVCoresUsage / 100F;
}
return overallVCoresUsage;
}
/** {@inheritDoc} */ /** {@inheritDoc} */
@Override @Override
public long getNetworkBytesRead() { public long getNetworkBytesRead() {
@ -676,7 +687,7 @@ public class SysInfoLinux extends SysInfo {
} catch (InterruptedException e) { } catch (InterruptedException e) {
// do nothing // do nothing
} }
System.out.println("CPU usage % : " + plugin.getCpuUsage()); System.out.println("CPU usage % : " + plugin.getCpuUsagePercentage());
} }
@VisibleForTesting @VisibleForTesting

View File

@ -104,8 +104,13 @@ public class SysInfoWindows extends SysInfo {
cpuFrequencyKhz = Long.parseLong(sysInfo[5]); cpuFrequencyKhz = Long.parseLong(sysInfo[5]);
cumulativeCpuTimeMs = Long.parseLong(sysInfo[6]); cumulativeCpuTimeMs = Long.parseLong(sysInfo[6]);
if (lastCumCpuTimeMs != -1) { if (lastCumCpuTimeMs != -1) {
/**
* This number will be the aggregated usage across all cores in
* [0.0, 100.0 * numProcessors]. For example, it will be 400.0 if
* there are 8 cores and each of them is running at 50% utilization.
*/
cpuUsage = (cumulativeCpuTimeMs - lastCumCpuTimeMs) cpuUsage = (cumulativeCpuTimeMs - lastCumCpuTimeMs)
/ (refreshInterval * 1.0f); * 100F / refreshInterval;
} }
} catch (NumberFormatException nfe) { } catch (NumberFormatException nfe) {
LOG.warn("Error parsing sysInfo", nfe); LOG.warn("Error parsing sysInfo", nfe);
@ -175,9 +180,24 @@ public class SysInfoWindows extends SysInfo {
/** {@inheritDoc} */ /** {@inheritDoc} */
@Override @Override
public float getCpuUsage() { public float getCpuUsagePercentage() {
refreshIfNeeded(); refreshIfNeeded();
return cpuUsage; float ret = cpuUsage;
if (ret != -1) {
ret = ret / numProcessors;
}
return ret;
}
/** {@inheritDoc} */
@Override
public float getNumVCoresUsed() {
refreshIfNeeded();
float ret = cpuUsage;
if (ret != -1) {
ret = ret / 100F;
}
return ret;
} }
/** {@inheritDoc} */ /** {@inheritDoc} */

View File

@ -229,7 +229,10 @@ public class TestSysInfoLinux {
updateStatFile(uTime, nTime, sTime); updateStatFile(uTime, nTime, sTime);
assertEquals(plugin.getCumulativeCpuTime(), assertEquals(plugin.getCumulativeCpuTime(),
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime)); FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
assertEquals(plugin.getCpuUsage(), (float)(CpuTimeTracker.UNAVAILABLE),0.0); assertEquals(plugin.getCpuUsagePercentage(),
(float)(CpuTimeTracker.UNAVAILABLE),0.0);
assertEquals(plugin.getNumVCoresUsed(),
(float)(CpuTimeTracker.UNAVAILABLE),0.0);
// Advance the time and sample again to test the CPU usage calculation // Advance the time and sample again to test the CPU usage calculation
uTime += 100L; uTime += 100L;
@ -237,13 +240,15 @@ public class TestSysInfoLinux {
updateStatFile(uTime, nTime, sTime); updateStatFile(uTime, nTime, sTime);
assertEquals(plugin.getCumulativeCpuTime(), assertEquals(plugin.getCumulativeCpuTime(),
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime)); FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
assertEquals(plugin.getCpuUsage(), 6.25F, 0.0); assertEquals(plugin.getCpuUsagePercentage(), 6.25F, 0.0);
assertEquals(plugin.getNumVCoresUsed(), 0.5F, 0.0);
// Advance the time and sample again. This time, we call getCpuUsage() only. // Advance the time and sample again. This time, we call getCpuUsagePercentage() only.
uTime += 600L; uTime += 600L;
plugin.advanceTime(300L); plugin.advanceTime(300L);
updateStatFile(uTime, nTime, sTime); updateStatFile(uTime, nTime, sTime);
assertEquals(plugin.getCpuUsage(), 25F, 0.0); assertEquals(plugin.getCpuUsagePercentage(), 25F, 0.0);
assertEquals(plugin.getNumVCoresUsed(), 2F, 0.0);
// Advance very short period of time (one jiffy length). // Advance very short period of time (one jiffy length).
// In this case, CPU usage should not be updated. // In this case, CPU usage should not be updated.
@ -252,7 +257,10 @@ public class TestSysInfoLinux {
updateStatFile(uTime, nTime, sTime); updateStatFile(uTime, nTime, sTime);
assertEquals(plugin.getCumulativeCpuTime(), assertEquals(plugin.getCumulativeCpuTime(),
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime)); FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
assertEquals(plugin.getCpuUsage(), 25F, 0.0); // CPU usage is not updated. assertEquals(
plugin.getCpuUsagePercentage(), 25F, 0.0); // CPU usage is not updated.
assertEquals(
plugin.getNumVCoresUsed(), 2F, 0.0); // CPU usage is not updated.
} }
/** /**

View File

@ -58,7 +58,10 @@ public class TestSysInfoWindows {
assertEquals(2805000L, tester.getCpuFrequency()); assertEquals(2805000L, tester.getCpuFrequency());
assertEquals(6261812L, tester.getCumulativeCpuTime()); assertEquals(6261812L, tester.getCumulativeCpuTime());
// undef on first call // undef on first call
assertEquals(-1.0, tester.getCpuUsage(), 0.0); assertEquals((float)CpuTimeTracker.UNAVAILABLE,
tester.getCpuUsagePercentage(), 0.0);
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
tester.getNumVCoresUsed(), 0.0);
} }
@Test(timeout = 10000) @Test(timeout = 10000)
@ -70,22 +73,60 @@ public class TestSysInfoWindows {
tester.getAvailablePhysicalMemorySize(); tester.getAvailablePhysicalMemorySize();
// verify information has been refreshed // verify information has been refreshed
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize()); assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
assertEquals(-1.0, tester.getCpuUsage(), 0.0); assertEquals((float)CpuTimeTracker.UNAVAILABLE,
tester.getCpuUsagePercentage(), 0.0);
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
tester.getNumVCoresUsed(), 0.0);
tester.setSysinfoString( tester.setSysinfoString(
"17177038848,8589467648,15232745472,5400417792,1,2805000,6263012\r\n"); "17177038848,8589467648,15232745472,5400417792,1,2805000,6263012\r\n");
tester.getAvailablePhysicalMemorySize(); tester.getAvailablePhysicalMemorySize();
// verify information has not been refreshed // verify information has not been refreshed
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize()); assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
assertEquals(-1.0, tester.getCpuUsage(), 0.0); assertEquals((float)CpuTimeTracker.UNAVAILABLE,
tester.getCpuUsagePercentage(), 0.0);
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
tester.getNumVCoresUsed(), 0.0);
// advance clock // advance clock
tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1); tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1);
// verify information has been refreshed // verify information has been refreshed
assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize()); assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize());
assertEquals((6263012 - 6261812) / (SysInfoWindows.REFRESH_INTERVAL_MS + 1f), assertEquals((6263012 - 6261812) * 100F /
tester.getCpuUsage(), 0.0); (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1,
tester.getCpuUsagePercentage(), 0.0);
assertEquals((6263012 - 6261812) /
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1,
tester.getNumVCoresUsed(), 0.0);
}
@Test(timeout = 10000)
public void refreshAndCpuUsageMulticore() throws InterruptedException {
// test with 12 cores
SysInfoWindowsMock tester = new SysInfoWindowsMock();
tester.setSysinfoString(
"17177038848,8589467648,15232745472,6400417792,12,2805000,6261812\r\n");
// verify information has been refreshed
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
tester.setSysinfoString(
"17177038848,8589467648,15232745472,5400417792,12,2805000,6263012\r\n");
// verify information has not been refreshed
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
// advance clock
tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1);
// verify information has been refreshed
assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize());
// verify CPU usage is averaged over the 12 cores and VCores used is not
assertEquals((6263012 - 6261812) * 100F /
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 12,
tester.getCpuUsagePercentage(), 0.0);
assertEquals((6263012 - 6261812) /
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f),
tester.getNumVCoresUsed(), 0.0);
} }
@Test(timeout = 10000) @Test(timeout = 10000)

View File

@ -120,7 +120,7 @@ public class DummyResourceCalculatorPlugin extends ResourceCalculatorPlugin {
/** {@inheritDoc} */ /** {@inheritDoc} */
@Override @Override
public float getCpuUsage() { public float getCpuUsagePercentage() {
return getConf().getFloat(CPU_USAGE, -1); return getConf().getFloat(CPU_USAGE, -1);
} }

View File

@ -120,12 +120,21 @@ public class ResourceCalculatorPlugin extends Configured {
} }
/** /**
* Obtain the CPU usage % of the machine. Return -1 if it is unavailable * Obtain the CPU usage % of the machine. Return -1 if it is unavailable.
* *
* @return CPU usage in % * @return CPU usage in %
*/ */
public float getCpuUsage() { public float getCpuUsagePercentage() {
return sys.getCpuUsage(); return sys.getCpuUsagePercentage();
}
/**
* Obtain the number of VCores used. Return -1 if it is unavailable.
*
* @return Number of VCores used (from 0 to #VCores)
*/
public float getNumVCoresUsed() {
return sys.getNumVCoresUsed();
} }
/** /**

View File

@ -141,12 +141,12 @@ public class NodeResourceMonitorImpl extends AbstractService implements
long vmem = long vmem =
resourceCalculatorPlugin.getVirtualMemorySize() resourceCalculatorPlugin.getVirtualMemorySize()
- resourceCalculatorPlugin.getAvailableVirtualMemorySize(); - resourceCalculatorPlugin.getAvailableVirtualMemorySize();
float cpu = resourceCalculatorPlugin.getCpuUsage(); float vcores = resourceCalculatorPlugin.getNumVCoresUsed();
nodeUtilization = nodeUtilization =
ResourceUtilization.newInstance( ResourceUtilization.newInstance(
(int) (pmem >> 20), // B -> MB (int) (pmem >> 20), // B -> MB
(int) (vmem >> 20), // B -> MB (int) (vmem >> 20), // B -> MB
cpu); // 1 CPU at 100% is 1 vcores); // Used Virtual Cores
try { try {
Thread.sleep(monitoringInterval); Thread.sleep(monitoringInterval);

View File

@ -63,7 +63,7 @@ public class MockResourceCalculatorPlugin extends ResourceCalculatorPlugin {
} }
@Override @Override
public float getCpuUsage() { public float getCpuUsagePercentage() {
return 0; return 0;
} }
} }

View File

@ -73,7 +73,7 @@ public class TestNodeManagerHardwareUtils {
} }
@Override @Override
public float getCpuUsage() { public float getCpuUsagePercentage() {
return 0; return 0;
} }