HADOOP-12356. Fix computing CPU usage statistics on Windows. (Inigo Goiri via wangda)
(cherry picked from commit 89d1fd5dac)
Conflicts:
hadoop-common-project/hadoop-common/CHANGES.txt
This commit is contained in:
parent
914a8fff56
commit
5dc2e78c97
|
@ -939,6 +939,9 @@ Release 2.8.0 - UNRELEASED
|
|||
HADOOP-12689. S3 filesystem operations stopped working correctly
|
||||
(Matt Paduano via raviprak)
|
||||
|
||||
HADOOP-12356. Fix computing CPU usage statistics on Windows.
|
||||
(Inigo Goiri via wangda)
|
||||
|
||||
Release 2.7.3 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -104,9 +104,16 @@ public abstract class SysInfo {
|
|||
/**
|
||||
* Obtain the CPU usage % of the machine. Return -1 if it is unavailable
|
||||
*
|
||||
* @return CPU usage as a percentage of available cycles.
|
||||
* @return CPU usage as a percentage (from 0 to 100) of available cycles.
|
||||
*/
|
||||
public abstract float getCpuUsage();
|
||||
public abstract float getCpuUsagePercentage();
|
||||
|
||||
/**
|
||||
* Obtain the number of VCores used. Return -1 if it is unavailable
|
||||
*
|
||||
* @return Number of VCores used (from 0 to #VCores).
|
||||
*/
|
||||
public abstract float getNumVCoresUsed();
|
||||
|
||||
/**
|
||||
* Obtain the aggregated number of bytes read over the network.
|
||||
|
|
|
@ -608,7 +608,7 @@ public class SysInfoLinux extends SysInfo {
|
|||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public float getCpuUsage() {
|
||||
public float getCpuUsagePercentage() {
|
||||
readProcStatFile();
|
||||
float overallCpuUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
|
||||
if (overallCpuUsage != CpuTimeTracker.UNAVAILABLE) {
|
||||
|
@ -617,6 +617,17 @@ public class SysInfoLinux extends SysInfo {
|
|||
return overallCpuUsage;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public float getNumVCoresUsed() {
|
||||
readProcStatFile();
|
||||
float overallVCoresUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
|
||||
if (overallVCoresUsage != CpuTimeTracker.UNAVAILABLE) {
|
||||
overallVCoresUsage = overallVCoresUsage / 100F;
|
||||
}
|
||||
return overallVCoresUsage;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public long getNetworkBytesRead() {
|
||||
|
@ -676,7 +687,7 @@ public class SysInfoLinux extends SysInfo {
|
|||
} catch (InterruptedException e) {
|
||||
// do nothing
|
||||
}
|
||||
System.out.println("CPU usage % : " + plugin.getCpuUsage());
|
||||
System.out.println("CPU usage % : " + plugin.getCpuUsagePercentage());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
|
|
|
@ -104,8 +104,13 @@ public class SysInfoWindows extends SysInfo {
|
|||
cpuFrequencyKhz = Long.parseLong(sysInfo[5]);
|
||||
cumulativeCpuTimeMs = Long.parseLong(sysInfo[6]);
|
||||
if (lastCumCpuTimeMs != -1) {
|
||||
/**
|
||||
* This number will be the aggregated usage across all cores in
|
||||
* [0.0, 100.0 * numProcessors]. For example, it will be 400.0 if there are 8
|
||||
* cores and each of them is running at 50% utilization.
|
||||
*/
|
||||
cpuUsage = (cumulativeCpuTimeMs - lastCumCpuTimeMs)
|
||||
/ (refreshInterval * 1.0f);
|
||||
* 100F / refreshInterval;
|
||||
}
|
||||
} catch (NumberFormatException nfe) {
|
||||
LOG.warn("Error parsing sysInfo", nfe);
|
||||
|
@ -175,9 +180,24 @@ public class SysInfoWindows extends SysInfo {
|
|||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public float getCpuUsage() {
|
||||
public float getCpuUsagePercentage() {
|
||||
refreshIfNeeded();
|
||||
return cpuUsage;
|
||||
float ret = cpuUsage;
|
||||
if (ret != -1) {
|
||||
ret = ret / numProcessors;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public float getNumVCoresUsed() {
|
||||
refreshIfNeeded();
|
||||
float ret = cpuUsage;
|
||||
if (ret != -1) {
|
||||
ret = ret / 100F;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
|
|
|
@ -229,7 +229,10 @@ public class TestSysInfoLinux {
|
|||
updateStatFile(uTime, nTime, sTime);
|
||||
assertEquals(plugin.getCumulativeCpuTime(),
|
||||
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
|
||||
assertEquals(plugin.getCpuUsage(), (float)(CpuTimeTracker.UNAVAILABLE),0.0);
|
||||
assertEquals(plugin.getCpuUsagePercentage(),
|
||||
(float)(CpuTimeTracker.UNAVAILABLE),0.0);
|
||||
assertEquals(plugin.getNumVCoresUsed(),
|
||||
(float)(CpuTimeTracker.UNAVAILABLE),0.0);
|
||||
|
||||
// Advance the time and sample again to test the CPU usage calculation
|
||||
uTime += 100L;
|
||||
|
@ -237,13 +240,15 @@ public class TestSysInfoLinux {
|
|||
updateStatFile(uTime, nTime, sTime);
|
||||
assertEquals(plugin.getCumulativeCpuTime(),
|
||||
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
|
||||
assertEquals(plugin.getCpuUsage(), 6.25F, 0.0);
|
||||
assertEquals(plugin.getCpuUsagePercentage(), 6.25F, 0.0);
|
||||
assertEquals(plugin.getNumVCoresUsed(), 0.5F, 0.0);
|
||||
|
||||
// Advance the time and sample again. This time, we call getCpuUsage() only.
|
||||
// Advance the time and sample again. This time, we call getCpuUsagePercentage() only.
|
||||
uTime += 600L;
|
||||
plugin.advanceTime(300L);
|
||||
updateStatFile(uTime, nTime, sTime);
|
||||
assertEquals(plugin.getCpuUsage(), 25F, 0.0);
|
||||
assertEquals(plugin.getCpuUsagePercentage(), 25F, 0.0);
|
||||
assertEquals(plugin.getNumVCoresUsed(), 2F, 0.0);
|
||||
|
||||
// Advance very short period of time (one jiffy length).
|
||||
// In this case, CPU usage should not be updated.
|
||||
|
@ -252,7 +257,10 @@ public class TestSysInfoLinux {
|
|||
updateStatFile(uTime, nTime, sTime);
|
||||
assertEquals(plugin.getCumulativeCpuTime(),
|
||||
FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
|
||||
assertEquals(plugin.getCpuUsage(), 25F, 0.0); // CPU usage is not updated.
|
||||
assertEquals(
|
||||
plugin.getCpuUsagePercentage(), 25F, 0.0); // CPU usage is not updated.
|
||||
assertEquals(
|
||||
plugin.getNumVCoresUsed(), 2F, 0.0); // CPU usage is not updated.
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -58,7 +58,10 @@ public class TestSysInfoWindows {
|
|||
assertEquals(2805000L, tester.getCpuFrequency());
|
||||
assertEquals(6261812L, tester.getCumulativeCpuTime());
|
||||
// undef on first call
|
||||
assertEquals(-1.0, tester.getCpuUsage(), 0.0);
|
||||
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
|
||||
tester.getCpuUsagePercentage(), 0.0);
|
||||
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
|
||||
tester.getNumVCoresUsed(), 0.0);
|
||||
}
|
||||
|
||||
@Test(timeout = 10000)
|
||||
|
@ -70,22 +73,60 @@ public class TestSysInfoWindows {
|
|||
tester.getAvailablePhysicalMemorySize();
|
||||
// verify information has been refreshed
|
||||
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
|
||||
assertEquals(-1.0, tester.getCpuUsage(), 0.0);
|
||||
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
|
||||
tester.getCpuUsagePercentage(), 0.0);
|
||||
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
|
||||
tester.getNumVCoresUsed(), 0.0);
|
||||
|
||||
tester.setSysinfoString(
|
||||
"17177038848,8589467648,15232745472,5400417792,1,2805000,6263012\r\n");
|
||||
tester.getAvailablePhysicalMemorySize();
|
||||
// verify information has not been refreshed
|
||||
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
|
||||
assertEquals(-1.0, tester.getCpuUsage(), 0.0);
|
||||
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
|
||||
tester.getCpuUsagePercentage(), 0.0);
|
||||
assertEquals((float)CpuTimeTracker.UNAVAILABLE,
|
||||
tester.getNumVCoresUsed(), 0.0);
|
||||
|
||||
// advance clock
|
||||
tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1);
|
||||
|
||||
// verify information has been refreshed
|
||||
assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize());
|
||||
assertEquals((6263012 - 6261812) / (SysInfoWindows.REFRESH_INTERVAL_MS + 1f),
|
||||
tester.getCpuUsage(), 0.0);
|
||||
assertEquals((6263012 - 6261812) * 100F /
|
||||
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1,
|
||||
tester.getCpuUsagePercentage(), 0.0);
|
||||
assertEquals((6263012 - 6261812) /
|
||||
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1,
|
||||
tester.getNumVCoresUsed(), 0.0);
|
||||
}
|
||||
|
||||
@Test(timeout = 10000)
|
||||
public void refreshAndCpuUsageMulticore() throws InterruptedException {
|
||||
// test with 12 cores
|
||||
SysInfoWindowsMock tester = new SysInfoWindowsMock();
|
||||
tester.setSysinfoString(
|
||||
"17177038848,8589467648,15232745472,6400417792,12,2805000,6261812\r\n");
|
||||
// verify information has been refreshed
|
||||
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
|
||||
|
||||
tester.setSysinfoString(
|
||||
"17177038848,8589467648,15232745472,5400417792,12,2805000,6263012\r\n");
|
||||
// verify information has not been refreshed
|
||||
assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
|
||||
|
||||
// advance clock
|
||||
tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1);
|
||||
|
||||
// verify information has been refreshed
|
||||
assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize());
|
||||
// verify CPU usage is computed across all 12 cores
|
||||
assertEquals((6263012 - 6261812) * 100F /
|
||||
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 12,
|
||||
tester.getCpuUsagePercentage(), 0.0);
|
||||
assertEquals((6263012 - 6261812) /
|
||||
(SysInfoWindows.REFRESH_INTERVAL_MS + 1f),
|
||||
tester.getNumVCoresUsed(), 0.0);
|
||||
}
|
||||
|
||||
@Test(timeout = 10000)
|
||||
|
|
|
@ -120,7 +120,7 @@ public class DummyResourceCalculatorPlugin extends ResourceCalculatorPlugin {
|
|||
|
||||
/** {@inheritDoc} */
|
||||
@Override
|
||||
public float getCpuUsage() {
|
||||
public float getCpuUsagePercentage() {
|
||||
return getConf().getFloat(CPU_USAGE, -1);
|
||||
}
|
||||
|
||||
|
|
|
@ -120,12 +120,21 @@ public class ResourceCalculatorPlugin extends Configured {
|
|||
}
|
||||
|
||||
/**
|
||||
* Obtain the CPU usage % of the machine. Return -1 if it is unavailable
|
||||
* Obtain the CPU usage % of the machine. Return -1 if it is unavailable.
|
||||
*
|
||||
* @return CPU usage in %
|
||||
*/
|
||||
public float getCpuUsage() {
|
||||
return sys.getCpuUsage();
|
||||
public float getCpuUsagePercentage() {
|
||||
return sys.getCpuUsagePercentage();
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the number of VCores used. Return -1 if it is unavailable.
|
||||
*
|
||||
* @return Number of VCores used (from 0 to #VCores)
|
||||
*/
|
||||
public float getNumVCoresUsed() {
|
||||
return sys.getNumVCoresUsed();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -141,12 +141,12 @@ public class NodeResourceMonitorImpl extends AbstractService implements
|
|||
long vmem =
|
||||
resourceCalculatorPlugin.getVirtualMemorySize()
|
||||
- resourceCalculatorPlugin.getAvailableVirtualMemorySize();
|
||||
float cpu = resourceCalculatorPlugin.getCpuUsage();
|
||||
float vcores = resourceCalculatorPlugin.getNumVCoresUsed();
|
||||
nodeUtilization =
|
||||
ResourceUtilization.newInstance(
|
||||
(int) (pmem >> 20), // B -> MB
|
||||
(int) (vmem >> 20), // B -> MB
|
||||
cpu); // 1 CPU at 100% is 1
|
||||
vcores); // Used Virtual Cores
|
||||
|
||||
try {
|
||||
Thread.sleep(monitoringInterval);
|
||||
|
|
|
@ -63,7 +63,7 @@ public class MockResourceCalculatorPlugin extends ResourceCalculatorPlugin {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float getCpuUsage() {
|
||||
public float getCpuUsagePercentage() {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,7 +73,7 @@ public class TestNodeManagerHardwareUtils {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float getCpuUsage() {
|
||||
public float getCpuUsagePercentage() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue