Add basic cgroup CPU metrics

This commit adds basic cgroup CPU metrics to the node stats API.

Relates #21029
This commit is contained in:
Jason Tedor 2016-10-24 08:26:56 -04:00 committed by GitHub
parent 0a410d3916
commit 3d642ab0eb
7 changed files with 674 additions and 13 deletions

View File

@ -20,8 +20,6 @@
package org.elasticsearch.monitor.os;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;
import org.apache.lucene.util.Constants;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.io.PathUtils;
@ -33,7 +31,13 @@ import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class OsProbe {
@ -136,7 +140,9 @@ public class OsProbe {
try {
return new double[]{Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2])};
} catch (final NumberFormatException e) {
logger.debug((Supplier<?>) () -> new ParameterizedMessage("error parsing /proc/loadavg [{}]", procLoadAvg), e);
if (logger.isDebugEnabled()) {
logger.debug(String.format(Locale.ROOT, "error parsing /proc/loadavg [%s]", procLoadAvg), e);
}
}
}
// fallback
@ -180,6 +186,272 @@ public class OsProbe {
return Probes.getLoadAndScaleToPercent(getSystemCpuLoad, osMxBean);
}
/**
 * Reads a file that is expected to contain exactly one line.
 *
 * @param path path to the file to read
 * @return the first (and expected only) line of the file
 * @throws IOException if an I/O exception occurs reading the file,
 *                     or if the file contains no lines at all
 */
private String readSingleLine(final Path path) throws IOException {
    final List<String> lines = Files.readAllLines(path);
    if (lines.isEmpty()) {
        // surface an empty file as an I/O problem so that callers
        // handling IOException can degrade gracefully rather than
        // failing on an index-out-of-bounds when assertions are
        // disabled
        throw new IOException("expected a single line in [" + path + "] but the file is empty");
    }
    assert lines.size() == 1 : "expected a single line in [" + path + "] but found [" + lines.size() + "]";
    return lines.get(0);
}
// pattern for lines in /proc/self/cgroup; the second field is a
// comma-separated list of one or more subsystems
private static final Pattern CONTROL_GROUP_PATTERN = Pattern.compile("\\d+:([^:,]+(?:,[^:,]+)*):(/.*)");
/**
 * A map of the control groups to which the Elasticsearch process
 * belongs. Note that this is a map because the control groups can
 * vary from subsystem to subsystem. Additionally, this map can not
 * be cached because a running process can be reclassified.
 * <p>
 * Lines that do not name at least one subsystem (for example the
 * entry for a unified hierarchy, whose subsystem list is empty) are
 * skipped, so the returned map only contains subsystems that were
 * actually listed.
 *
 * @return a map from subsystems to the control group for the
 *         Elasticsearch process.
 * @throws IOException if an I/O exception occurs reading
 *                     {@code /proc/self/cgroup}
 */
private Map<String, String> getControlGroups() throws IOException {
    final List<String> lines = readProcSelfCgroup();
    final Map<String, String> controllerMap = new HashMap<>();
    for (final String line : lines) {
        final Matcher matcher = CONTROL_GROUP_PATTERN.matcher(line);
        // Matcher#matches must be invoked before any group is read;
        // a line that does not match carries no subsystem to record
        // and reading its groups would throw, so it is skipped
        if (!matcher.matches()) {
            continue;
        }
        // at this point we have captured the subsystems and the
        // control group
        final String controlGroup = matcher.group(2);
        for (final String controller : matcher.group(1).split(",")) {
            controllerMap.put(controller, controlGroup);
        }
    }
    return controllerMap;
}
/**
 * Reads every line of {@code /proc/self/cgroup}, the file that
 * lists the control groups to which the Elasticsearch process
 * belongs. Each line describes one control group hierarchy as
 * three colon-separated fields:
 * <ol>
 * <li>the hierarchy ID,</li>
 * <li>a comma-separated list of the subsystems bound to the
 * hierarchy, and</li>
 * <li>the control group.</li>
 * </ol>
 * This method is not private so that tests can substitute the
 * contents of the file.
 *
 * @return the lines from {@code /proc/self/cgroup}
 * @throws IOException if an I/O exception occurs reading
 *                     {@code /proc/self/cgroup}
 */
@SuppressForbidden(reason = "access /proc/self/cgroup")
List<String> readProcSelfCgroup() throws IOException {
    final Path procSelfCgroup = PathUtils.get("/proc/self/cgroup");
    final List<String> cgroupLines = Files.readAllLines(procSelfCgroup);
    assert cgroupLines != null && !cgroupLines.isEmpty();
    return cgroupLines;
}
/**
 * Parses the contents of {@code cpuacct.usage}: the total CPU time
 * in nanoseconds consumed by all tasks in the {@code cpuacct}
 * control group to which the Elasticsearch process belongs.
 *
 * @param controlGroup the control group for the Elasticsearch
 *                     process for the {@code cpuacct} subsystem
 * @return the total CPU time in nanoseconds
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpuacct.usage} for the control group
 */
private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException {
    final String usage = readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup);
    return Long.parseLong(usage);
}
/**
 * Reads the single line held by {@code cpuacct.usage} under
 * {@code /sys/fs/cgroup/cpuacct} for the given control group. The
 * line is the total CPU time in nanoseconds consumed by all tasks
 * in that control group. This method is not private so that tests
 * can substitute the contents of the file.
 *
 * @param controlGroup the control group to which the Elasticsearch
 *                     process belongs for the {@code cpuacct}
 *                     subsystem
 * @return the line from {@code cpuacct.usage}
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpuacct.usage} for the control group
 */
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpuacct")
String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOException {
    final Path usagePath = PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage");
    return readSingleLine(usagePath);
}
/**
 * Parses the contents of {@code cpu.cfs_period_us}: the period of
 * time in microseconds for how frequently the Elasticsearch
 * control group's access to CPU resources is reallocated.
 *
 * @param controlGroup the control group for the Elasticsearch
 *                     process for the {@code cpu} subsystem
 * @return the CFS quota period in microseconds
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpu.cfs_period_us} for the control group
 */
private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throws IOException {
    final String period = readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup);
    return Long.parseLong(period);
}
/**
 * Reads the single line held by {@code cpu.cfs_period_us} under
 * {@code /sys/fs/cgroup/cpu} for the given control group. The line
 * is the period of time in microseconds for how frequently the
 * control group's access to CPU resources is reallocated. This
 * method is not private so that tests can substitute the contents
 * of the file.
 *
 * @param controlGroup the control group to which the Elasticsearch
 *                     process belongs for the {@code cpu}
 *                     subsystem
 * @return the line from {@code cpu.cfs_period_us}
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpu.cfs_period_us} for the control group
 */
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu")
String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOException {
    final Path periodPath = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_period_us");
    return readSingleLine(periodPath);
}
/**
 * Parses the contents of {@code cpu.cfs_quota_us}: the total time
 * in microseconds that all tasks in the Elasticsearch control
 * group can run during one period as specified by
 * {@code cpu.cfs_period_us}.
 *
 * @param controlGroup the control group for the Elasticsearch
 *                     process for the {@code cpu} subsystem
 * @return the CFS quota in microseconds
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpu.cfs_quota_us} for the control group
 */
private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException {
    final String quota = readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup);
    return Long.parseLong(quota);
}
/**
 * Reads the single line held by {@code cpu.cfs_quota_us} under
 * {@code /sys/fs/cgroup/cpu} for the given control group. The line
 * is the total time in microseconds that all tasks in the control
 * group can run during one period as specified by
 * {@code cpu.cfs_period_us}. This method is not private so that
 * tests can substitute the contents of the file.
 *
 * @param controlGroup the control group to which the Elasticsearch
 *                     process belongs for the {@code cpu}
 *                     subsystem
 * @return the line from {@code cpu.cfs_quota_us}
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpu.cfs_quota_us} for the control group
 */
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu")
String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws IOException {
    final Path quotaPath = PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_quota_us");
    return readSingleLine(quotaPath);
}
/**
 * Builds the CPU time statistics for all tasks in the
 * Elasticsearch control group from the lines of {@code cpu.stat}.
 * Each line is split on whitespace into a key and a value; the
 * keys {@code nr_periods}, {@code nr_throttled} and
 * {@code throttled_time} are consumed and any other key is
 * ignored.
 *
 * @param controlGroup the control group for the Elasticsearch
 *                     process for the {@code cpu} subsystem
 * @return the CPU time statistics
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpu.stat} for the control group
 */
private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup) throws IOException {
    // -1 marks a statistic that has not been seen yet
    long periods = -1;
    long throttledCount = -1;
    long throttledNanos = -1;
    for (final String statLine : readSysFsCgroupCpuAcctCpuStat(controlGroup)) {
        final String[] keyAndValue = statLine.split("\\s+");
        final String key = keyAndValue[0];
        if ("nr_periods".equals(key)) {
            periods = Long.parseLong(keyAndValue[1]);
        } else if ("nr_throttled".equals(key)) {
            throttledCount = Long.parseLong(keyAndValue[1]);
        } else if ("throttled_time".equals(key)) {
            throttledNanos = Long.parseLong(keyAndValue[1]);
        }
    }
    assert periods != -1;
    assert throttledCount != -1;
    assert throttledNanos != -1;
    return new OsStats.Cgroup.CpuStat(periods, throttledCount, throttledNanos);
}
/**
 * Returns the lines from {@code cpu.stat} for the control
 * group to which the Elasticsearch process belongs for the
 * {@code cpu} subsystem. These lines represent the CPU time
 * statistics and include at least
 *
 * <pre>
 * nr_periods \d+
 * nr_throttled \d+
 * throttled_time \d+
 * </pre>
 *
 * where {@code nr_periods} is the number of period intervals
 * as specified by {@code cpu.cfs_period_us} that have elapsed,
 * {@code nr_throttled} is the number of times tasks in the given
 * control group have been throttled, and {@code throttled_time} is
 * the total time in nanoseconds for which tasks in the given
 * control group have been throttled. Additional lines may be
 * present and are returned unchanged. This method is not private
 * so that tests can substitute the contents of the file.
 *
 * @param controlGroup the control group to which the Elasticsearch
 *                     process belongs for the {@code cpu}
 *                     subsystem
 *
 * @return the lines from {@code cpu.stat}
 * @throws IOException if an I/O exception occurs reading
 *                     {@code cpu.stat} for the control group
 */
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu")
List<String> readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOException {
    final List<String> lines = Files.readAllLines(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.stat"));
    // only a lower bound is asserted: the three statistics above must
    // be present, but the file is allowed to carry further fields
    assert lines != null && lines.size() >= 3;
    return lines;
}
/**
 * Basic cgroup stats.
 *
 * @return basic cgroup stats, or {@code null} if the process has no
 *         control group for the {@code cpu} or {@code cpuacct}
 *         subsystem, or if the cgroup stats could not be read or
 *         parsed
 */
private OsStats.Cgroup getCgroup() {
    try {
        final Map<String, String> controllerMap = getControlGroups();
        final String cpuControlGroup = controllerMap.get("cpu");
        final String cpuAcctControlGroup = controllerMap.get("cpuacct");
        if (cpuControlGroup == null || cpuAcctControlGroup == null) {
            // without both control groups there is no path to read
            // the stats from; report the stats as unavailable rather
            // than building paths from a null component
            if (logger.isDebugEnabled()) {
                logger.debug("no control group found for the cpu or cpuacct subsystem");
            }
            return null;
        }
        return new OsStats.Cgroup(
            cpuAcctControlGroup,
            getCgroupCpuAcctUsageNanos(cpuAcctControlGroup),
            cpuControlGroup,
            getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup),
            getCGroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup),
            getCgroupCpuAcctCpuStat(cpuControlGroup));
    } catch (final IOException | NumberFormatException e) {
        // a value that can not be parsed is treated like a value that
        // can not be read: the stats are best-effort and must not fail
        // the collection of the remaining OS stats
        if (logger.isDebugEnabled()) {
            logger.debug("error reading control group stats", e);
        }
        return null;
    }
}
// Holds the shared OsProbe instance; the instance is created when this
// nested class is initialized.
private static class OsProbeHolder {
private static final OsProbe INSTANCE = new OsProbe();
}
@ -189,6 +461,7 @@ public class OsProbe {
}
OsProbe() {
}
private final Logger logger = ESLoggerFactory.getLogger(getClass());
@ -199,10 +472,11 @@ public class OsProbe {
}
public OsStats osStats() {
OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage());
OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize());
OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize());
return new OsStats(System.currentTimeMillis(), cpu, mem , swap);
final OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage());
final OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize());
final OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize());
final OsStats.Cgroup cgroup = Constants.LINUX ? getCgroup() : null;
return new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
}
/**
@ -217,4 +491,5 @@ public class OsProbe {
return null;
}
}
}

View File

@ -36,12 +36,14 @@ public class OsStats implements Writeable, ToXContent {
private final Cpu cpu;
private final Mem mem;
private final Swap swap;
private final Cgroup cgroup;
public OsStats(long timestamp, Cpu cpu, Mem mem, Swap swap) {
public OsStats(final long timestamp, final Cpu cpu, final Mem mem, final Swap swap, final Cgroup cgroup) {
this.timestamp = timestamp;
this.cpu = Objects.requireNonNull(cpu, "cpu must not be null");
this.mem = Objects.requireNonNull(mem, "mem must not be null");;
this.swap = Objects.requireNonNull(swap, "swap must not be null");;
this.cpu = Objects.requireNonNull(cpu);
this.mem = Objects.requireNonNull(mem);
this.swap = Objects.requireNonNull(swap);
this.cgroup = cgroup;
}
public OsStats(StreamInput in) throws IOException {
@ -49,6 +51,7 @@ public class OsStats implements Writeable, ToXContent {
this.cpu = new Cpu(in);
this.mem = new Mem(in);
this.swap = new Swap(in);
this.cgroup = in.readOptionalWriteable(Cgroup::new);
}
@Override
@ -57,6 +60,7 @@ public class OsStats implements Writeable, ToXContent {
cpu.writeTo(out);
mem.writeTo(out);
swap.writeTo(out);
out.writeOptionalWriteable(cgroup);
}
public long getTimestamp() {
@ -73,6 +77,10 @@ public class OsStats implements Writeable, ToXContent {
return swap;
}
/**
 * The cgroup statistics. The value is optional: it is not null-checked
 * on construction and is serialized as an optional value.
 *
 * @return the cgroup statistics, or {@code null} if none are available
 */
public Cgroup getCgroup() {
return cgroup;
}
static final class Fields {
static final String OS = "os";
static final String TIMESTAMP = "timestamp";
@ -103,6 +111,9 @@ public class OsStats implements Writeable, ToXContent {
cpu.toXContent(builder, params);
mem.toXContent(builder, params);
swap.toXContent(builder, params);
if (cgroup != null) {
cgroup.toXContent(builder, params);
}
builder.endObject();
return builder;
}
@ -265,7 +276,211 @@ public class OsStats implements Writeable, ToXContent {
}
}
/**
 * Basic control group (cgroup) statistics: the control groups for
 * the {@code cpuacct} and {@code cpu} subsystems together with the
 * CPU usage, CFS period, CFS quota and throttling statistics.
 */
public static class Cgroup implements Writeable, ToXContent {

    private final String cpuAcctControlGroup;
    private final long cpuAcctUsageNanos;
    private final String cpuControlGroup;
    private final long cpuCfsPeriodMicros;
    private final long cpuCfsQuotaMicros;
    private final CpuStat cpuStat;

    /**
     * Creates cgroup statistics from the given values.
     *
     * @param cpuAcctControlGroup the control group for the {@code cpuacct} subsystem
     * @param cpuAcctUsageNanos   the total CPU time in nanoseconds
     * @param cpuControlGroup     the control group for the {@code cpu} subsystem
     * @param cpuCfsPeriodMicros  the CFS period in microseconds
     * @param cpuCfsQuotaMicros   the CFS quota in microseconds
     * @param cpuStat             the CPU time statistics; must not be {@code null}
     */
    public Cgroup(
        final String cpuAcctControlGroup,
        final long cpuAcctUsageNanos,
        final String cpuControlGroup,
        final long cpuCfsPeriodMicros,
        final long cpuCfsQuotaMicros,
        final CpuStat cpuStat) {
        this.cpuAcctControlGroup = cpuAcctControlGroup;
        this.cpuAcctUsageNanos = cpuAcctUsageNanos;
        this.cpuControlGroup = cpuControlGroup;
        this.cpuCfsPeriodMicros = cpuCfsPeriodMicros;
        this.cpuCfsQuotaMicros = cpuCfsQuotaMicros;
        this.cpuStat = Objects.requireNonNull(cpuStat);
    }

    /**
     * Reads cgroup statistics from a stream, in the same field
     * order in which {@link #writeTo(StreamOutput)} writes them.
     *
     * @param in the stream to read from
     * @throws IOException if reading from the stream fails
     */
    Cgroup(final StreamInput in) throws IOException {
        this.cpuAcctControlGroup = in.readString();
        this.cpuAcctUsageNanos = in.readLong();
        this.cpuControlGroup = in.readString();
        this.cpuCfsPeriodMicros = in.readLong();
        this.cpuCfsQuotaMicros = in.readLong();
        this.cpuStat = new CpuStat(in);
    }

    /**
     * The control group for the {@code cpuacct} subsystem.
     *
     * @return the control group
     */
    public String getCpuAcctControlGroup() {
        return cpuAcctControlGroup;
    }

    /**
     * The total CPU time consumed by all tasks in the
     * {@code cpuacct} control group returned by
     * {@link #getCpuAcctControlGroup()}.
     *
     * @return the total CPU time in nanoseconds
     */
    public long getCpuAcctUsageNanos() {
        return cpuAcctUsageNanos;
    }

    /**
     * The control group for the {@code cpu} subsystem.
     *
     * @return the control group
     */
    public String getCpuControlGroup() {
        return cpuControlGroup;
    }

    /**
     * The period of time for how frequently the control group
     * returned by {@link #getCpuControlGroup()} has its access to
     * CPU resources reallocated.
     *
     * @return the period of time in microseconds
     */
    public long getCpuCfsPeriodMicros() {
        return cpuCfsPeriodMicros;
    }

    /**
     * The total amount of time for which all tasks in the control
     * group returned by {@link #getCpuControlGroup()} can run in
     * one period as returned by {@link #getCpuCfsPeriodMicros()}.
     *
     * @return the total amount of time in microseconds
     */
    public long getCpuCfsQuotaMicros() {
        return cpuCfsQuotaMicros;
    }

    /**
     * The CPU time statistics. See {@link CpuStat}.
     *
     * @return the CPU time statistics.
     */
    public CpuStat getCpuStat() {
        return cpuStat;
    }

    @Override
    public void writeTo(final StreamOutput out) throws IOException {
        out.writeString(cpuAcctControlGroup);
        out.writeLong(cpuAcctUsageNanos);
        out.writeString(cpuControlGroup);
        out.writeLong(cpuCfsPeriodMicros);
        out.writeLong(cpuCfsQuotaMicros);
        cpuStat.writeTo(out);
    }

    @Override
    public XContentBuilder toXContent(final XContentBuilder builder, final Params params) throws IOException {
        builder.startObject("cgroup");

        // statistics for the cpuacct subsystem
        builder.startObject("cpuacct");
        builder.field("control_group", cpuAcctControlGroup);
        builder.field("usage_nanos", cpuAcctUsageNanos);
        builder.endObject();

        // statistics for the cpu subsystem, including the nested "stat" object
        builder.startObject("cpu");
        builder.field("control_group", cpuControlGroup);
        builder.field("cfs_period_micros", cpuCfsPeriodMicros);
        builder.field("cfs_quota_micros", cpuCfsQuotaMicros);
        cpuStat.toXContent(builder, params);
        builder.endObject();

        builder.endObject();
        return builder;
    }

    /**
     * CPU time statistics: the number of elapsed periods, the
     * number of times tasks were throttled, and the total time
     * for which they were throttled.
     */
    public static class CpuStat implements Writeable, ToXContent {

        private final long numberOfElapsedPeriods;
        private final long numberOfTimesThrottled;
        private final long timeThrottledNanos;

        /**
         * Creates CPU time statistics from the given values.
         *
         * @param numberOfElapsedPeriods the number of elapsed periods
         * @param numberOfTimesThrottled the number of times tasks have been throttled
         * @param timeThrottledNanos     the total throttled time in nanoseconds
         */
        public CpuStat(final long numberOfElapsedPeriods, final long numberOfTimesThrottled, final long timeThrottledNanos) {
            this.numberOfElapsedPeriods = numberOfElapsedPeriods;
            this.numberOfTimesThrottled = numberOfTimesThrottled;
            this.timeThrottledNanos = timeThrottledNanos;
        }

        /**
         * Reads CPU time statistics from a stream, in the same
         * field order in which {@link #writeTo(StreamOutput)}
         * writes them.
         *
         * @param in the stream to read from
         * @throws IOException if reading from the stream fails
         */
        CpuStat(final StreamInput in) throws IOException {
            this.numberOfElapsedPeriods = in.readLong();
            this.numberOfTimesThrottled = in.readLong();
            this.timeThrottledNanos = in.readLong();
        }

        /**
         * The number of elapsed periods.
         *
         * @return the number of elapsed periods as measured by
         *         {@code cpu.cfs_period_us}
         */
        public long getNumberOfElapsedPeriods() {
            return numberOfElapsedPeriods;
        }

        /**
         * The number of times tasks in the control group have been
         * throttled.
         *
         * @return the number of times
         */
        public long getNumberOfTimesThrottled() {
            return numberOfTimesThrottled;
        }

        /**
         * The total time duration for which tasks in the control
         * group have been throttled.
         *
         * @return the total time in nanoseconds
         */
        public long getTimeThrottledNanos() {
            return timeThrottledNanos;
        }

        @Override
        public void writeTo(final StreamOutput out) throws IOException {
            out.writeLong(numberOfElapsedPeriods);
            out.writeLong(numberOfTimesThrottled);
            out.writeLong(timeThrottledNanos);
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject("stat");
            builder.field("number_of_elapsed_periods", numberOfElapsedPeriods);
            builder.field("number_of_times_throttled", numberOfTimesThrottled);
            builder.field("time_throttled_nanos", timeThrottledNanos);
            builder.endObject();
            return builder;
        }
    }
}
/**
 * Expresses {@code used} as a percentage of {@code max}, rounded to
 * the nearest whole number and narrowed to a {@code short}.
 *
 * @param used the amount used
 * @param max  the maximum amount
 * @return the rounded percentage, or {@code 0} if {@code max} is
 *         zero or negative
 */
public static short calculatePercentage(long used, long max) {
    if (max <= 0) {
        return 0;
    }
    final double percent = (100d * used) / max;
    return (short) Math.round(percent);
}
}

View File

@ -121,4 +121,9 @@ grant {
// io stats on Linux
permission java.io.FilePermission "/proc/diskstats", "read";
// control group stats on Linux
permission java.io.FilePermission "/proc/self/cgroup", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
};

View File

@ -72,6 +72,30 @@ public class NodeStatsTests extends ESTestCase {
assertEquals(nodeStats.getOs().getMem().getFreePercent(), deserializedNodeStats.getOs().getMem().getFreePercent());
assertEquals(nodeStats.getOs().getMem().getUsedPercent(), deserializedNodeStats.getOs().getMem().getUsedPercent());
assertEquals(nodeStats.getOs().getCpu().getPercent(), deserializedNodeStats.getOs().getCpu().getPercent());
assertEquals(
nodeStats.getOs().getCgroup().getCpuAcctControlGroup(),
deserializedNodeStats.getOs().getCgroup().getCpuAcctControlGroup());
assertEquals(
nodeStats.getOs().getCgroup().getCpuAcctUsageNanos(),
deserializedNodeStats.getOs().getCgroup().getCpuAcctUsageNanos());
assertEquals(
nodeStats.getOs().getCgroup().getCpuControlGroup(),
deserializedNodeStats.getOs().getCgroup().getCpuControlGroup());
assertEquals(
nodeStats.getOs().getCgroup().getCpuCfsPeriodMicros(),
deserializedNodeStats.getOs().getCgroup().getCpuCfsPeriodMicros());
assertEquals(
nodeStats.getOs().getCgroup().getCpuCfsQuotaMicros(),
deserializedNodeStats.getOs().getCgroup().getCpuCfsQuotaMicros());
assertEquals(
nodeStats.getOs().getCgroup().getCpuStat().getNumberOfElapsedPeriods(),
deserializedNodeStats.getOs().getCgroup().getCpuStat().getNumberOfElapsedPeriods());
assertEquals(
nodeStats.getOs().getCgroup().getCpuStat().getNumberOfTimesThrottled(),
deserializedNodeStats.getOs().getCgroup().getCpuStat().getNumberOfTimesThrottled());
assertEquals(
nodeStats.getOs().getCgroup().getCpuStat().getTimeThrottledNanos(),
deserializedNodeStats.getOs().getCgroup().getCpuStat().getTimeThrottledNanos());
assertArrayEquals(nodeStats.getOs().getCpu().getLoadAverage(),
deserializedNodeStats.getOs().getCpu().getLoadAverage(), 0);
}
@ -264,7 +288,14 @@ public class NodeStatsTests extends ESTestCase {
}
osStats = new OsStats(System.currentTimeMillis(), new OsStats.Cpu(randomShort(), loadAverages),
new OsStats.Mem(randomLong(), randomLong()),
new OsStats.Swap(randomLong(), randomLong()));
new OsStats.Swap(randomLong(), randomLong()),
new OsStats.Cgroup(
randomAsciiOfLength(8),
randomPositiveLong(),
randomAsciiOfLength(8),
randomPositiveLong(),
randomPositiveLong(),
new OsStats.Cgroup.CpuStat(randomPositiveLong(), randomPositiveLong(), randomPositiveLong())));
}
ProcessStats processStats = frequently() ? new ProcessStats(randomPositiveLong(), randomPositiveLong(), randomPositiveLong(),
new ProcessStats.Cpu(randomShort(), randomPositiveLong()),

View File

@ -22,6 +22,9 @@ package org.elasticsearch.monitor.os;
import org.apache.lucene.util.Constants;
import org.elasticsearch.test.ESTestCase;
import java.util.Arrays;
import java.util.List;
import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.both;
@ -30,8 +33,10 @@ import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThanOrEqualTo;
import static org.hamcrest.Matchers.notNullValue;
public class OsProbeTests extends ESTestCase {
private final OsProbe probe = OsProbe.getInstance();
public void testOsInfo() {
@ -102,6 +107,20 @@ public class OsProbeTests extends ESTestCase {
assertThat(stats.getSwap().getFree().getBytes(), equalTo(0L));
assertThat(stats.getSwap().getUsed().getBytes(), equalTo(0L));
}
if (Constants.LINUX) {
if (stats.getCgroup() != null) {
assertThat(stats.getCgroup().getCpuAcctControlGroup(), notNullValue());
assertThat(stats.getCgroup().getCpuAcctUsageNanos(), greaterThan(0L));
assertThat(stats.getCgroup().getCpuCfsQuotaMicros(), anyOf(equalTo(-1L), greaterThanOrEqualTo(0L)));
assertThat(stats.getCgroup().getCpuCfsPeriodMicros(), greaterThanOrEqualTo(0L));
assertThat(stats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(), greaterThanOrEqualTo(0L));
assertThat(stats.getCgroup().getCpuStat().getNumberOfTimesThrottled(), greaterThanOrEqualTo(0L));
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(0L));
}
} else {
assertNull(stats.getCgroup());
}
}
public void testGetSystemLoadAverage() {
@ -125,4 +144,66 @@ public class OsProbeTests extends ESTestCase {
assertThat(systemLoadAverage[2], equalTo(Double.parseDouble("1.99")));
}
/**
 * Verifies that the probe derives the {@code cpu} and
 * {@code cpuacct} control groups from stubbed
 * {@code /proc/self/cgroup} lines, passes that control group to
 * every stubbed {@code /sys/fs/cgroup} reader, and parses the
 * stubbed file contents into the expected statistics.
 */
public void testCgroupProbe() {
    assumeTrue("test runs on Linux only", Constants.LINUX);
    final String hierarchy = randomAsciiOfLength(16);
    final OsProbe probe = new OsProbe() {
        @Override
        List<String> readProcSelfCgroup() {
            return Arrays.asList(
                "11:freezer:/",
                "10:net_cls,net_prio:/",
                "9:pids:/",
                "8:cpuset:/",
                "7:blkio:/",
                "6:memory:/",
                "5:devices:/user.slice",
                "4:hugetlb:/",
                "3:perf_event:/",
                "2:cpu,cpuacct:/" + hierarchy,
                "1:name=systemd:/user.slice/user-1000.slice/session-2359.scope");
        }
        @Override
        String readSysFsCgroupCpuAcctCpuAcctUsage(String controlGroup) {
            assertThat(controlGroup, equalTo("/" + hierarchy));
            return "364869866063112";
        }
        @Override
        String readSysFsCgroupCpuAcctCpuCfsPeriod(String controlGroup) {
            assertThat(controlGroup, equalTo("/" + hierarchy));
            return "100000";
        }
        @Override
        String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) {
            assertThat(controlGroup, equalTo("/" + hierarchy));
            return "50000";
        }
        @Override
        List<String> readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
            // checked here as in the other readers so that a wrong
            // control group for cpu.stat does not go unnoticed
            assertThat(controlGroup, equalTo("/" + hierarchy));
            return Arrays.asList(
                "nr_periods 17992",
                "nr_throttled 1311",
                "throttled_time 139298645489");
        }
    };
    final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
    // fail with a clear assertion rather than a NullPointerException
    // if the probe did not produce cgroup stats
    assertThat(cgroup, notNullValue());
    assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
    assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
    assertThat(cgroup.getCpuControlGroup(), equalTo("/" + hierarchy));
    assertThat(cgroup.getCpuCfsPeriodMicros(), equalTo(100000L));
    assertThat(cgroup.getCpuCfsQuotaMicros(), equalTo(50000L));
    assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(17992L));
    assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(1311L));
    assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(139298645489L));
}
}

View File

@ -36,7 +36,14 @@ public class OsStatsTests extends ESTestCase {
OsStats.Cpu cpu = new OsStats.Cpu(randomShort(), loadAverages);
OsStats.Mem mem = new OsStats.Mem(randomLong(), randomLong());
OsStats.Swap swap = new OsStats.Swap(randomLong(), randomLong());
OsStats osStats = new OsStats(System.currentTimeMillis(), cpu, mem, swap);
OsStats.Cgroup cgroup = new OsStats.Cgroup(
randomAsciiOfLength(8),
randomPositiveLong(),
randomAsciiOfLength(8),
randomPositiveLong(),
randomPositiveLong(),
new OsStats.Cgroup.CpuStat(randomPositiveLong(), randomPositiveLong(), randomPositiveLong()));
OsStats osStats = new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
try (BytesStreamOutput out = new BytesStreamOutput()) {
osStats.writeTo(out);
@ -49,7 +56,22 @@ public class OsStatsTests extends ESTestCase {
assertEquals(osStats.getMem().getTotal(), deserializedOsStats.getMem().getTotal());
assertEquals(osStats.getSwap().getFree(), deserializedOsStats.getSwap().getFree());
assertEquals(osStats.getSwap().getTotal(), deserializedOsStats.getSwap().getTotal());
assertEquals(osStats.getCgroup().getCpuAcctControlGroup(), deserializedOsStats.getCgroup().getCpuAcctControlGroup());
assertEquals(osStats.getCgroup().getCpuAcctUsageNanos(), deserializedOsStats.getCgroup().getCpuAcctUsageNanos());
assertEquals(osStats.getCgroup().getCpuControlGroup(), deserializedOsStats.getCgroup().getCpuControlGroup());
assertEquals(osStats.getCgroup().getCpuCfsPeriodMicros(), deserializedOsStats.getCgroup().getCpuCfsPeriodMicros());
assertEquals(osStats.getCgroup().getCpuCfsQuotaMicros(), deserializedOsStats.getCgroup().getCpuCfsQuotaMicros());
assertEquals(
osStats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(),
deserializedOsStats.getCgroup().getCpuStat().getNumberOfElapsedPeriods());
assertEquals(
osStats.getCgroup().getCpuStat().getNumberOfTimesThrottled(),
deserializedOsStats.getCgroup().getCpuStat().getNumberOfTimesThrottled());
assertEquals(
osStats.getCgroup().getCpuStat().getTimeThrottledNanos(),
deserializedOsStats.getCgroup().getCpuStat().getTimeThrottledNanos());
}
}
}
}

View File

@ -217,6 +217,38 @@ the operating system:
`os.swap.used_in_bytes`::
Amount of used swap space in bytes
`os.cgroup.cpuacct.control_group` (Linux only)::
The `cpuacct` control group to which the Elasticsearch process
belongs
`os.cgroup.cpuacct.usage_nanos` (Linux only)::
The total CPU time (in nanoseconds) consumed by all tasks in the
same cgroup as the Elasticsearch process
`os.cgroup.cpu.control_group` (Linux only)::
The `cpu` control group to which the Elasticsearch process belongs
`os.cgroup.cpu.cfs_period_micros` (Linux only)::
The period of time (in microseconds) for how regularly all tasks in
the same cgroup as the Elasticsearch process should have their
access to CPU resources reallocated.
`os.cgroup.cpu.cfs_quota_micros` (Linux only)::
The total amount of time (in microseconds) for which all tasks in
the same cgroup as the Elasticsearch process can run during one
period (as specified by `os.cgroup.cpu.cfs_period_micros`)
`os.cgroup.cpu.stat.number_of_elapsed_periods` (Linux only)::
The number of reporting periods (as specified by
`os.cgroup.cpu.cfs_period_micros`) that have elapsed
`os.cgroup.cpu.stat.number_of_times_throttled` (Linux only)::
The number of times all tasks in the same cgroup as the
Elasticsearch process have been throttled.
`os.cgroup.cpu.stat.time_throttled_nanos` (Linux only)::
The total amount of time (in nanoseconds) for which all tasks in
the same cgroup as the Elasticsearch process have been throttled.
[float]
[[process-stats]]