mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-09 14:34:43 +00:00
Add basic cgroup CPU metrics
This commit adds basic cgroup CPU metrics to the node stats API. Relates #21029
This commit is contained in:
parent
0a410d3916
commit
3d642ab0eb
@ -20,8 +20,6 @@
|
||||
package org.elasticsearch.monitor.os;
|
||||
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.logging.log4j.message.ParameterizedMessage;
|
||||
import org.apache.logging.log4j.util.Supplier;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.elasticsearch.common.SuppressForbidden;
|
||||
import org.elasticsearch.common.io.PathUtils;
|
||||
@ -33,7 +31,13 @@ import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.OperatingSystemMXBean;
|
||||
import java.lang.reflect.Method;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class OsProbe {
|
||||
|
||||
@ -136,7 +140,9 @@ public class OsProbe {
|
||||
try {
|
||||
return new double[]{Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2])};
|
||||
} catch (final NumberFormatException e) {
|
||||
logger.debug((Supplier<?>) () -> new ParameterizedMessage("error parsing /proc/loadavg [{}]", procLoadAvg), e);
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug(String.format(Locale.ROOT, "error parsing /proc/loadavg [%s]", procLoadAvg), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
// fallback
|
||||
@ -180,6 +186,272 @@ public class OsProbe {
|
||||
return Probes.getLoadAndScaleToPercent(getSystemCpuLoad, osMxBean);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a file containing a single line.
|
||||
*
|
||||
* @param path path to the file to read
|
||||
* @return the single line
|
||||
* @throws IOException if an I/O exception occurs reading the file
|
||||
*/
|
||||
private String readSingleLine(final Path path) throws IOException {
|
||||
final List<String> lines = Files.readAllLines(path);
|
||||
assert lines != null && lines.size() == 1;
|
||||
return lines.get(0);
|
||||
}
|
||||
|
||||
// pattern for lines in /proc/self/cgroup
|
||||
private static final Pattern CONTROL_GROUP_PATTERN = Pattern.compile("\\d+:([^:,]+(?:,[^:,]+)?):(/.*)");
|
||||
|
||||
/**
|
||||
* A map of the control groups to which the Elasticsearch process
|
||||
* belongs. Note that this is a map because the control groups can
|
||||
* vary from subsystem to subsystem. Additionally, this map can not
|
||||
* be cached because a running process can be reclassified.
|
||||
*
|
||||
* @return a map from subsystems to the control group for the
|
||||
* Elasticsearch process.
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code /proc/self/cgroup}
|
||||
*/
|
||||
private Map<String, String> getControlGroups() throws IOException {
|
||||
final List<String> lines = readProcSelfCgroup();
|
||||
final Map<String, String> controllerMap = new HashMap<>();
|
||||
for (final String line : lines) {
|
||||
final Matcher matcher = CONTROL_GROUP_PATTERN.matcher(line);
|
||||
// note that Matcher#matches must be invoked as
|
||||
// matching is lazy; this can not happen in an assert
|
||||
// as assertions might not be enabled
|
||||
final boolean matches = matcher.matches();
|
||||
assert matches : line;
|
||||
// at this point we have captured the subsystems and the
|
||||
// control group
|
||||
final String[] controllers = matcher.group(1).split(",");
|
||||
for (final String controller : controllers) {
|
||||
controllerMap.put(controller, matcher.group(2));
|
||||
}
|
||||
}
|
||||
return controllerMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* The lines from {@code /proc/self/cgroup}. This file represents
|
||||
* the control groups to which the Elasticsearch process belongs.
|
||||
* Each line in this file represents a control group hierarchy of
|
||||
* the form
|
||||
* <p>
|
||||
* {@code \d+:([^:,]+(?:,[^:,]+)?):(/.*)}
|
||||
* <p>
|
||||
* with the first field representing the hierarchy ID, the second
|
||||
* field representing a comma-separated list of the subsystems
|
||||
* bound to the hierarchy, and the last field representing the
|
||||
* control group.
|
||||
*
|
||||
* @return the lines from {@code /proc/self/cgroup}
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code /proc/self/cgroup}
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /proc/self/cgroup")
|
||||
List<String> readProcSelfCgroup() throws IOException {
|
||||
final List<String> lines = Files.readAllLines(PathUtils.get("/proc/self/cgroup"));
|
||||
assert lines != null && !lines.isEmpty();
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* The total CPU time in nanoseconds consumed by all tasks in the
|
||||
* cgroup to which the Elasticsearch process belongs for the
|
||||
* {@code cpuacct} subsystem.
|
||||
*
|
||||
* @param controlGroup the control group for the Elasticsearch
|
||||
* process for the {@code cpuacct} subsystem
|
||||
* @return the total CPU time in nanoseconds
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpuacct.usage} for the control group
|
||||
*/
|
||||
private long getCgroupCpuAcctUsageNanos(final String controlGroup) throws IOException {
|
||||
return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctUsage(controlGroup));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the line from {@code cpuacct.usage} for the control
|
||||
* group to which the Elasticsearch process belongs for the
|
||||
* {@code cpuacct} subsystem. This line represents the total CPU
|
||||
* time in nanoseconds consumed by all tasks in the same control
|
||||
* group.
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch
|
||||
* process belongs for the {@code cpuacct}
|
||||
* subsystem
|
||||
* @return the line from {@code cpuacct.usage}
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpuacct.usage} for the control group
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpuacct")
|
||||
String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOException {
|
||||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The total period of time in microseconds for how frequently the
|
||||
* Elasticsearch control group's access to CPU resources will be
|
||||
* reallocated.
|
||||
*
|
||||
* @param controlGroup the control group for the Elasticsearch
|
||||
* process for the {@code cpuacct} subsystem
|
||||
* @return the CFS quota period in microseconds
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpu.cfs_period_us} for the control group
|
||||
*/
|
||||
private long getCgroupCpuAcctCpuCfsPeriodMicros(final String controlGroup) throws IOException {
|
||||
return Long.parseLong(readSysFsCgroupCpuAcctCpuCfsPeriod(controlGroup));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the line from {@code cpu.cfs_period_us} for the control
|
||||
* group to which the Elasticsearch process belongs for the
|
||||
* {@code cpu} subsystem. This line represents the period of time
|
||||
* in microseconds for how frequently the control group's access to
|
||||
* CPU resources will be reallocated.
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch
|
||||
* process belongs for the {@code cpu}
|
||||
* subsystem
|
||||
* @return the line from {@code cpu.cfs_period_us}
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpu.cfs_period_us} for the control group
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu")
|
||||
String readSysFsCgroupCpuAcctCpuCfsPeriod(final String controlGroup) throws IOException {
|
||||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_period_us"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The total time in microseconds that all tasks in the
|
||||
* Elasticsearch control group can run during one period as
|
||||
* specified by {@code cpu.cfs_period_us}.
|
||||
*
|
||||
* @param controlGroup the control group for the Elasticsearch
|
||||
* process for the {@code cpuacct} subsystem
|
||||
* @return the CFS quota in microseconds
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpu.cfs_quota_us} for the control group
|
||||
*/
|
||||
private long getCGroupCpuAcctCpuCfsQuotaMicros(final String controlGroup) throws IOException {
|
||||
return Long.parseLong(readSysFsCgroupCpuAcctCpuAcctCfsQuota(controlGroup));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the line from {@code cpu.cfs_quota_us} for the control
|
||||
* group to which the Elasticsearch process belongs for the
|
||||
* {@code cpu} subsystem. This line represents the total time in
|
||||
* microseconds that all tasks in the control group can run during
|
||||
* one period as specified by {@code cpu.cfs_period_us}.
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch
|
||||
* process belongs for the {@code cpu}
|
||||
* subsystem
|
||||
* @return the line from {@code cpu.cfs_quota_us}
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpu.cfs_quota_us} for the control group
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu")
|
||||
String readSysFsCgroupCpuAcctCpuAcctCfsQuota(final String controlGroup) throws IOException {
|
||||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.cfs_quota_us"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The CPU time statistics for all tasks in the Elasticsearch
|
||||
* control group.
|
||||
*
|
||||
* @param controlGroup the control group for the Elasticsearch
|
||||
* process for the {@code cpuacct} subsystem
|
||||
* @return the CPU time statistics
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpu.stat} for the control group
|
||||
*/
|
||||
private OsStats.Cgroup.CpuStat getCgroupCpuAcctCpuStat(final String controlGroup) throws IOException {
|
||||
final List<String> lines = readSysFsCgroupCpuAcctCpuStat(controlGroup);
|
||||
long numberOfPeriods = -1;
|
||||
long numberOfTimesThrottled = -1;
|
||||
long timeThrottledNanos = -1;
|
||||
for (final String line : lines) {
|
||||
final String[] fields = line.split("\\s+");
|
||||
switch (fields[0]) {
|
||||
case "nr_periods":
|
||||
numberOfPeriods = Long.parseLong(fields[1]);
|
||||
break;
|
||||
case "nr_throttled":
|
||||
numberOfTimesThrottled = Long.parseLong(fields[1]);
|
||||
break;
|
||||
case "throttled_time":
|
||||
timeThrottledNanos = Long.parseLong(fields[1]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert numberOfPeriods != -1;
|
||||
assert numberOfTimesThrottled != -1;
|
||||
assert timeThrottledNanos != -1;
|
||||
return new OsStats.Cgroup.CpuStat(numberOfPeriods, numberOfTimesThrottled, timeThrottledNanos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the lines from {@code cpu.stat} for the control
|
||||
* group to which the Elasticsearch process belongs for the
|
||||
* {@code cpu} subsystem. These lines represent the CPU time
|
||||
* statistics and have the form
|
||||
*
|
||||
* nr_periods \d+
|
||||
* nr_throttled \d+
|
||||
* throttled_time \d+
|
||||
*
|
||||
* where {@code nr_periods} is the number of period intervals
|
||||
* as specified by {@code cpu.cfs_period_us} that have elapsed,
|
||||
* {@code nr_throttled} is the number of times tasks in the given
|
||||
* control group have been throttled, and {@code throttled_time} is
|
||||
* the total time in nanoseconds for which tasks in the given
|
||||
* control group have been throttled.
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch
|
||||
* process belongs for the {@code cpu}
|
||||
* subsystem
|
||||
*
|
||||
* @return the lines from {@code cpu.stat}
|
||||
* @throws IOException if an I/O exception occurs reading
|
||||
* {@code cpu.stat} for the control group
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu")
|
||||
List<String> readSysFsCgroupCpuAcctCpuStat(final String controlGroup) throws IOException {
|
||||
final List<String> lines = Files.readAllLines(PathUtils.get("/sys/fs/cgroup/cpu", controlGroup, "cpu.stat"));
|
||||
assert lines != null && lines.size() == 3;
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic cgroup stats.
|
||||
*
|
||||
* @return basic cgroup stats, or {@code null} if an I/O exception
|
||||
* occurred reading the cgroup stats
|
||||
*/
|
||||
private OsStats.Cgroup getCgroup() {
|
||||
try {
|
||||
final Map<String, String> controllerMap = getControlGroups();
|
||||
final String cpuControlGroup = controllerMap.get("cpu");
|
||||
final String cpuAcctControlGroup = controllerMap.get("cpuacct");
|
||||
return new OsStats.Cgroup(
|
||||
cpuAcctControlGroup,
|
||||
getCgroupCpuAcctUsageNanos(cpuAcctControlGroup),
|
||||
cpuControlGroup,
|
||||
getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup),
|
||||
getCGroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup),
|
||||
getCgroupCpuAcctCpuStat(cpuControlGroup));
|
||||
} catch (final IOException e) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("error reading control group stats", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Holder whose only member is a single OsProbe instance, created through
// the no-argument OsProbe constructor when this class is initialized.
// NOTE(review): presumably this is the instance handed out by
// OsProbe.getInstance() (used by OsProbeTests); the accessor itself is
// not visible here, confirm.
private static class OsProbeHolder {
|
||||
private static final OsProbe INSTANCE = new OsProbe();
|
||||
}
|
||||
@ -189,6 +461,7 @@ public class OsProbe {
|
||||
}
|
||||
|
||||
OsProbe() {
|
||||
|
||||
}
|
||||
|
||||
private final Logger logger = ESLoggerFactory.getLogger(getClass());
|
||||
@ -199,10 +472,11 @@ public class OsProbe {
|
||||
}
|
||||
|
||||
public OsStats osStats() {
|
||||
OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage());
|
||||
OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize());
|
||||
OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize());
|
||||
return new OsStats(System.currentTimeMillis(), cpu, mem , swap);
|
||||
final OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage());
|
||||
final OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize());
|
||||
final OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize());
|
||||
final OsStats.Cgroup cgroup = Constants.LINUX ? getCgroup() : null;
|
||||
return new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -217,4 +491,5 @@ public class OsProbe {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -36,12 +36,14 @@ public class OsStats implements Writeable, ToXContent {
|
||||
private final Cpu cpu;
|
||||
private final Mem mem;
|
||||
private final Swap swap;
|
||||
private final Cgroup cgroup;
|
||||
|
||||
public OsStats(long timestamp, Cpu cpu, Mem mem, Swap swap) {
|
||||
public OsStats(final long timestamp, final Cpu cpu, final Mem mem, final Swap swap, final Cgroup cgroup) {
|
||||
this.timestamp = timestamp;
|
||||
this.cpu = Objects.requireNonNull(cpu, "cpu must not be null");
|
||||
this.mem = Objects.requireNonNull(mem, "mem must not be null");;
|
||||
this.swap = Objects.requireNonNull(swap, "swap must not be null");;
|
||||
this.cpu = Objects.requireNonNull(cpu);
|
||||
this.mem = Objects.requireNonNull(mem);
|
||||
this.swap = Objects.requireNonNull(swap);
|
||||
this.cgroup = cgroup;
|
||||
}
|
||||
|
||||
public OsStats(StreamInput in) throws IOException {
|
||||
@ -49,6 +51,7 @@ public class OsStats implements Writeable, ToXContent {
|
||||
this.cpu = new Cpu(in);
|
||||
this.mem = new Mem(in);
|
||||
this.swap = new Swap(in);
|
||||
this.cgroup = in.readOptionalWriteable(Cgroup::new);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -57,6 +60,7 @@ public class OsStats implements Writeable, ToXContent {
|
||||
cpu.writeTo(out);
|
||||
mem.writeTo(out);
|
||||
swap.writeTo(out);
|
||||
out.writeOptionalWriteable(cgroup);
|
||||
}
|
||||
|
||||
public long getTimestamp() {
|
||||
@ -73,6 +77,10 @@ public class OsStats implements Writeable, ToXContent {
|
||||
return swap;
|
||||
}
|
||||
|
||||
/**
 * The basic cgroup statistics.
 *
 * @return the cgroup statistics, or {@code null} if none were supplied;
 *         the value is stored without a null check, is serialized as an
 *         optional value, and is left out of the XContent output when
 *         it is {@code null}
 */
public Cgroup getCgroup() {
|
||||
return cgroup;
|
||||
}
|
||||
|
||||
static final class Fields {
|
||||
static final String OS = "os";
|
||||
static final String TIMESTAMP = "timestamp";
|
||||
@ -103,6 +111,9 @@ public class OsStats implements Writeable, ToXContent {
|
||||
cpu.toXContent(builder, params);
|
||||
mem.toXContent(builder, params);
|
||||
swap.toXContent(builder, params);
|
||||
if (cgroup != null) {
|
||||
cgroup.toXContent(builder, params);
|
||||
}
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
@ -265,7 +276,211 @@ public class OsStats implements Writeable, ToXContent {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates basic cgroup statistics.
|
||||
*/
|
||||
public static class Cgroup implements Writeable, ToXContent {
|
||||
|
||||
private final String cpuAcctControlGroup;
|
||||
private final long cpuAcctUsageNanos;
|
||||
private final String cpuControlGroup;
|
||||
private final long cpuCfsPeriodMicros;
|
||||
private final long cpuCfsQuotaMicros;
|
||||
private final CpuStat cpuStat;
|
||||
|
||||
/**
|
||||
* The control group for the {@code cpuacct} subsystem.
|
||||
*
|
||||
* @return the control group
|
||||
*/
|
||||
public String getCpuAcctControlGroup() {
|
||||
return cpuAcctControlGroup;
|
||||
}
|
||||
|
||||
/**
|
||||
* The total CPU time consumed by all tasks in the
|
||||
* {@code cpuacct} control group from
|
||||
* {@link Cgroup#cpuAcctControlGroup}.
|
||||
*
|
||||
* @return the total CPU time in nanoseconds
|
||||
*/
|
||||
public long getCpuAcctUsageNanos() {
|
||||
return cpuAcctUsageNanos;
|
||||
}
|
||||
|
||||
/**
|
||||
* The control group for the {@code cpu} subsystem.
|
||||
*
|
||||
* @return the control group
|
||||
*/
|
||||
public String getCpuControlGroup() {
|
||||
return cpuControlGroup;
|
||||
}
|
||||
|
||||
/**
|
||||
* The period of time for how frequently the control group from
|
||||
* {@link Cgroup#cpuControlGroup} has its access to CPU
|
||||
* resources reallocated.
|
||||
*
|
||||
* @return the period of time in microseconds
|
||||
*/
|
||||
public long getCpuCfsPeriodMicros() {
|
||||
return cpuCfsPeriodMicros;
|
||||
}
|
||||
|
||||
/**
|
||||
* The total amount of time for which all tasks in the control
|
||||
* group from {@link Cgroup#cpuControlGroup} can run in one
|
||||
* period as represented by {@link Cgroup#cpuCfsPeriodMicros}.
|
||||
*
|
||||
* @return the total amount of time in microseconds
|
||||
*/
|
||||
public long getCpuCfsQuotaMicros() {
|
||||
return cpuCfsQuotaMicros;
|
||||
}
|
||||
|
||||
/**
|
||||
* The CPU time statistics. See {@link CpuStat}.
|
||||
*
|
||||
* @return the CPU time statistics.
|
||||
*/
|
||||
public CpuStat getCpuStat() {
|
||||
return cpuStat;
|
||||
}
|
||||
|
||||
public Cgroup(
|
||||
final String cpuAcctControlGroup,
|
||||
final long cpuAcctUsageNanos,
|
||||
final String cpuControlGroup,
|
||||
final long cpuCfsPeriodMicros,
|
||||
final long cpuCfsQuotaMicros,
|
||||
final CpuStat cpuStat) {
|
||||
this.cpuAcctControlGroup = cpuAcctControlGroup;
|
||||
this.cpuAcctUsageNanos = cpuAcctUsageNanos;
|
||||
this.cpuControlGroup = cpuControlGroup;
|
||||
this.cpuCfsPeriodMicros = cpuCfsPeriodMicros;
|
||||
this.cpuCfsQuotaMicros = cpuCfsQuotaMicros;
|
||||
this.cpuStat = Objects.requireNonNull(cpuStat);
|
||||
}
|
||||
|
||||
Cgroup(final StreamInput in) throws IOException {
|
||||
cpuAcctControlGroup = in.readString();
|
||||
cpuAcctUsageNanos = in.readLong();
|
||||
cpuControlGroup = in.readString();
|
||||
cpuCfsPeriodMicros = in.readLong();
|
||||
cpuCfsQuotaMicros = in.readLong();
|
||||
cpuStat = new CpuStat(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(final StreamOutput out) throws IOException {
|
||||
out.writeString(cpuAcctControlGroup);
|
||||
out.writeLong(cpuAcctUsageNanos);
|
||||
out.writeString(cpuControlGroup);
|
||||
out.writeLong(cpuCfsPeriodMicros);
|
||||
out.writeLong(cpuCfsQuotaMicros);
|
||||
cpuStat.writeTo(out);
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(final XContentBuilder builder, final Params params) throws IOException {
|
||||
builder.startObject("cgroup");
|
||||
{
|
||||
builder.startObject("cpuacct");
|
||||
{
|
||||
builder.field("control_group", cpuAcctControlGroup);
|
||||
builder.field("usage_nanos", cpuAcctUsageNanos);
|
||||
}
|
||||
builder.endObject();
|
||||
builder.startObject("cpu");
|
||||
{
|
||||
builder.field("control_group", cpuControlGroup);
|
||||
builder.field("cfs_period_micros", cpuCfsPeriodMicros);
|
||||
builder.field("cfs_quota_micros", cpuCfsQuotaMicros);
|
||||
cpuStat.toXContent(builder, params);
|
||||
}
|
||||
builder.endObject();
|
||||
}
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encapsulates CPU time statistics.
|
||||
*/
|
||||
public static class CpuStat implements Writeable, ToXContent {
|
||||
|
||||
private final long numberOfElapsedPeriods;
|
||||
private final long numberOfTimesThrottled;
|
||||
private final long timeThrottledNanos;
|
||||
|
||||
/**
|
||||
* The number of elapsed periods.
|
||||
*
|
||||
* @return the number of elapsed periods as measured by
|
||||
* {@code cpu.cfs_period_us}
|
||||
*/
|
||||
public long getNumberOfElapsedPeriods() {
|
||||
return numberOfElapsedPeriods;
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of times tasks in the control group have been
|
||||
* throttled.
|
||||
*
|
||||
* @return the number of times
|
||||
*/
|
||||
public long getNumberOfTimesThrottled() {
|
||||
return numberOfTimesThrottled;
|
||||
}
|
||||
|
||||
/**
|
||||
* The total time duration for which tasks in the control
|
||||
* group have been throttled.
|
||||
*
|
||||
* @return the total time in nanoseconds
|
||||
*/
|
||||
public long getTimeThrottledNanos() {
|
||||
return timeThrottledNanos;
|
||||
}
|
||||
|
||||
public CpuStat(final long numberOfElapsedPeriods, final long numberOfTimesThrottled, final long timeThrottledNanos) {
|
||||
this.numberOfElapsedPeriods = numberOfElapsedPeriods;
|
||||
this.numberOfTimesThrottled = numberOfTimesThrottled;
|
||||
this.timeThrottledNanos = timeThrottledNanos;
|
||||
}
|
||||
|
||||
CpuStat(final StreamInput in) throws IOException {
|
||||
numberOfElapsedPeriods = in.readLong();
|
||||
numberOfTimesThrottled = in.readLong();
|
||||
timeThrottledNanos = in.readLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(final StreamOutput out) throws IOException {
|
||||
out.writeLong(numberOfElapsedPeriods);
|
||||
out.writeLong(numberOfTimesThrottled);
|
||||
out.writeLong(timeThrottledNanos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject("stat");
|
||||
{
|
||||
builder.field("number_of_elapsed_periods", numberOfElapsedPeriods);
|
||||
builder.field("number_of_times_throttled", numberOfTimesThrottled);
|
||||
builder.field("time_throttled_nanos", timeThrottledNanos);
|
||||
}
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static short calculatePercentage(long used, long max) {
|
||||
return max <= 0 ? 0 : (short) (Math.round((100d * used) / max));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -121,4 +121,9 @@ grant {
|
||||
|
||||
// io stats on Linux
|
||||
permission java.io.FilePermission "/proc/diskstats", "read";
|
||||
|
||||
// control group stats on Linux
|
||||
permission java.io.FilePermission "/proc/self/cgroup", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
|
||||
};
|
||||
|
@ -72,6 +72,30 @@ public class NodeStatsTests extends ESTestCase {
|
||||
assertEquals(nodeStats.getOs().getMem().getFreePercent(), deserializedNodeStats.getOs().getMem().getFreePercent());
|
||||
assertEquals(nodeStats.getOs().getMem().getUsedPercent(), deserializedNodeStats.getOs().getMem().getUsedPercent());
|
||||
assertEquals(nodeStats.getOs().getCpu().getPercent(), deserializedNodeStats.getOs().getCpu().getPercent());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuAcctControlGroup(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuAcctControlGroup());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuAcctUsageNanos(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuAcctUsageNanos());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuControlGroup(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuControlGroup());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuCfsPeriodMicros(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuCfsPeriodMicros());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuCfsQuotaMicros(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuCfsQuotaMicros());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuStat().getNumberOfElapsedPeriods(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuStat().getNumberOfElapsedPeriods());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuStat().getNumberOfTimesThrottled(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuStat().getNumberOfTimesThrottled());
|
||||
assertEquals(
|
||||
nodeStats.getOs().getCgroup().getCpuStat().getTimeThrottledNanos(),
|
||||
deserializedNodeStats.getOs().getCgroup().getCpuStat().getTimeThrottledNanos());
|
||||
assertArrayEquals(nodeStats.getOs().getCpu().getLoadAverage(),
|
||||
deserializedNodeStats.getOs().getCpu().getLoadAverage(), 0);
|
||||
}
|
||||
@ -264,7 +288,14 @@ public class NodeStatsTests extends ESTestCase {
|
||||
}
|
||||
osStats = new OsStats(System.currentTimeMillis(), new OsStats.Cpu(randomShort(), loadAverages),
|
||||
new OsStats.Mem(randomLong(), randomLong()),
|
||||
new OsStats.Swap(randomLong(), randomLong()));
|
||||
new OsStats.Swap(randomLong(), randomLong()),
|
||||
new OsStats.Cgroup(
|
||||
randomAsciiOfLength(8),
|
||||
randomPositiveLong(),
|
||||
randomAsciiOfLength(8),
|
||||
randomPositiveLong(),
|
||||
randomPositiveLong(),
|
||||
new OsStats.Cgroup.CpuStat(randomPositiveLong(), randomPositiveLong(), randomPositiveLong())));
|
||||
}
|
||||
ProcessStats processStats = frequently() ? new ProcessStats(randomPositiveLong(), randomPositiveLong(), randomPositiveLong(),
|
||||
new ProcessStats.Cpu(randomShort(), randomPositiveLong()),
|
||||
|
@ -22,6 +22,9 @@ package org.elasticsearch.monitor.os;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import static org.hamcrest.Matchers.allOf;
|
||||
import static org.hamcrest.Matchers.anyOf;
|
||||
import static org.hamcrest.Matchers.both;
|
||||
@ -30,8 +33,10 @@ import static org.hamcrest.Matchers.greaterThan;
|
||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.Matchers.lessThanOrEqualTo;
|
||||
import static org.hamcrest.Matchers.notNullValue;
|
||||
|
||||
public class OsProbeTests extends ESTestCase {
|
||||
|
||||
private final OsProbe probe = OsProbe.getInstance();
|
||||
|
||||
public void testOsInfo() {
|
||||
@ -102,6 +107,20 @@ public class OsProbeTests extends ESTestCase {
|
||||
assertThat(stats.getSwap().getFree().getBytes(), equalTo(0L));
|
||||
assertThat(stats.getSwap().getUsed().getBytes(), equalTo(0L));
|
||||
}
|
||||
|
||||
if (Constants.LINUX) {
|
||||
if (stats.getCgroup() != null) {
|
||||
assertThat(stats.getCgroup().getCpuAcctControlGroup(), notNullValue());
|
||||
assertThat(stats.getCgroup().getCpuAcctUsageNanos(), greaterThan(0L));
|
||||
assertThat(stats.getCgroup().getCpuCfsQuotaMicros(), anyOf(equalTo(-1L), greaterThanOrEqualTo(0L)));
|
||||
assertThat(stats.getCgroup().getCpuCfsPeriodMicros(), greaterThanOrEqualTo(0L));
|
||||
assertThat(stats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(), greaterThanOrEqualTo(0L));
|
||||
assertThat(stats.getCgroup().getCpuStat().getNumberOfTimesThrottled(), greaterThanOrEqualTo(0L));
|
||||
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(0L));
|
||||
}
|
||||
} else {
|
||||
assertNull(stats.getCgroup());
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetSystemLoadAverage() {
|
||||
@ -125,4 +144,66 @@ public class OsProbeTests extends ESTestCase {
|
||||
assertThat(systemLoadAverage[2], equalTo(Double.parseDouble("1.99")));
|
||||
}
|
||||
|
||||
public void testCgroupProbe() {
|
||||
assumeTrue("test runs on Linux only", Constants.LINUX);
|
||||
|
||||
final String hierarchy = randomAsciiOfLength(16);
|
||||
|
||||
final OsProbe probe = new OsProbe() {
|
||||
|
||||
@Override
|
||||
List<String> readProcSelfCgroup() {
|
||||
return Arrays.asList(
|
||||
"11:freezer:/",
|
||||
"10:net_cls,net_prio:/",
|
||||
"9:pids:/",
|
||||
"8:cpuset:/",
|
||||
"7:blkio:/",
|
||||
"6:memory:/",
|
||||
"5:devices:/user.slice",
|
||||
"4:hugetlb:/",
|
||||
"3:perf_event:/",
|
||||
"2:cpu,cpuacct:/" + hierarchy,
|
||||
"1:name=systemd:/user.slice/user-1000.slice/session-2359.scope");
|
||||
}
|
||||
|
||||
@Override
|
||||
String readSysFsCgroupCpuAcctCpuAcctUsage(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
return "364869866063112";
|
||||
}
|
||||
|
||||
@Override
|
||||
String readSysFsCgroupCpuAcctCpuCfsPeriod(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
return "100000";
|
||||
}
|
||||
|
||||
@Override
|
||||
String readSysFsCgroupCpuAcctCpuAcctCfsQuota(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
return "50000";
|
||||
}
|
||||
|
||||
@Override
|
||||
List<String> readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
|
||||
return Arrays.asList(
|
||||
"nr_periods 17992",
|
||||
"nr_throttled 1311",
|
||||
"throttled_time 139298645489");
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
|
||||
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
|
||||
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
|
||||
assertThat(cgroup.getCpuControlGroup(), equalTo("/" + hierarchy));
|
||||
assertThat(cgroup.getCpuCfsPeriodMicros(), equalTo(100000L));
|
||||
assertThat(cgroup.getCpuCfsQuotaMicros(), equalTo(50000L));
|
||||
assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(17992L));
|
||||
assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(1311L));
|
||||
assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(139298645489L));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -36,7 +36,14 @@ public class OsStatsTests extends ESTestCase {
|
||||
OsStats.Cpu cpu = new OsStats.Cpu(randomShort(), loadAverages);
|
||||
OsStats.Mem mem = new OsStats.Mem(randomLong(), randomLong());
|
||||
OsStats.Swap swap = new OsStats.Swap(randomLong(), randomLong());
|
||||
OsStats osStats = new OsStats(System.currentTimeMillis(), cpu, mem, swap);
|
||||
OsStats.Cgroup cgroup = new OsStats.Cgroup(
|
||||
randomAsciiOfLength(8),
|
||||
randomPositiveLong(),
|
||||
randomAsciiOfLength(8),
|
||||
randomPositiveLong(),
|
||||
randomPositiveLong(),
|
||||
new OsStats.Cgroup.CpuStat(randomPositiveLong(), randomPositiveLong(), randomPositiveLong()));
|
||||
OsStats osStats = new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
|
||||
|
||||
try (BytesStreamOutput out = new BytesStreamOutput()) {
|
||||
osStats.writeTo(out);
|
||||
@ -49,7 +56,22 @@ public class OsStatsTests extends ESTestCase {
|
||||
assertEquals(osStats.getMem().getTotal(), deserializedOsStats.getMem().getTotal());
|
||||
assertEquals(osStats.getSwap().getFree(), deserializedOsStats.getSwap().getFree());
|
||||
assertEquals(osStats.getSwap().getTotal(), deserializedOsStats.getSwap().getTotal());
|
||||
assertEquals(osStats.getCgroup().getCpuAcctControlGroup(), deserializedOsStats.getCgroup().getCpuAcctControlGroup());
|
||||
assertEquals(osStats.getCgroup().getCpuAcctUsageNanos(), deserializedOsStats.getCgroup().getCpuAcctUsageNanos());
|
||||
assertEquals(osStats.getCgroup().getCpuControlGroup(), deserializedOsStats.getCgroup().getCpuControlGroup());
|
||||
assertEquals(osStats.getCgroup().getCpuCfsPeriodMicros(), deserializedOsStats.getCgroup().getCpuCfsPeriodMicros());
|
||||
assertEquals(osStats.getCgroup().getCpuCfsQuotaMicros(), deserializedOsStats.getCgroup().getCpuCfsQuotaMicros());
|
||||
assertEquals(
|
||||
osStats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(),
|
||||
deserializedOsStats.getCgroup().getCpuStat().getNumberOfElapsedPeriods());
|
||||
assertEquals(
|
||||
osStats.getCgroup().getCpuStat().getNumberOfTimesThrottled(),
|
||||
deserializedOsStats.getCgroup().getCpuStat().getNumberOfTimesThrottled());
|
||||
assertEquals(
|
||||
osStats.getCgroup().getCpuStat().getTimeThrottledNanos(),
|
||||
deserializedOsStats.getCgroup().getCpuStat().getTimeThrottledNanos());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -217,6 +217,38 @@ the operating system:
|
||||
`os.swap.used_in_bytes`::
|
||||
Amount of used swap space in bytes
|
||||
|
||||
`os.cgroup.cpuacct.control_group` (Linux only)::
|
||||
The `cpuacct` control group to which the Elasticsearch process
|
||||
belongs
|
||||
|
||||
`os.cgroup.cpuacct.usage_nanos` (Linux only)::
|
||||
The total CPU time (in nanoseconds) consumed by all tasks in the
|
||||
same cgroup as the Elasticsearch process
|
||||
|
||||
`os.cgroup.cpu.control_group` (Linux only)::
|
||||
The `cpu` control group to which the Elasticsearch process belongs
|
||||
|
||||
`os.cgroup.cpu.cfs_period_micros` (Linux only)::
|
||||
The period of time (in microseconds) for how regularly all tasks in
|
||||
the same cgroup as the Elasticsearch process should have their
|
||||
access to CPU resources reallocated.
|
||||
|
||||
`os.cgroup.cpu.cfs_quota_micros` (Linux only)::
|
||||
The total amount of time (in microseconds) for which all tasks in
|
||||
the same cgroup as the Elasticsearch process can run during one
|
||||
period `os.cgroup.cpu.cfs_period_micros`
|
||||
|
||||
`os.cgroup.cpu.stat.number_of_elapsed_periods` (Linux only)::
|
||||
The number of reporting periods (as specified by
|
||||
`os.cgroup.cpu.cfs_period_micros`) that have elapsed
|
||||
|
||||
`os.cgroup.cpu.stat.number_of_times_throttled` (Linux only)::
|
||||
The number of times all tasks in the same cgroup as the
|
||||
Elasticsearch process have been throttled.
|
||||
|
||||
`os.cgroup.cpu.stat.time_throttled_nanos` (Linux only)::
|
||||
The total amount of time (in nanoseconds) for which all tasks in
|
||||
the same cgroup as the Elasticsearch process have been throttled.
|
||||
|
||||
[float]
|
||||
[[process-stats]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user