diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java
index 43ef51658b7..f9423f1b13c 100644
--- a/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java
+++ b/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java
@@ -36,8 +36,6 @@ import java.nio.file.Path;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
public class OsProbe {
@@ -382,12 +380,70 @@ public class OsProbe {
}
/**
- * Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu}, and
- * {@code /sys/fs/cgroup/cpuacct}.
+ * The maximum amount of user memory (including file cache).
+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
+ * unsigned 64 bit number, and this will overflow a long, hence the result type is {@code String}.
+ * (The alternative would have been {@code BigInteger} but then it would not be possible to index
+ * the OS stats document into Elasticsearch without losing information, as {@code BigInteger} is
+ * not a supported Elasticsearch type.)
+ *
+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
+ * @return the maximum amount of user memory (including file cache)
+ * @throws IOException if an I/O exception occurs reading {@code memory.limit_in_bytes} for the control group
+ */
+ private String getCgroupMemoryLimitInBytes(final String controlGroup) throws IOException {
+ return readSysFsCgroupMemoryLimitInBytes(controlGroup);
+ }
+
+ /**
+ * Returns the line from {@code memory.limit_in_bytes} for the control group to which the Elasticsearch process belongs for the
+ * {@code memory} subsystem. This line represents the maximum amount of user memory (including file cache).
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
+ * @return the line from {@code memory.limit_in_bytes}
+ * @throws IOException if an I/O exception occurs reading {@code memory.limit_in_bytes} for the control group
+ */
+ @SuppressForbidden(reason = "access /sys/fs/cgroup/memory")
+ String readSysFsCgroupMemoryLimitInBytes(final String controlGroup) throws IOException {
+ return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.limit_in_bytes"));
+ }
+
+ /**
+ * The total current memory usage by processes in the cgroup (in bytes).
+ * It is returned as a {@code String} for consistency with the memory limit which, when no limit is set, can be
+ * the maximum value of an unsigned 64 bit number on some Linux versions, and that would overflow a long.
+ * (The alternative would have been {@code BigInteger} but then it would not be possible to index
+ * the OS stats document into Elasticsearch without losing information, as {@code BigInteger} is
+ * not a supported Elasticsearch type.)
+ *
+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
+ * @return the total current memory usage by processes in the cgroup (in bytes)
+ * @throws IOException if an I/O exception occurs reading {@code memory.usage_in_bytes} for the control group
+ */
+ private String getCgroupMemoryUsageInBytes(final String controlGroup) throws IOException {
+ return readSysFsCgroupMemoryUsageInBytes(controlGroup);
+ }
+
+ /**
+ * Returns the line from {@code memory.usage_in_bytes} for the control group to which the Elasticsearch process belongs for the
+ * {@code memory} subsystem. This line represents the total current memory usage by processes in the cgroup (in bytes).
+ *
+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
+ * @return the line from {@code memory.usage_in_bytes}
+ * @throws IOException if an I/O exception occurs reading {@code memory.usage_in_bytes} for the control group
+ */
+ @SuppressForbidden(reason = "access /sys/fs/cgroup/memory")
+ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOException {
+ return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.usage_in_bytes"));
+ }
+
+ /**
+ * Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu},
+ * {@code /sys/fs/cgroup/cpuacct} and {@code /sys/fs/cgroup/memory}.
*
* @return {@code true} if the stats are available, otherwise {@code false}
*/
- @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, and /sys/fs/cgroup/cpuacct")
+ @SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, /sys/fs/cgroup/cpuacct and /sys/fs/cgroup/memory")
boolean areCgroupStatsAvailable() {
if (!Files.exists(PathUtils.get("/proc/self/cgroup"))) {
return false;
@@ -398,6 +454,9 @@ public class OsProbe {
if (!Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct"))) {
return false;
}
+ if (!Files.exists(PathUtils.get("/sys/fs/cgroup/memory"))) {
+ return false;
+ }
return true;
}
@@ -424,13 +483,21 @@ public class OsProbe {
final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup);
final OsStats.Cgroup.CpuStat cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup);
+ final String memoryControlGroup = controllerMap.get("memory");
+ assert memoryControlGroup != null;
+ final String cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup);
+ final String cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup);
+
return new OsStats.Cgroup(
cpuAcctControlGroup,
cgroupCpuAcctUsageNanos,
cpuControlGroup,
cgroupCpuAcctCpuCfsPeriodMicros,
cgroupCpuAcctCpuCfsQuotaMicros,
- cpuStat);
+ cpuStat,
+ memoryControlGroup,
+ cgroupMemoryLimitInBytes,
+ cgroupMemoryUsageInBytes);
}
} catch (final IOException e) {
logger.debug("error reading control group stats", e);
diff --git a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java
index dfca123d0fa..c3c23ddb091 100644
--- a/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java
+++ b/core/src/main/java/org/elasticsearch/monitor/os/OsStats.java
@@ -294,6 +294,10 @@ public class OsStats implements Writeable, ToXContentFragment {
private final long cpuCfsPeriodMicros;
private final long cpuCfsQuotaMicros;
private final CpuStat cpuStat;
+ // These will be null for nodes running versions prior to 6.1.0
+ private final String memoryControlGroup;
+ private final String memoryLimitInBytes;
+ private final String memoryUsageInBytes;
/**
* The control group for the {@code cpuacct} subsystem.
@@ -355,19 +359,57 @@ public class OsStats implements Writeable, ToXContentFragment {
return cpuStat;
}
+ /**
+ * The control group for the {@code memory} subsystem.
+ *
+ * @return the control group
+ */
+ public String getMemoryControlGroup() {
+ return memoryControlGroup;
+ }
+
+ /**
+ * The maximum amount of user memory (including file cache).
+ * This is stored as a {@code String} because the value can be too big to fit in a
+ * {@code long}. (The alternative would have been {@code BigInteger} but then
+ * it would not be possible to index the OS stats document into Elasticsearch without
+ * losing information, as {@code BigInteger} is not a supported Elasticsearch type.)
+ *
+ * @return the maximum amount of user memory (including file cache).
+ */
+ public String getMemoryLimitInBytes() {
+ return memoryLimitInBytes;
+ }
+
+ /**
+ * The total current memory usage by processes in the cgroup (in bytes).
+ * This is stored as a {@code String} for consistency with {@code memoryLimitInBytes}.
+ *
+ * @return the total current memory usage by processes in the cgroup (in bytes).
+ */
+ public String getMemoryUsageInBytes() {
+ return memoryUsageInBytes;
+ }
+
public Cgroup(
final String cpuAcctControlGroup,
final long cpuAcctUsageNanos,
final String cpuControlGroup,
final long cpuCfsPeriodMicros,
final long cpuCfsQuotaMicros,
- final CpuStat cpuStat) {
+ final CpuStat cpuStat,
+ final String memoryControlGroup,
+ final String memoryLimitInBytes,
+ final String memoryUsageInBytes) {
this.cpuAcctControlGroup = Objects.requireNonNull(cpuAcctControlGroup);
this.cpuAcctUsageNanos = cpuAcctUsageNanos;
this.cpuControlGroup = Objects.requireNonNull(cpuControlGroup);
this.cpuCfsPeriodMicros = cpuCfsPeriodMicros;
this.cpuCfsQuotaMicros = cpuCfsQuotaMicros;
this.cpuStat = Objects.requireNonNull(cpuStat);
+ this.memoryControlGroup = memoryControlGroup;
+ this.memoryLimitInBytes = memoryLimitInBytes;
+ this.memoryUsageInBytes = memoryUsageInBytes;
}
Cgroup(final StreamInput in) throws IOException {
@@ -377,6 +419,16 @@ public class OsStats implements Writeable, ToXContentFragment {
cpuCfsPeriodMicros = in.readLong();
cpuCfsQuotaMicros = in.readLong();
cpuStat = new CpuStat(in);
+ // TODO: change this to 6.1.0 after backporting
+ if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+ memoryControlGroup = in.readOptionalString();
+ memoryLimitInBytes = in.readOptionalString();
+ memoryUsageInBytes = in.readOptionalString();
+ } else {
+ memoryControlGroup = null;
+ memoryLimitInBytes = null;
+ memoryUsageInBytes = null;
+ }
}
@Override
@@ -387,6 +439,12 @@ public class OsStats implements Writeable, ToXContentFragment {
out.writeLong(cpuCfsPeriodMicros);
out.writeLong(cpuCfsQuotaMicros);
cpuStat.writeTo(out);
+ // TODO: change this to 6.1.0 after backporting
+ if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
+ out.writeOptionalString(memoryControlGroup);
+ out.writeOptionalString(memoryLimitInBytes);
+ out.writeOptionalString(memoryUsageInBytes);
+ }
}
@Override
@@ -407,6 +465,19 @@ public class OsStats implements Writeable, ToXContentFragment {
cpuStat.toXContent(builder, params);
}
builder.endObject();
+ if (memoryControlGroup != null) {
+ builder.startObject("memory");
+ {
+ builder.field("control_group", memoryControlGroup);
+ if (memoryLimitInBytes != null) {
+ builder.field("limit_in_bytes", memoryLimitInBytes);
+ }
+ if (memoryUsageInBytes != null) {
+ builder.field("usage_in_bytes", memoryUsageInBytes);
+ }
+ }
+ builder.endObject();
+ }
}
builder.endObject();
return builder;
diff --git a/core/src/main/resources/org/elasticsearch/bootstrap/security.policy b/core/src/main/resources/org/elasticsearch/bootstrap/security.policy
index 001af2b7f4c..38c98b87427 100644
--- a/core/src/main/resources/org/elasticsearch/bootstrap/security.policy
+++ b/core/src/main/resources/org/elasticsearch/bootstrap/security.policy
@@ -129,4 +129,6 @@ grant {
permission java.io.FilePermission "/sys/fs/cgroup/cpu/-", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct", "read";
permission java.io.FilePermission "/sys/fs/cgroup/cpuacct/-", "read";
+ permission java.io.FilePermission "/sys/fs/cgroup/memory", "read";
+ permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read";
};
diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java
index 29a7918a70e..338ffe06fb8 100644
--- a/core/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java
+++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java
@@ -96,6 +96,12 @@ public class NodeStatsTests extends ESTestCase {
assertEquals(
nodeStats.getOs().getCgroup().getCpuStat().getTimeThrottledNanos(),
deserializedNodeStats.getOs().getCgroup().getCpuStat().getTimeThrottledNanos());
+ assertEquals(
+ nodeStats.getOs().getCgroup().getMemoryLimitInBytes(),
+ deserializedNodeStats.getOs().getCgroup().getMemoryLimitInBytes());
+ assertEquals(
+ nodeStats.getOs().getCgroup().getMemoryUsageInBytes(),
+ deserializedNodeStats.getOs().getCgroup().getMemoryUsageInBytes());
assertArrayEquals(nodeStats.getOs().getCpu().getLoadAverage(),
deserializedNodeStats.getOs().getCpu().getLoadAverage(), 0);
}
@@ -294,7 +300,10 @@ public class NodeStatsTests extends ESTestCase {
randomAlphaOfLength(8),
randomNonNegativeLong(),
randomNonNegativeLong(),
- new OsStats.Cgroup.CpuStat(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong())));
+ new OsStats.Cgroup.CpuStat(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()),
+ randomAlphaOfLength(8),
+ Long.toString(randomNonNegativeLong()),
+ Long.toString(randomNonNegativeLong())));
}
ProcessStats processStats = frequently() ?
new ProcessStats(
diff --git a/core/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java b/core/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
index 71305c41f56..2afa7af2ef3 100644
--- a/core/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
+++ b/core/src/test/java/org/elasticsearch/monitor/os/OsProbeTests.java
@@ -22,6 +22,7 @@ package org.elasticsearch.monitor.os;
import org.apache.lucene.util.Constants;
import org.elasticsearch.test.ESTestCase;
+import java.math.BigInteger;
import java.util.Arrays;
import java.util.List;
@@ -117,6 +118,12 @@ public class OsProbeTests extends ESTestCase {
assertThat(stats.getCgroup().getCpuStat().getNumberOfElapsedPeriods(), greaterThanOrEqualTo(0L));
assertThat(stats.getCgroup().getCpuStat().getNumberOfTimesThrottled(), greaterThanOrEqualTo(0L));
assertThat(stats.getCgroup().getCpuStat().getTimeThrottledNanos(), greaterThanOrEqualTo(0L));
+ // These could be null if transported from a node running an older version, but shouldn't be null on the current node
+ assertThat(stats.getCgroup().getMemoryControlGroup(), notNullValue());
+ assertThat(stats.getCgroup().getMemoryLimitInBytes(), notNullValue());
+ assertThat(new BigInteger(stats.getCgroup().getMemoryLimitInBytes()), greaterThan(BigInteger.ZERO));
+ assertThat(stats.getCgroup().getMemoryUsageInBytes(), notNullValue());
+ assertThat(new BigInteger(stats.getCgroup().getMemoryUsageInBytes()), greaterThan(BigInteger.ZERO));
}
} else {
assertNull(stats.getCgroup());
@@ -159,7 +166,7 @@ public class OsProbeTests extends ESTestCase {
"9:net_cls,net_prio:/",
"8:pids:/",
"7:blkio:/",
- "6:memory:/",
+ "6:memory:/" + hierarchy,
"5:devices:/user.slice",
"4:hugetlb:/",
"3:perf_event:/",
@@ -194,6 +201,19 @@ public class OsProbeTests extends ESTestCase {
"throttled_time 139298645489");
}
+ @Override
+ String readSysFsCgroupMemoryLimitInBytes(String controlGroup) {
+ assertThat(controlGroup, equalTo("/" + hierarchy));
+ // This is the highest value that can be stored in an unsigned 64 bit number, hence too big for long
+ return "18446744073709551615";
+ }
+
+ @Override
+ String readSysFsCgroupMemoryUsageInBytes(String controlGroup) {
+ assertThat(controlGroup, equalTo("/" + hierarchy));
+ return "4796416";
+ }
+
@Override
boolean areCgroupStatsAvailable() {
return areCgroupStatsAvailable;
@@ -213,6 +233,8 @@ public class OsProbeTests extends ESTestCase {
assertThat(cgroup.getCpuStat().getNumberOfElapsedPeriods(), equalTo(17992L));
assertThat(cgroup.getCpuStat().getNumberOfTimesThrottled(), equalTo(1311L));
assertThat(cgroup.getCpuStat().getTimeThrottledNanos(), equalTo(139298645489L));
+ assertThat(cgroup.getMemoryLimitInBytes(), equalTo("18446744073709551615")); // unsigned 64 bit max, kept verbatim as a String
+ assertThat(cgroup.getMemoryUsageInBytes(), equalTo("4796416")); // usage is a String too, for consistency with the limit
} else {
assertNull(cgroup);
}
diff --git a/core/src/test/java/org/elasticsearch/monitor/os/OsStatsTests.java b/core/src/test/java/org/elasticsearch/monitor/os/OsStatsTests.java
index f1e2371db5c..0f05e623589 100644
--- a/core/src/test/java/org/elasticsearch/monitor/os/OsStatsTests.java
+++ b/core/src/test/java/org/elasticsearch/monitor/os/OsStatsTests.java
@@ -42,7 +42,10 @@ public class OsStatsTests extends ESTestCase {
randomAlphaOfLength(8),
randomNonNegativeLong(),
randomNonNegativeLong(),
- new OsStats.Cgroup.CpuStat(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()));
+ new OsStats.Cgroup.CpuStat(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()),
+ randomAlphaOfLength(8),
+ Long.toString(randomNonNegativeLong()),
+ Long.toString(randomNonNegativeLong()));
OsStats osStats = new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
try (BytesStreamOutput out = new BytesStreamOutput()) {
@@ -70,6 +73,8 @@ public class OsStatsTests extends ESTestCase {
assertEquals(
osStats.getCgroup().getCpuStat().getTimeThrottledNanos(),
deserializedOsStats.getCgroup().getCpuStat().getTimeThrottledNanos());
+ assertEquals(osStats.getCgroup().getMemoryLimitInBytes(), deserializedOsStats.getCgroup().getMemoryLimitInBytes());
+ assertEquals(osStats.getCgroup().getMemoryUsageInBytes(), deserializedOsStats.getCgroup().getMemoryUsageInBytes());
}
}
}
diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc
index 4d748d31559..40c02cf35aa 100644
--- a/docs/reference/cluster/nodes-stats.asciidoc
+++ b/docs/reference/cluster/nodes-stats.asciidoc
@@ -255,6 +255,25 @@ the operating system:
The total amount of time (in nanoseconds) for which all tasks in
the same cgroup as the Elasticsearch process have been throttled.
+`os.cgroup.memory.control_group` (Linux only)::
+ The `memory` control group to which the Elasticsearch process
+ belongs.
+
+`os.cgroup.memory.limit_in_bytes` (Linux only)::
+ The maximum amount of user memory (including file cache) allowed
+ for all tasks in the same cgroup as the Elasticsearch process.
+ This value can be too big to store in a `long`, so is returned as
+ a string so that the value returned can exactly match what the
+ underlying operating system interface returns. Any value that is
+ too large to parse into a `long` almost certainly means no limit
+ has been set for the cgroup.
+
+`os.cgroup.memory.usage_in_bytes` (Linux only)::
+ The total current memory usage (in bytes) by all tasks in the
+ same cgroup as the Elasticsearch process.
+ This value is stored as a string for consistency with
+ `os.cgroup.memory.limit_in_bytes`.
+
NOTE: For the cgroup stats to be visible, cgroups must be compiled into
the kernal, the `cpu` and `cpuacct` cgroup subsystems must be
configured and stats must be readable from `/sys/fs/cgroup/cpu`