Remove sporadic min/max usage estimates from stats (#59755)

Today `GET _nodes/stats/fs` includes `{least,most}_usage_estimate`
fields for some nodes. These fields have rather strange semantics. They
are only reported on the elected master and on nodes that have been the
elected master since they were last restarted; when a node stops being
the elected master these stats remain in place but we stop updating them
so they may become arbitrarily stale.

This means that these statistics are pretty meaningless and impossible
to use correctly. Even if they were kept up to date they're never
reported for data-only nodes anyway, despite the fact that data nodes
are the ones where we care most about disk usage. The information needed
to compute the path with the least/most available space is already
provided in the rest the stats output, so we can treat the inclusion of
these stats as a bug and fix it by simply removing them in this commit.
Since these stats were always optional and mostly omitted (for opaque
reasons) this is not considered a breaking change.
This commit is contained in:
David Turner 2020-07-20 14:48:53 +01:00
parent e7c7ed6493
commit b75207a09f
9 changed files with 54 additions and 112 deletions

View File

@ -1687,17 +1687,6 @@ less than `free_in_bytes`. This is the actual amount of free disk
space the {es} node can utilise.
=======
`least_usage_estimate`::
(object)
Contains statistics for the file store with the least estimated usage. See
<<cluster-nodes-stats-fs-data,`fs.data`>> for a list of child parameters.
`most_usage_estimate`::
(object)
Contains statistics for the file store with the most estimated usage. See
<<cluster-nodes-stats-fs-data,`fs.data`>> for a list of child parameters.
[[cluster-nodes-stats-fs-data]]
`data`::
(array of objects)
@ -1713,9 +1702,6 @@ Path to the file store.
`mount`::
(string)
Mount point of the file store (ex: /dev/sda2).
+
NOTE: This parameter is not provided for the `least_usage_estimate` or
`most_usage_estimate` file stores.
`type`::
(string)

View File

@ -0,0 +1,20 @@
[[breaking-changes-7.10]]
== Breaking changes in 7.10
++++
<titleabbrev>7.10</titleabbrev>
++++
This section discusses the changes that you need to be aware of when migrating
your application to {es} 7.10.
See also <<release-highlights>> and <<es-release-notes>>.
// * <<breaking_710_blah_changes>>
// * <<breaking_710_blah_changes>>
//NOTE: The notable-breaking-changes tagged regions are re-used in the
//Installation and Upgrade Guide
//tag::notable-breaking-changes[]
//end::notable-breaking-changes[]

View File

@ -0,0 +1,15 @@
[[release-notes-7.10.0]]
== {es} version 7.10.0
Also see <<breaking-changes-7.10,Breaking changes in 7.10>>.
[[stats-7.10.0]]
[float]
=== Stats changes
Estimates removed from filesystem stats::
* {es} would sometimes report the data path which it estimated to have the
most and least available space in the filesystem stats. These stats were
often very out of date and were frequently omitted from responses to the
stats APIs. They are now always omitted. {es-pull}59755[#59755]

View File

@ -28,7 +28,6 @@ import org.elasticsearch.monitor.jvm.JvmService;
import org.elasticsearch.monitor.os.OsService;
import org.elasticsearch.monitor.process.ProcessService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.cluster.ClusterInfoService;
import java.io.IOException;
@ -40,13 +39,12 @@ public class MonitorService extends AbstractLifecycleComponent {
private final JvmService jvmService;
private final FsService fsService;
public MonitorService(Settings settings, NodeEnvironment nodeEnvironment, ThreadPool threadPool,
ClusterInfoService clusterInfoService) throws IOException {
public MonitorService(Settings settings, NodeEnvironment nodeEnvironment, ThreadPool threadPool) throws IOException {
this.jvmGcMonitorService = new JvmGcMonitorService(settings, threadPool);
this.osService = new OsService(settings);
this.processService = new ProcessService(settings);
this.jvmService = new JvmService(settings);
this.fsService = new FsService(settings, nodeEnvironment, clusterInfoService);
this.fsService = new FsService(settings, nodeEnvironment);
}
public OsService osService() {

View File

@ -126,19 +126,6 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
return current + other;
}
private double addDouble(double current, double other) {
if (current == -1 && other == -1) {
return 0;
}
if (other == -1) {
return current;
}
if (current == -1) {
return other;
}
return current + other;
}
public void add(Path path) {
total = FsProbe.adjustForHugeFilesystems(addLong(total, path.total));
free = FsProbe.adjustForHugeFilesystems(addLong(free, path.free));
@ -435,20 +422,12 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
private final Path[] paths;
private final IoStats ioStats;
private final Path total;
private final DiskUsage leastDiskEstimate;
private final DiskUsage mostDiskEstimate;
public FsInfo(long timestamp, IoStats ioStats, Path[] paths) {
this(timestamp, ioStats, paths, null, null);
}
public FsInfo(long timestamp, IoStats ioStats, Path[] paths, @Nullable DiskUsage leastUsage, @Nullable DiskUsage mostUsage) {
this.timestamp = timestamp;
this.ioStats = ioStats;
this.paths = paths;
this.total = total();
this.leastDiskEstimate = leastUsage;
this.mostDiskEstimate = mostUsage;
}
/**
@ -462,12 +441,9 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
paths[i] = new Path(in);
}
this.total = total();
if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha1)) {
this.leastDiskEstimate = in.readOptionalWriteable(DiskUsage::new);
this.mostDiskEstimate = in.readOptionalWriteable(DiskUsage::new);
} else {
this.leastDiskEstimate = null;
this.mostDiskEstimate = null;
if (in.getVersion().before(Version.V_7_10_0)) {
in.readOptionalWriteable(DiskUsage::new); // previously leastDiskEstimate
in.readOptionalWriteable(DiskUsage::new); // previously mostDiskEstimate
}
}
@ -479,9 +455,9 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
for (Path path : paths) {
path.writeTo(out);
}
if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha1)) {
out.writeOptionalWriteable(this.leastDiskEstimate);
out.writeOptionalWriteable(this.mostDiskEstimate);
if (out.getVersion().before(Version.V_7_10_0)) {
out.writeOptionalWriteable(null); // previously leastDiskEstimate
out.writeOptionalWriteable(null); // previously mostDiskEstimate
}
}
@ -489,16 +465,6 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
return total;
}
@Nullable
public DiskUsage getLeastDiskEstimate() {
return this.leastDiskEstimate;
}
@Nullable
public DiskUsage getMostDiskEstimate() {
return this.mostDiskEstimate;
}
private Path total() {
Path res = new Path();
Set<String> seenDevices = new HashSet<>(paths.length);
@ -532,28 +498,7 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
builder.field(Fields.TIMESTAMP, timestamp);
builder.field(Fields.TOTAL);
total().toXContent(builder, params);
if (leastDiskEstimate != null) {
builder.startObject(Fields.LEAST_ESTIMATE);
{
builder.field(Fields.PATH, leastDiskEstimate.getPath());
builder.humanReadableField(Fields.TOTAL_IN_BYTES, Fields.TOTAL, new ByteSizeValue(leastDiskEstimate.getTotalBytes()));
builder.humanReadableField(Fields.AVAILABLE_IN_BYTES, Fields.AVAILABLE,
new ByteSizeValue(leastDiskEstimate.getFreeBytes()));
builder.field(Fields.USAGE_PERCENTAGE, leastDiskEstimate.getUsedDiskAsPercentage());
}
builder.endObject();
}
if (mostDiskEstimate != null) {
builder.startObject(Fields.MOST_ESTIMATE);
{
builder.field(Fields.PATH, mostDiskEstimate.getPath());
builder.humanReadableField(Fields.TOTAL_IN_BYTES, Fields.TOTAL, new ByteSizeValue(mostDiskEstimate.getTotalBytes()));
builder.humanReadableField(Fields.AVAILABLE_IN_BYTES, Fields.AVAILABLE, new ByteSizeValue(mostDiskEstimate.getFreeBytes()));
builder.field(Fields.USAGE_PERCENTAGE, mostDiskEstimate.getUsedDiskAsPercentage());
}
builder.endObject();
}
builder.startArray(Fields.DATA);
for (Path path : paths) {
path.toXContent(builder, params);
@ -573,13 +518,6 @@ public class FsInfo implements Iterable<FsInfo.Path>, Writeable, ToXContentFragm
static final String TIMESTAMP = "timestamp";
static final String DATA = "data";
static final String TOTAL = "total";
static final String TOTAL_IN_BYTES = "total_in_bytes";
static final String IO_STATS = "io_stats";
static final String LEAST_ESTIMATE = "least_usage_estimate";
static final String MOST_ESTIMATE = "most_usage_estimate";
static final String USAGE_PERCENTAGE = "used_disk_percent";
static final String AVAILABLE = "available";
static final String AVAILABLE_IN_BYTES = "available_in_bytes";
static final String PATH = "path";
}
}

View File

@ -23,9 +23,6 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.util.Constants;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.DiskUsage;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.io.PathUtils;
@ -51,7 +48,7 @@ public class FsProbe {
this.nodeEnv = nodeEnv;
}
public FsInfo stats(FsInfo previous, @Nullable ClusterInfo clusterInfo) throws IOException {
public FsInfo stats(FsInfo previous) throws IOException {
if (!nodeEnv.hasNodeFile()) {
return new FsInfo(System.currentTimeMillis(), null, new FsInfo.Path[0]);
}
@ -70,13 +67,7 @@ public class FsProbe {
}
ioStats = ioStats(devicesNumbers, previous);
}
DiskUsage leastDiskEstimate = null;
DiskUsage mostDiskEstimate = null;
if (clusterInfo != null) {
leastDiskEstimate = clusterInfo.getNodeLeastAvailableDiskUsages().get(nodeEnv.nodeId());
mostDiskEstimate = clusterInfo.getNodeMostAvailableDiskUsages().get(nodeEnv.nodeId());
}
return new FsInfo(System.currentTimeMillis(), ioStats, paths, leastDiskEstimate, mostDiskEstimate);
return new FsInfo(System.currentTimeMillis(), ioStats, paths);
}
final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers, final FsInfo previous) {

View File

@ -21,15 +21,12 @@ package org.elasticsearch.monitor.fs;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.SingleObjectCache;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.cluster.ClusterInfoService;
import java.io.IOException;
@ -38,9 +35,7 @@ public class FsService {
private static final Logger logger = LogManager.getLogger(FsService.class);
private final FsProbe probe;
private final TimeValue refreshInterval;
private final SingleObjectCache<FsInfo> cache;
private final ClusterInfoService clusterInfoService;
public static final Setting<TimeValue> REFRESH_INTERVAL_SETTING =
Setting.timeSetting(
@ -49,23 +44,22 @@ public class FsService {
TimeValue.timeValueSeconds(1),
Property.NodeScope);
public FsService(final Settings settings, final NodeEnvironment nodeEnvironment, ClusterInfoService clusterInfoService) {
public FsService(final Settings settings, final NodeEnvironment nodeEnvironment) {
this.probe = new FsProbe(nodeEnvironment);
this.clusterInfoService = clusterInfoService;
refreshInterval = REFRESH_INTERVAL_SETTING.get(settings);
final TimeValue refreshInterval = REFRESH_INTERVAL_SETTING.get(settings);
logger.debug("using refresh_interval [{}]", refreshInterval);
cache = new FsInfoCache(refreshInterval, stats(probe, null, logger, null));
cache = new FsInfoCache(refreshInterval, stats(probe, null));
}
public FsInfo stats() {
return cache.getOrRefresh();
}
private static FsInfo stats(FsProbe probe, FsInfo initialValue, Logger logger, @Nullable ClusterInfo clusterInfo) {
private static FsInfo stats(FsProbe probe, FsInfo initialValue) {
try {
return probe.stats(initialValue, clusterInfo);
return probe.stats(initialValue);
} catch (IOException e) {
logger.debug("unexpected exception reading filesystem info", e);
FsService.logger.debug("unexpected exception reading filesystem info", e);
return null;
}
}
@ -81,7 +75,7 @@ public class FsService {
@Override
protected FsInfo refresh() {
return stats(probe, initialValue, logger, clusterInfoService.getClusterInfo());
return stats(probe, initialValue);
}
}

View File

@ -406,7 +406,7 @@ public class Node implements Closeable {
for (Module pluginModule : pluginsService.createGuiceModules()) {
modules.add(pluginModule);
}
final MonitorService monitorService = new MonitorService(settings, nodeEnvironment, threadPool, clusterInfoService);
final MonitorService monitorService = new MonitorService(settings, nodeEnvironment, threadPool);
final FsHealthService fsHealthService = new FsHealthService(settings, clusterService.getClusterSettings(), threadPool,
nodeEnvironment);
ClusterModule clusterModule = new ClusterModule(settings, clusterService, clusterPlugins, clusterInfoService);

View File

@ -53,7 +53,7 @@ public class FsProbeTests extends ESTestCase {
try (NodeEnvironment env = newNodeEnvironment()) {
FsProbe probe = new FsProbe(env);
FsInfo stats = probe.stats(null, null);
FsInfo stats = probe.stats(null);
assertNotNull(stats);
assertThat(stats.getTimestamp(), greaterThan(0L));