From 1f49e0b9d010e0cb20649eac8796b73be63256a9 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 10 Aug 2020 09:39:18 +0100 Subject: [PATCH] Fix testRerouteOccursOnDiskPassingHighWatermark (#60869) Sometimes this test would refresh the disk stats so quickly that it hit the refresh rate limiter even though it was almost completely disabled. This commit allows the rate limiter to be completely disabled. Closes #60587 --- .../routing/allocation/decider/MockDiskUsagesIT.java | 8 ++------ .../cluster/routing/allocation/DiskThresholdMonitor.java | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/MockDiskUsagesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/MockDiskUsagesIT.java index 972500a26c2..e3ea1a6d351 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/MockDiskUsagesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/MockDiskUsagesIT.java @@ -34,7 +34,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.monitor.fs.FsInfo; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.junit.annotations.TestLogging; import java.nio.file.Path; import java.util.Arrays; @@ -79,9 +78,6 @@ public class MockDiskUsagesIT extends ESIntegTestCase { return new FsInfo.Path(original.getPath(), original.getMount(), totalBytes, freeBytes, freeBytes); } - @TestLogging(reason="https://github.com/elastic/elasticsearch/issues/60587", - value="org.elasticsearch.cluster.InternalClusterInfoService:TRACE," + - "org.elasticsearch.cluster.routing.allocation.DiskThresholdMonitor:TRACE") public void testRerouteOccursOnDiskPassingHighWatermark() throws Exception { for (int i = 0; i < 3; i++) { // ensure that each node has a single data path @@ -105,7 +101,7 @@ public class MockDiskUsagesIT extends ESIntegTestCase { .put(CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), watermarkBytes ? "10b" : "90%") .put(CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), watermarkBytes ? "10b" : "90%") .put(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), watermarkBytes ? "0b" : "100%") - .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms"))); + .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "0ms"))); // Create an index with 10 shards so we can check allocation for it assertAcked(prepareCreate("test").setSettings(Settings.builder().put("number_of_shards", 10).put("number_of_replicas", 0))); ensureGreen("test"); @@ -241,7 +237,7 @@ public class MockDiskUsagesIT extends ESIntegTestCase { .put(CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "90%") .put(CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "90%") .put(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), "100%") - .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms"))); + .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "0ms"))); final List nodeIds = StreamSupport.stream(client().admin().cluster().prepareState().get().getState() .getRoutingNodes().spliterator(), false).map(RoutingNode::nodeId).collect(Collectors.toList()); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java index 239725ec1a4..8dae37c69c3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java @@ -190,7 +190,7 @@ public class DiskThresholdMonitor { nodesOverLowThreshold.add(node); nodesOverHighThreshold.add(node); - if (lastRunTimeMillis.get() < currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) { + if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) { reroute = true; explanation = "high disk watermark exceeded on one or more nodes"; usagesOverHighThreshold.add(usage); @@ -225,7 +225,7 @@ public class DiskThresholdMonitor { if (nodesOverLowThreshold.contains(node)) { // The node has previously been over the low watermark, but is no longer, so it may be possible to allocate more shards // if we reroute now. - if (lastRunTimeMillis.get() < currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) { + if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) { reroute = true; explanation = "one or more nodes has gone under the high or low watermark"; nodesOverLowThreshold.remove(node);