Fix testRerouteOccursOnDiskPassingHighWatermark (#60869)

Sometimes this test would refresh the disk stats so quickly that it hit
the refresh rate limiter even though it was almost completely disabled.
This commit allows the rate limiter to be completely disabled.

Closes #60587
David Turner 2020-08-10 09:39:18 +01:00
parent 3bac6db466
commit 1f49e0b9d0
2 changed files with 4 additions and 8 deletions

MockDiskUsagesIT.java

@@ -34,7 +34,6 @@ import org.elasticsearch.env.Environment;
 import org.elasticsearch.monitor.fs.FsInfo;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESIntegTestCase;
-import org.elasticsearch.test.junit.annotations.TestLogging;
 
 import java.nio.file.Path;
 import java.util.Arrays;
@@ -79,9 +78,6 @@ public class MockDiskUsagesIT extends ESIntegTestCase {
         return new FsInfo.Path(original.getPath(), original.getMount(), totalBytes, freeBytes, freeBytes);
     }
 
-    @TestLogging(reason="https://github.com/elastic/elasticsearch/issues/60587",
-        value="org.elasticsearch.cluster.InternalClusterInfoService:TRACE," +
-            "org.elasticsearch.cluster.routing.allocation.DiskThresholdMonitor:TRACE")
     public void testRerouteOccursOnDiskPassingHighWatermark() throws Exception {
         for (int i = 0; i < 3; i++) {
             // ensure that each node has a single data path
@@ -105,7 +101,7 @@ public class MockDiskUsagesIT extends ESIntegTestCase {
             .put(CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), watermarkBytes ? "10b" : "90%")
             .put(CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), watermarkBytes ? "10b" : "90%")
             .put(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), watermarkBytes ? "0b" : "100%")
-            .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms")));
+            .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "0ms")));
         // Create an index with 10 shards so we can check allocation for it
         assertAcked(prepareCreate("test").setSettings(Settings.builder().put("number_of_shards", 10).put("number_of_replicas", 0)));
         ensureGreen("test");
@@ -241,7 +237,7 @@ public class MockDiskUsagesIT extends ESIntegTestCase {
             .put(CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "90%")
             .put(CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "90%")
             .put(CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), "100%")
-            .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "1ms")));
+            .put(CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "0ms")));
 
         final List<String> nodeIds = StreamSupport.stream(client().admin().cluster().prepareState().get().getState()
             .getRoutingNodes().spliterator(), false).map(RoutingNode::nodeId).collect(Collectors.toList());

DiskThresholdMonitor.java

@@ -190,7 +190,7 @@ public class DiskThresholdMonitor {
                 nodesOverLowThreshold.add(node);
                 nodesOverHighThreshold.add(node);
 
-                if (lastRunTimeMillis.get() < currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
+                if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
                     reroute = true;
                     explanation = "high disk watermark exceeded on one or more nodes";
                     usagesOverHighThreshold.add(usage);
@@ -225,7 +225,7 @@ public class DiskThresholdMonitor {
                 if (nodesOverLowThreshold.contains(node)) {
                     // The node has previously been over the low watermark, but is no longer, so it may be possible to allocate more shards
                     // if we reroute now.
-                    if (lastRunTimeMillis.get() < currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
+                    if (lastRunTimeMillis.get() <= currentTimeMillis - diskThresholdSettings.getRerouteInterval().millis()) {
                         reroute = true;
                         explanation = "one or more nodes has gone under the high or low watermark";
                         nodesOverLowThreshold.remove(node);
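
For illustration only, here is a minimal standalone sketch (hypothetical class and method names, not the actual DiskThresholdMonitor API) of why the strict < comparison could still throttle a reroute when two monitor runs land in the same millisecond, and why <= together with the test's new "0ms" interval disables the rate limiter completely:

// Hypothetical sketch of the throttling comparison before and after this commit.
public class RerouteThrottleSketch {

    // Pre-commit check: strict '<' still skips a reroute when the monitor
    // runs again within the same millisecond, even with a very small interval.
    static boolean shouldRerouteBefore(long lastRunMillis, long nowMillis, long intervalMillis) {
        return lastRunMillis < nowMillis - intervalMillis;
    }

    // Post-commit check: '<=' with a 0ms interval always allows the reroute,
    // i.e. the rate limiter is completely disabled.
    static boolean shouldRerouteAfter(long lastRunMillis, long nowMillis, long intervalMillis) {
        return lastRunMillis <= nowMillis - intervalMillis;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        // Two monitor runs landing in the same millisecond, interval set to 0ms:
        System.out.println(shouldRerouteBefore(now, now, 0)); // false -> reroute throttled (what the flaky test hit)
        System.out.println(shouldRerouteAfter(now, now, 0));  // true  -> reroute proceeds
    }
}

With the test now setting CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING to "0ms" and the monitor comparing with <=, back-to-back runs in the same millisecond can still trigger a reroute.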