Fix ClusterInfoServiceIT timeouts (#36758)

The test testClusterInfoServiceInformationClearOnError relies on timing behavior. It sets InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING to 1s and relies on the fact that the stats request completes within that timeframe (which our ever-so-slow CI seems to violate at times). Unfortunately the logging has been misimplemented in InternalClusterInfoService, so the corresponding log messages showing that the requests have timed out are missing for this. The issue can be locally reproduced by reducing the timeout to something lower. Closes #36554
2018-12-19 13:59:58 +01:00 · 2018-12-19 13:59:58 +01:00 · 8f141b8a41
parent 18691daebe
commit 8f141b8a41
2 changed files with 13 additions and 5 deletions
--- a/server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java
+++ b/server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java
@ -345,17 +345,19 @@ public class InternalClusterInfoService implements ClusterInfoService, LocalNode
        });

        try {
-            nodeLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS);
+            if (nodeLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) {
+                logger.warn("Failed to update node information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
+            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore interrupt status
-            logger.warn("Failed to update node information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
        }

        try {
-            indicesLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS);
+            if (indicesLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS) == false) {
+                logger.warn("Failed to update shard information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
+            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore interrupt status
-            logger.warn("Failed to update shard information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
        }
        ClusterInfo clusterInfo = getClusterInfo();
        try {
--- a/server/src/test/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java
+++ b/server/src/test/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java
@ -111,7 +111,6 @@ public class ClusterInfoServiceIT extends ESIntegTestCase {
            .put(super.nodeSettings(nodeOrdinal))
            // manual collection or upon cluster forming.
            .put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), 2)
-            .put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), "1s")
            .build();
    }

@ -120,6 +119,11 @@ public class ClusterInfoServiceIT extends ESIntegTestCase {
        return Arrays.asList(TestPlugin.class, MockTransportService.TestPlugin.class);
    }

+    private void setClusterInfoTimeout(String timeValue) {
+        assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder()
+            .put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.getKey(), timeValue).build()));
+    }
+
    public void testClusterInfoServiceCollectsInformation() throws Exception {
        internalCluster().startNodes(2);
        assertAcked(prepareCreate("test").setSettings(Settings.builder()
@ -204,6 +208,7 @@ public class ClusterInfoServiceIT extends ESIntegTestCase {
                });
        }

+        setClusterInfoTimeout("1s");
        // timeouts shouldn't clear the info
        timeout.set(true);
        info = infoService.refresh();
@ -237,6 +242,7 @@ public class ClusterInfoServiceIT extends ESIntegTestCase {

        // check we recover
        blockingActionFilter.blockActions();
+        setClusterInfoTimeout("15s");
        info = infoService.refresh();
        assertNotNull("info should not be null", info);
        assertThat(info.getNodeLeastAvailableDiskUsages().size(), equalTo(2));