From 3d57a78deb2482db8bd3c7aa98b607113ecd0623 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 18 Feb 2020 13:01:56 +0000 Subject: [PATCH] Add extra logging for investigation into #52000 (#52472) It looks like #52000 is caused by a slowdown in cluster state application (maybe due to #50907) but I would like to understand the details to ensure that there's nothing else going on here too before simply increasing the timeout. This commit enables some relevant `DEBUG` loggers and also captures stack traces from all threads rather than just the three hottest ones. --- .../elasticsearch/cluster/allocation/ClusterRerouteIT.java | 3 +++ .../src/main/java/org/elasticsearch/test/ESIntegTestCase.java | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java b/server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java index d629804b02d..411c47ef737 100644 --- a/server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java +++ b/server/src/test/java/org/elasticsearch/cluster/allocation/ClusterRerouteIT.java @@ -57,6 +57,7 @@ import org.elasticsearch.test.ESIntegTestCase.ClusterScope; import org.elasticsearch.test.ESIntegTestCase.Scope; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.MockLogAppender; +import org.elasticsearch.test.junit.annotations.TestLogging; import java.nio.file.Path; import java.util.Arrays; @@ -187,6 +188,8 @@ public class ClusterRerouteIT extends ESIntegTestCase { rerouteWithAllocateLocalGateway(commonSettings); } + @TestLogging(reason = "https://github.com/elastic/elasticsearch/issues/52000", + value = "org.elasticsearch.gateway.PersistedClusterStateService:DEBUG,org.elasticsearch.cluster.service.MasterService:DEBUG") public void testDelayWithALargeAmountOfShards() throws Exception { Settings commonSettings = Settings.builder() .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 58c40796738..de6601df33e 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -919,8 +919,8 @@ public abstract class ESIntegTestCase extends ESTestCase { ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet(); if (actionGet.isTimedOut()) { - final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes() - .stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n")); + final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setThreads(99999).setIgnoreIdleThreads(false) + .get().getNodes().stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n")); logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n", method, client().admin().cluster().prepareState().get().getState(),