It looks like #52000 is caused by a slowdown in cluster state application (maybe due to #50907), but I would like to understand the details to ensure that nothing else is going on here before simply increasing the timeout. This commit enables some relevant `DEBUG` loggers and also captures stack traces from all threads rather than just the three hottest ones.
This commit is contained in:
parent
84de601551
commit
3d57a78deb
|
@ -57,6 +57,7 @@ import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
|
|||
import org.elasticsearch.test.ESIntegTestCase.Scope;
|
||||
import org.elasticsearch.test.InternalTestCluster;
|
||||
import org.elasticsearch.test.MockLogAppender;
|
||||
import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
|
@ -187,6 +188,8 @@ public class ClusterRerouteIT extends ESIntegTestCase {
|
|||
rerouteWithAllocateLocalGateway(commonSettings);
|
||||
}
|
||||
|
||||
@TestLogging(reason = "https://github.com/elastic/elasticsearch/issues/52000",
|
||||
value = "org.elasticsearch.gateway.PersistedClusterStateService:DEBUG,org.elasticsearch.cluster.service.MasterService:DEBUG")
|
||||
public void testDelayWithALargeAmountOfShards() throws Exception {
|
||||
Settings commonSettings = Settings.builder()
|
||||
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)
|
||||
|
|
|
@ -919,8 +919,8 @@ public abstract class ESIntegTestCase extends ESTestCase {
|
|||
|
||||
ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
|
||||
if (actionGet.isTimedOut()) {
|
||||
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
|
||||
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
|
||||
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setThreads(99999).setIgnoreIdleThreads(false)
|
||||
.get().getNodes().stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
|
||||
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
|
||||
method,
|
||||
client().admin().cluster().prepareState().get().getState(),
|
||||
|
|
Loading…
Reference in New Issue