It looks like #52000 is caused by a slowdown in cluster state application (maybe due to #50907) but I would like to understand the details to ensure that there's nothing else going on here too before simply increasing the timeout. This commit enables some relevant `DEBUG` loggers and also captures stack traces from all threads rather than just the three hottest ones.
This commit is contained in:
parent
84de601551
commit
3d57a78deb
|
@ -57,6 +57,7 @@ import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
|
||||||
import org.elasticsearch.test.ESIntegTestCase.Scope;
|
import org.elasticsearch.test.ESIntegTestCase.Scope;
|
||||||
import org.elasticsearch.test.InternalTestCluster;
|
import org.elasticsearch.test.InternalTestCluster;
|
||||||
import org.elasticsearch.test.MockLogAppender;
|
import org.elasticsearch.test.MockLogAppender;
|
||||||
|
import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||||
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -187,6 +188,8 @@ public class ClusterRerouteIT extends ESIntegTestCase {
|
||||||
rerouteWithAllocateLocalGateway(commonSettings);
|
rerouteWithAllocateLocalGateway(commonSettings);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@TestLogging(reason = "https://github.com/elastic/elasticsearch/issues/52000",
|
||||||
|
value = "org.elasticsearch.gateway.PersistedClusterStateService:DEBUG,org.elasticsearch.cluster.service.MasterService:DEBUG")
|
||||||
public void testDelayWithALargeAmountOfShards() throws Exception {
|
public void testDelayWithALargeAmountOfShards() throws Exception {
|
||||||
Settings commonSettings = Settings.builder()
|
Settings commonSettings = Settings.builder()
|
||||||
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)
|
.put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), 1)
|
||||||
|
|
|
@ -919,8 +919,8 @@ public abstract class ESIntegTestCase extends ESTestCase {
|
||||||
|
|
||||||
ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
|
ClusterHealthResponse actionGet = client().admin().cluster().health(healthRequest).actionGet();
|
||||||
if (actionGet.isTimedOut()) {
|
if (actionGet.isTimedOut()) {
|
||||||
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setIgnoreIdleThreads(false).get().getNodes()
|
final String hotThreads = client().admin().cluster().prepareNodesHotThreads().setThreads(99999).setIgnoreIdleThreads(false)
|
||||||
.stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
|
.get().getNodes().stream().map(NodeHotThreads::getHotThreads).collect(Collectors.joining("\n"));
|
||||||
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
|
logger.info("{} timed out, cluster state:\n{}\npending tasks:\n{}\nhot threads:\n{}\n",
|
||||||
method,
|
method,
|
||||||
client().admin().cluster().prepareState().get().getState(),
|
client().admin().cluster().prepareState().get().getState(),
|
||||||
|
|
Loading…
Reference in New Issue