Increase disruption test publish timeout to 5s (#51803)

With the new mechanism for storing cluster state in lucene, we store index metadata in multiple data paths too. This causes cluster state publish to timeout too frequently with a 1s timeout, so increasing it to 5s. Also increasing follower check timeout to 5s since it also sometimes has fsync in its timeout path and leader check for symmetry. Closes #51329
2020-02-03 13:57:18 +01:00 · 2020-02-03 13:57:18 +01:00 · 918dfaff1f
parent 81388051d8
commit 918dfaff1f
3 changed files with 5 additions and 9 deletions
--- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java
+++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java
@ -123,12 +123,12 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase {
    }
    static final Settings DEFAULT_SETTINGS = Settings.builder()
-            .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
+            .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
            .put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
-            .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
+            .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
            .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
            .put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but to long so test won't time out
-            .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
+            .put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly
            .put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
            // value and the time of disruption and does not recover immediately
            // when disruption is stop. We should make sure we recover faster
--- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java
+++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java
@ -107,9 +107,8 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
    @TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," +
        "org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," +
        "org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
-        "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE," +
+        "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE",
-        "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
+        issueUrl = "https://github.com/elastic/elasticsearch/issues/41068")
        issueUrl = "https://github.com/elastic/elasticsearch/issues/41068,https://github.com/elastic/elasticsearch/issues/51329")
    public void testAckedIndexing() throws Exception {
        final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
--- a/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java
+++ b/server/src/test/java/org/elasticsearch/versioning/ConcurrentSeqNoVersioningIT.java
@ -38,7 +38,6 @@ import org.elasticsearch.discovery.AbstractDisruptionTestCase;
 import org.elasticsearch.index.engine.VersionConflictEngineException;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
 import org.elasticsearch.test.junit.annotations.TestIssueLogging;
 import org.elasticsearch.threadpool.Scheduler;
 import org.elasticsearch.threadpool.ThreadPool;
@ -132,8 +131,6 @@ public class ConcurrentSeqNoVersioningIT extends AbstractDisruptionTestCase {
    // multiple threads doing CAS updates.
    // Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
    // round.
    @TestIssueLogging(value = "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
        issueUrl = "https://github.com/elastic/elasticsearch/issues/51329")
    public void testSeqNoCASLinearizability() {
        final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);