Increase disruption test publish timeout to 5s (#51803)
With the new mechanism for storing cluster state in Lucene, we store index metadata in multiple data paths too. This causes cluster state publication to time out too frequently with a 1s timeout, so this commit increases the publish timeout to 5s. It also increases the follower check timeout to 5s, since that check sometimes has an fsync in its timeout path as well, and increases the leader check timeout to 5s for symmetry. Closes #51329
This commit is contained in:
parent
81388051d8
commit
918dfaff1f
|
@ -123,12 +123,12 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
static final Settings DEFAULT_SETTINGS = Settings.builder()
|
static final Settings DEFAULT_SETTINGS = Settings.builder()
|
||||||
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
|
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
|
||||||
.put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
|
.put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
|
||||||
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly
|
.put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "5s") // for hitting simulated network failures quickly
|
||||||
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
|
.put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) // for hitting simulated network failures quickly
|
||||||
.put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but not too long so test won't time out
|
.put(JoinHelper.JOIN_TIMEOUT_SETTING.getKey(), "10s") // still long to induce failures but not too long so test won't time out
|
||||||
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly
|
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "5s") // <-- for hitting simulated network failures quickly
|
||||||
.put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
|
.put(TransportSettings.CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this
|
||||||
// value and the time of disruption and does not recover immediately
|
// value and the time of disruption and does not recover immediately
|
||||||
// when disruption is stopped. We should make sure we recover faster
|
// when disruption is stopped. We should make sure we recover faster
|
||||||
|
|
|
@ -107,9 +107,8 @@ public class ClusterDisruptionIT extends AbstractDisruptionTestCase {
|
||||||
@TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," +
|
@TestIssueLogging(value = "_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE," +
|
||||||
"org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," +
|
"org.elasticsearch.discovery:TRACE,org.elasticsearch.action.support.replication:TRACE," +
|
||||||
"org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
|
"org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," +
|
||||||
"org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE," +
|
"org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE",
|
||||||
"org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
|
issueUrl = "https://github.com/elastic/elasticsearch/issues/41068")
|
||||||
issueUrl = "https://github.com/elastic/elasticsearch/issues/41068,https://github.com/elastic/elasticsearch/issues/51329")
|
|
||||||
public void testAckedIndexing() throws Exception {
|
public void testAckedIndexing() throws Exception {
|
||||||
|
|
||||||
final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
|
final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
|
||||||
|
|
|
@ -38,7 +38,6 @@ import org.elasticsearch.discovery.AbstractDisruptionTestCase;
|
||||||
import org.elasticsearch.index.engine.VersionConflictEngineException;
|
import org.elasticsearch.index.engine.VersionConflictEngineException;
|
||||||
import org.elasticsearch.test.ESIntegTestCase;
|
import org.elasticsearch.test.ESIntegTestCase;
|
||||||
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
|
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
|
||||||
import org.elasticsearch.test.junit.annotations.TestIssueLogging;
|
|
||||||
import org.elasticsearch.threadpool.Scheduler;
|
import org.elasticsearch.threadpool.Scheduler;
|
||||||
import org.elasticsearch.threadpool.ThreadPool;
|
import org.elasticsearch.threadpool.ThreadPool;
|
||||||
|
|
||||||
|
@ -132,8 +131,6 @@ public class ConcurrentSeqNoVersioningIT extends AbstractDisruptionTestCase {
|
||||||
// multiple threads doing CAS updates.
|
// multiple threads doing CAS updates.
|
||||||
// Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
|
// Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
|
||||||
// round.
|
// round.
|
||||||
@TestIssueLogging(value = "org.elasticsearch.gateway.PersistedClusterStateService:TRACE",
|
|
||||||
issueUrl = "https://github.com/elastic/elasticsearch/issues/51329")
|
|
||||||
public void testSeqNoCASLinearizability() {
|
public void testSeqNoCASLinearizability() {
|
||||||
final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);
|
final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue