diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactory.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactory.java index d0437e0778a..3abff6deaef 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactory.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactory.java @@ -48,6 +48,7 @@ public class ElectionSchedulerFactory extends AbstractComponent { private static final String ELECTION_INITIAL_TIMEOUT_SETTING_KEY = "cluster.election.initial_timeout"; private static final String ELECTION_BACK_OFF_TIME_SETTING_KEY = "cluster.election.back_off_time"; private static final String ELECTION_MAX_TIMEOUT_SETTING_KEY = "cluster.election.max_timeout"; + private static final String ELECTION_DURATION_SETTING_KEY = "cluster.election.duration"; /* * The first election is scheduled to occur a random number of milliseconds after the scheduler is started, where the random number of @@ -59,6 +60,8 @@ public class ElectionSchedulerFactory extends AbstractComponent { * number of milliseconds is chosen uniformly from * * (0, min(ELECTION_INITIAL_TIMEOUT_SETTING + (n-1) * ELECTION_BACK_OFF_TIME_SETTING, ELECTION_MAX_TIMEOUT_SETTING)] + * + * Each election lasts up to ELECTION_DURATION_SETTING. */ public static final Setting ELECTION_INITIAL_TIMEOUT_SETTING = Setting.timeSetting(ELECTION_INITIAL_TIMEOUT_SETTING_KEY, @@ -70,9 +73,13 @@ public class ElectionSchedulerFactory extends AbstractComponent { public static final Setting ELECTION_MAX_TIMEOUT_SETTING = Setting.timeSetting(ELECTION_MAX_TIMEOUT_SETTING_KEY, TimeValue.timeValueSeconds(10), TimeValue.timeValueMillis(200), TimeValue.timeValueSeconds(300), Property.NodeScope); + public static final Setting ELECTION_DURATION_SETTING = Setting.timeSetting(ELECTION_DURATION_SETTING_KEY, + TimeValue.timeValueMillis(500), TimeValue.timeValueMillis(1), TimeValue.timeValueSeconds(300), Property.NodeScope); + private final TimeValue initialTimeout; private final TimeValue backoffTime; private final TimeValue maxTimeout; + private final TimeValue duration; private final ThreadPool threadPool; private final Random random; @@ -85,6 +92,7 @@ public class ElectionSchedulerFactory extends AbstractComponent { initialTimeout = ELECTION_INITIAL_TIMEOUT_SETTING.get(settings); backoffTime = ELECTION_BACK_OFF_TIME_SETTING.get(settings); maxTimeout = ELECTION_MAX_TIMEOUT_SETTING.get(settings); + duration = ELECTION_DURATION_SETTING.get(settings); if (maxTimeout.millis() < initialTimeout.millis()) { throw new IllegalArgumentException(new ParameterizedMessage("[{}] is [{}], but must be at least [{}] which is [{}]", @@ -154,15 +162,11 @@ public class ElectionSchedulerFactory extends AbstractComponent { protected void doRun() { if (isClosed.get()) { logger.debug("{} not starting election", this); - return; + } else { + logger.debug("{} starting election", this); + scheduleNextElection(duration, scheduledRunnable); + scheduledRunnable.run(); } - logger.debug("{} starting election", this); - scheduledRunnable.run(); - } - - @Override - public void onAfter() { - scheduleNextElection(TimeValue.ZERO, scheduledRunnable); } @Override diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 438aec7944f..6bfd8d4d0f2 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -450,6 +450,7 @@ public final class ClusterSettings extends AbstractScopedSettings { ElectionSchedulerFactory.ELECTION_INITIAL_TIMEOUT_SETTING, ElectionSchedulerFactory.ELECTION_BACK_OFF_TIME_SETTING, ElectionSchedulerFactory.ELECTION_MAX_TIMEOUT_SETTING, + ElectionSchedulerFactory.ELECTION_DURATION_SETTING, Coordinator.PUBLISH_TIMEOUT_SETTING, JoinHelper.JOIN_TIMEOUT_SETTING ))); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java index 3221bc4d985..bdb2d772358 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java @@ -74,6 +74,7 @@ import static org.elasticsearch.cluster.coordination.Coordinator.Mode.LEADER; import static org.elasticsearch.cluster.coordination.Coordinator.PUBLISH_TIMEOUT_SETTING; import static org.elasticsearch.cluster.coordination.CoordinatorTests.Cluster.DEFAULT_DELAY_VARIABILITY; import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_BACK_OFF_TIME_SETTING; +import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_DURATION_SETTING; import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_INITIAL_TIMEOUT_SETTING; import static org.elasticsearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING; import static org.elasticsearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING; @@ -490,6 +491,7 @@ public class CoordinatorTests extends ESTestCase { // Then wait for an election to be scheduled; we allow enough time for retries to allow for collisions + defaultMillis(ELECTION_INITIAL_TIMEOUT_SETTING) * ELECTION_RETRIES + defaultMillis(ELECTION_BACK_OFF_TIME_SETTING) * ELECTION_RETRIES * (ELECTION_RETRIES - 1) / 2 + + defaultMillis(ELECTION_DURATION_SETTING) * ELECTION_RETRIES // Allow two round-trip for pre-voting and voting + 4 * DEFAULT_DELAY_VARIABILITY // Then a commit of the new leader's first cluster state diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactoryTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactoryTests.java index 6cb8a0bee85..d7f8dd8ddaa 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/ElectionSchedulerFactoryTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.test.ESTestCase; import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_BACK_OFF_TIME_SETTING; +import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_DURATION_SETTING; import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_INITIAL_TIMEOUT_SETTING; import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.ELECTION_MAX_TIMEOUT_SETTING; import static org.elasticsearch.cluster.coordination.ElectionSchedulerFactory.toPositiveLongAtMost; @@ -48,7 +49,7 @@ public class ElectionSchedulerFactoryTests extends ESTestCase { private void assertElectionSchedule(final DeterministicTaskQueue deterministicTaskQueue, final ElectionSchedulerFactory electionSchedulerFactory, - final long initialTimeout, final long backOffTime, final long maxTimeout) { + final long initialTimeout, final long backOffTime, final long maxTimeout, final long duration) { final TimeValue initialGracePeriod = randomGracePeriod(); final AtomicBoolean electionStarted = new AtomicBoolean(); @@ -56,7 +57,7 @@ public class ElectionSchedulerFactoryTests extends ESTestCase { try (Releasable ignored = electionSchedulerFactory.startElectionScheduler(initialGracePeriod, () -> assertTrue(electionStarted.compareAndSet(false, true)))) { - long lastElectionTime = deterministicTaskQueue.getCurrentTimeMillis(); + long lastElectionFinishTime = deterministicTaskQueue.getCurrentTimeMillis(); int electionCount = 0; while (true) { @@ -70,10 +71,10 @@ public class ElectionSchedulerFactoryTests extends ESTestCase { } assertTrue(electionStarted.compareAndSet(true, false)); - final long thisElectionTime = deterministicTaskQueue.getCurrentTimeMillis(); + final long thisElectionStartTime = deterministicTaskQueue.getCurrentTimeMillis(); if (electionCount == 1) { - final long electionDelay = thisElectionTime - lastElectionTime; + final long electionDelay = thisElectionStartTime - lastElectionFinishTime; // Check grace period assertThat(electionDelay, greaterThanOrEqualTo(initialGracePeriod.millis())); @@ -84,7 +85,7 @@ public class ElectionSchedulerFactoryTests extends ESTestCase { } else { - final long electionDelay = thisElectionTime - lastElectionTime; + final long electionDelay = thisElectionStartTime - lastElectionFinishTime; // Check upper bound assertThat(electionDelay, lessThanOrEqualTo(initialTimeout + backOffTime * (electionCount - 1))); @@ -98,7 +99,7 @@ public class ElectionSchedulerFactoryTests extends ESTestCase { } } - lastElectionTime = thisElectionTime; + lastElectionFinishTime = thisElectionStartTime + duration; } } deterministicTaskQueue.runAllTasks(); @@ -125,19 +126,28 @@ public class ElectionSchedulerFactoryTests extends ESTestCase { randomLongBetween(Math.max(200, initialTimeoutMillis), 180000) + "ms"); } + final long electionDurationMillis; + if (randomBoolean()) { + electionDurationMillis = randomLongBetween(1, 300000); + settingsBuilder.put(ELECTION_DURATION_SETTING.getKey(), electionDurationMillis + "ms"); + } else { + electionDurationMillis = ELECTION_DURATION_SETTING.get(Settings.EMPTY).millis(); + } + final Settings settings = settingsBuilder.put(NODE_NAME_SETTING.getKey(), "node").build(); final long initialTimeout = ELECTION_INITIAL_TIMEOUT_SETTING.get(settings).millis(); final long backOffTime = ELECTION_BACK_OFF_TIME_SETTING.get(settings).millis(); final long maxTimeout = ELECTION_MAX_TIMEOUT_SETTING.get(settings).millis(); + final long duration = ELECTION_DURATION_SETTING.get(settings).millis(); final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random()); final ElectionSchedulerFactory electionSchedulerFactory = new ElectionSchedulerFactory(settings, random(), deterministicTaskQueue.getThreadPool()); - assertElectionSchedule(deterministicTaskQueue, electionSchedulerFactory, initialTimeout, backOffTime, maxTimeout); + assertElectionSchedule(deterministicTaskQueue, electionSchedulerFactory, initialTimeout, backOffTime, maxTimeout, duration); // do it again to show that the max is reset when the scheduler is restarted - assertElectionSchedule(deterministicTaskQueue, electionSchedulerFactory, initialTimeout, backOffTime, maxTimeout); + assertElectionSchedule(deterministicTaskQueue, electionSchedulerFactory, initialTimeout, backOffTime, maxTimeout, duration); } public void testSettingsValidation() {