Improve scheduling fairness when batching cluster state changes with equal priority (#20775)
As the wise man @ywelsch said: currently when we batch cluster state update tasks by the same executor, we the first task un-queued from the pending task queue. That means that other tasks for the same executor are left in the queue. When those are dequeued, they will trigger another run for the same executor. This can give unfair precedence to future tasks of the same executor, even if they weren't batched in the first run. Take this queue for example (all with equal priority) ``` T1 (executor 1) T2 (executor 1) T3 (executor 2) T4 (executor 2) T5 (executor 1) T6 (executor 1) ``` If T1 & T2 are picked up first (when T5 & T6 are not yet queued), one would expect T3 & T4 to run second. However, since T2 is still in the queue, it will trigger execution of T5 & T6. The fix is easy - ignore processed tasks when extracting them from the queue. Closes #20768
This commit is contained in:
parent
591a8d4ec6
commit
b847a835e9
|
@ -891,7 +891,11 @@ public class ClusterService extends AbstractLifecycleComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
runTasksForExecutor(executor);
|
// if this task is already processed, the executor shouldn't execute other tasks (that arrived later),
|
||||||
|
// to give other executors a chance to execute their tasks.
|
||||||
|
if (processed.get() == false) {
|
||||||
|
runTasksForExecutor(executor);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString(ClusterStateTaskExecutor<T> executor) {
|
public String toString(ClusterStateTaskExecutor<T> executor) {
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
package org.elasticsearch.cluster.service;
|
package org.elasticsearch.cluster.service;
|
||||||
|
|
||||||
import org.apache.logging.log4j.Level;
|
import org.apache.logging.log4j.Level;
|
||||||
import org.apache.logging.log4j.LogManager;
|
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.apache.logging.log4j.message.ParameterizedMessage;
|
import org.apache.logging.log4j.message.ParameterizedMessage;
|
||||||
import org.apache.logging.log4j.util.Supplier;
|
import org.apache.logging.log4j.util.Supplier;
|
||||||
|
@ -54,6 +53,8 @@ import org.junit.Before;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -301,6 +302,66 @@ public class ClusterServiceTests extends ESTestCase {
|
||||||
assertTrue(published.get());
|
assertTrue(published.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testOneExecutorDontStarveAnother() throws InterruptedException {
|
||||||
|
final List<String> executionOrder = Collections.synchronizedList(new ArrayList<>());
|
||||||
|
final Semaphore allowProcessing = new Semaphore(0);
|
||||||
|
final Semaphore startedProcessing = new Semaphore(0);
|
||||||
|
|
||||||
|
class TaskExecutor implements ClusterStateTaskExecutor<String> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BatchResult<String> execute(ClusterState currentState, List<String> tasks) throws Exception {
|
||||||
|
executionOrder.addAll(tasks); // do this first, so startedProcessing can be used as a notification that this is done.
|
||||||
|
startedProcessing.release(tasks.size());
|
||||||
|
allowProcessing.acquire(tasks.size());
|
||||||
|
return BatchResult.<String>builder().successes(tasks).build(ClusterState.builder(currentState).build());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean runOnlyOnMaster() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TaskExecutor executorA = new TaskExecutor();
|
||||||
|
TaskExecutor executorB = new TaskExecutor();
|
||||||
|
|
||||||
|
final ClusterStateTaskConfig config = ClusterStateTaskConfig.build(Priority.NORMAL);
|
||||||
|
final ClusterStateTaskListener noopListener = (source, e) -> { throw new AssertionError(source, e); };
|
||||||
|
// this blocks the cluster state queue, so we can set it up right
|
||||||
|
clusterService.submitStateUpdateTask("0", "A0", config, executorA, noopListener);
|
||||||
|
// wait to be processed
|
||||||
|
startedProcessing.acquire(1);
|
||||||
|
assertThat(executionOrder, equalTo(Arrays.asList("A0")));
|
||||||
|
|
||||||
|
|
||||||
|
// these will be the first batch
|
||||||
|
clusterService.submitStateUpdateTask("1", "A1", config, executorA, noopListener);
|
||||||
|
clusterService.submitStateUpdateTask("2", "A2", config, executorA, noopListener);
|
||||||
|
|
||||||
|
// release the first 0 task, but not the second
|
||||||
|
allowProcessing.release(1);
|
||||||
|
startedProcessing.acquire(2);
|
||||||
|
assertThat(executionOrder, equalTo(Arrays.asList("A0", "A1", "A2")));
|
||||||
|
|
||||||
|
// setup the queue with pending tasks for another executor same priority
|
||||||
|
clusterService.submitStateUpdateTask("3", "B3", config, executorB, noopListener);
|
||||||
|
clusterService.submitStateUpdateTask("4", "B4", config, executorB, noopListener);
|
||||||
|
|
||||||
|
|
||||||
|
clusterService.submitStateUpdateTask("5", "A5", config, executorA, noopListener);
|
||||||
|
clusterService.submitStateUpdateTask("6", "A6", config, executorA, noopListener);
|
||||||
|
|
||||||
|
// now release the processing
|
||||||
|
allowProcessing.release(6);
|
||||||
|
|
||||||
|
// wait for last task to be processed
|
||||||
|
startedProcessing.acquire(4);
|
||||||
|
|
||||||
|
assertThat(executionOrder, equalTo(Arrays.asList("A0", "A1", "A2", "B3", "B4", "A5", "A6")));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// test that for a single thread, tasks are executed in the order
|
// test that for a single thread, tasks are executed in the order
|
||||||
// that they are submitted
|
// that they are submitted
|
||||||
public void testClusterStateUpdateTasksAreExecutedInOrder() throws BrokenBarrierException, InterruptedException {
|
public void testClusterStateUpdateTasksAreExecutedInOrder() throws BrokenBarrierException, InterruptedException {
|
||||||
|
|
Loading…
Reference in New Issue