Fix Race in ClusterApplierService Shutdown (#62944) (#63228)

The iteration over `timeoutClusterStateListeners` in `doStop` starts while the
cluster state (CS) applier thread is still running. As a result, entries can
still be added to the map during shutdown; their listeners are then never
resolved and thus leak, as observed in a stuck test in #62863.
Since `listener.onClose()` is idempotent, we can simply call it whenever we run into a stopped
service on the CS thread. This avoids the race with certainty, because the iteration in `doStop`
starts only after the stopped state has been set.

Closes #62863
This commit is contained in:
Armin Braun 2020-10-05 12:35:42 +02:00 committed by GitHub
parent 01950bc80f
commit 106695bec8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 1 deletions

View File

@ -1766,7 +1766,6 @@ public class IndexRecoveryIT extends ESIntegTestCase {
} }
} }
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/62863")
public void testPeerRecoveryTrimsLocalTranslog() throws Exception { public void testPeerRecoveryTrimsLocalTranslog() throws Exception {
internalCluster().startNode(); internalCluster().startNode();
List<String> dataNodes = internalCluster().startDataOnlyNodes(2); List<String> dataNodes = internalCluster().startDataOnlyNodes(2);

View File

@ -271,6 +271,10 @@ public class ClusterApplierService extends AbstractLifecycleComponent implements
final NotifyTimeout notifyTimeout = new NotifyTimeout(listener, timeout); final NotifyTimeout notifyTimeout = new NotifyTimeout(listener, timeout);
final NotifyTimeout previous = timeoutClusterStateListeners.put(listener, notifyTimeout); final NotifyTimeout previous = timeoutClusterStateListeners.put(listener, notifyTimeout);
assert previous == null : "Added same listener [" + listener + "]"; assert previous == null : "Added same listener [" + listener + "]";
if (lifecycle.stoppedOrClosed()) {
listener.onClose();
return;
}
if (timeout != null) { if (timeout != null) {
notifyTimeout.cancellable = threadPool.schedule(notifyTimeout, timeout, ThreadPool.Names.GENERIC); notifyTimeout.cancellable = threadPool.schedule(notifyTimeout, timeout, ThreadPool.Names.GENERIC);
} }