Fix Overly Optimistic Request Deduplication (#51270) (#51291)

On master failover we have to resent all the shard failed messages,
but the transport requests remain the same in the eyes of `equals`.
If the master failover is registered and the requests to the new master
are sent before all the callbacks have executed and the request to the
old master removed from the deduplicator then the requuests to the new
master will incorrectly fail and the snapshot get stuck.

Closes #51253
This commit is contained in:
Armin Braun 2020-01-22 11:38:40 +01:00 committed by GitHub
parent d9ad66965c
commit 7b4c2bfdc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 0 deletions

View File

@ -357,6 +357,9 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements
return; return;
} }
// Clear request deduplicator since we need to send all requests that were potentially not handled by the previous
// master again
remoteFailedRequestDeduplicator.clear();
for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) { for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) { if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
Map<ShardId, IndexShardSnapshotStatus> localShards = currentSnapshotShards(snapshot.snapshot()); Map<ShardId, IndexShardSnapshotStatus> localShards = currentSnapshotShards(snapshot.snapshot());

View File

@ -53,6 +53,14 @@ public final class TransportRequestDeduplicator<T extends TransportRequest> {
} }
} }
/**
* Remove all tracked requests from this instance so that the first time {@link #executeOnce} is invoked with any request it triggers
* an actual request execution. Use this e.g. for requests to master that need to be sent again on master failover.
*/
public void clear() {
requests.clear();
}
public int size() { public int size() {
return requests.size(); return requests.size();
} }