In the assertion mentioned in the new comment we first get the `endingSnapshots` and then check that we don't have a listener that isn't referred to by it so we need to remove from the listers map before removing from `endingSnapshots` to avoid rare, random assertion tripping here with concurrent repository operations.
This commit is contained in:
parent
416ea4fcdc
commit
7e1fc6dc67
|
@ -1554,9 +1554,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
entry.version(),
|
||||
state -> stateWithoutSnapshot(state, snapshot),
|
||||
ActionListener.wrap(newRepoData -> {
|
||||
endingSnapshots.remove(snapshot);
|
||||
completeListenersIgnoringException(
|
||||
snapshotCompletionListeners.remove(snapshot), Tuple.tuple(newRepoData, snapshotInfo));
|
||||
completeListenersIgnoringException(endAndGetListenersToResolve(snapshot), Tuple.tuple(newRepoData, snapshotInfo));
|
||||
logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state());
|
||||
runNextQueuedOperation(newRepoData, repository, true);
|
||||
}, e -> handleFinalizationFailure(e, entry, repositoryData))),
|
||||
|
@ -1567,6 +1565,17 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a snapshot from {@link #endingSnapshots} set and return its completion listeners that must be resolved.
|
||||
*/
|
||||
private List<ActionListener<Tuple<RepositoryData, SnapshotInfo>>> endAndGetListenersToResolve(Snapshot snapshot) {
|
||||
// get listeners before removing from the ending snapshots set to not trip assertion in #assertConsistentWithClusterState that
|
||||
// makes sure we don't have listeners for snapshots that aren't tracked in any internal state of this class
|
||||
final List<ActionListener<Tuple<RepositoryData, SnapshotInfo>>> listenersToComplete = snapshotCompletionListeners.remove(snapshot);
|
||||
endingSnapshots.remove(snapshot);
|
||||
return listenersToComplete;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles failure to finalize a snapshot. If the exception indicates that this node was unable to publish a cluster state and stopped
|
||||
* being the master node, then fail all snapshot create and delete listeners executing on this node by delegating to
|
||||
|
@ -1814,8 +1823,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
}
|
||||
|
||||
private void failSnapshotCompletionListeners(Snapshot snapshot, Exception e) {
|
||||
endingSnapshots.remove(snapshot);
|
||||
failListenersIgnoringException(snapshotCompletionListeners.remove(snapshot), e);
|
||||
failListenersIgnoringException(endAndGetListenersToResolve(snapshot), e);
|
||||
assert repositoryOperations.assertNotQueued(snapshot);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue