Fix Issue with Concurrent Snapshot Init + Delete (#38518)
* Fix Issue with Concurrent Snapshot Init + Delete by ensuring that we're not finalizing a snapshot in the repository while it is initializing on another thread * Closes #38489
This commit is contained in:
parent
92756288b4
commit
238425e5e7
|
@ -331,7 +331,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
public TimeValue timeout() {
|
||||
return request.masterNodeTimeout();
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -394,6 +393,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
|
||||
boolean snapshotCreated;
|
||||
|
||||
boolean hadAbortedInitializations;
|
||||
|
||||
@Override
|
||||
protected void doRun() {
|
||||
assert initializingSnapshots.contains(snapshot.snapshot());
|
||||
|
@ -433,6 +434,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
|
||||
if (entry.state() == State.ABORTED) {
|
||||
entries.add(entry);
|
||||
assert entry.shards().isEmpty();
|
||||
hadAbortedInitializations = true;
|
||||
} else {
|
||||
// Replace the snapshot that was just initialized
|
||||
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards =
|
||||
|
@ -491,6 +494,14 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
// completion listener in this method. For the snapshot completion to work properly, the snapshot
|
||||
// should still exist when listener is registered.
|
||||
userCreateSnapshotListener.onResponse(snapshot.snapshot());
|
||||
|
||||
if (hadAbortedInitializations) {
|
||||
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
|
||||
assert snapshotsInProgress != null;
|
||||
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
|
||||
assert entry != null;
|
||||
endSnapshot(entry);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -701,8 +712,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
// 3. Snapshots in any other state that have all their shard tasks completed
|
||||
snapshotsInProgress.entries().stream().filter(
|
||||
entry -> entry.state().completed()
|
||||
|| entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false
|
||||
|| entry.state() != State.INIT && completed(entry.shards().values())
|
||||
|| initializingSnapshots.contains(entry.snapshot()) == false
|
||||
&& (entry.state() == State.INIT || completed(entry.shards().values()))
|
||||
).forEach(this::endSnapshot);
|
||||
}
|
||||
if (newMaster) {
|
||||
|
|
|
@ -855,7 +855,6 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
|||
assertEquals(0, snapshotInfo.failedShards());
|
||||
}
|
||||
|
||||
|
||||
public void testMasterAndDataShutdownDuringSnapshot() throws Exception {
|
||||
logger.info("--> starting three master nodes and two data nodes");
|
||||
internalCluster().startMasterOnlyNodes(3);
|
||||
|
|
Loading…
Reference in New Issue