Fix Issue with Concurrent Snapshot Init + Delete (#38518)
* Fix issue with concurrent snapshot init + delete by ensuring that we're not finalizing a snapshot in the repository while it is still initializing on another thread
* Closes #38489
This commit is contained in:
parent
92756288b4
commit
238425e5e7
|
@ -331,7 +331,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
||||||
public TimeValue timeout() {
|
public TimeValue timeout() {
|
||||||
return request.masterNodeTimeout();
|
return request.masterNodeTimeout();
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -394,6 +393,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
||||||
|
|
||||||
boolean snapshotCreated;
|
boolean snapshotCreated;
|
||||||
|
|
||||||
|
boolean hadAbortedInitializations;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void doRun() {
|
protected void doRun() {
|
||||||
assert initializingSnapshots.contains(snapshot.snapshot());
|
assert initializingSnapshots.contains(snapshot.snapshot());
|
||||||
|
@ -433,6 +434,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
||||||
|
|
||||||
if (entry.state() == State.ABORTED) {
|
if (entry.state() == State.ABORTED) {
|
||||||
entries.add(entry);
|
entries.add(entry);
|
||||||
|
assert entry.shards().isEmpty();
|
||||||
|
hadAbortedInitializations = true;
|
||||||
} else {
|
} else {
|
||||||
// Replace the snapshot that was just initialized
|
// Replace the snapshot that was just initialized
|
||||||
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards =
|
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards =
|
||||||
|
@ -491,6 +494,14 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
||||||
// completion listener in this method. For the snapshot completion to work properly, the snapshot
|
// completion listener in this method. For the snapshot completion to work properly, the snapshot
|
||||||
// should still exist when listener is registered.
|
// should still exist when listener is registered.
|
||||||
userCreateSnapshotListener.onResponse(snapshot.snapshot());
|
userCreateSnapshotListener.onResponse(snapshot.snapshot());
|
||||||
|
|
||||||
|
if (hadAbortedInitializations) {
|
||||||
|
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
|
||||||
|
assert snapshotsInProgress != null;
|
||||||
|
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
|
||||||
|
assert entry != null;
|
||||||
|
endSnapshot(entry);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -701,8 +712,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
||||||
// 3. Snapshots in any other state that have all their shard tasks completed
|
// 3. Snapshots in any other state that have all their shard tasks completed
|
||||||
snapshotsInProgress.entries().stream().filter(
|
snapshotsInProgress.entries().stream().filter(
|
||||||
entry -> entry.state().completed()
|
entry -> entry.state().completed()
|
||||||
|| entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false
|
|| initializingSnapshots.contains(entry.snapshot()) == false
|
||||||
|| entry.state() != State.INIT && completed(entry.shards().values())
|
&& (entry.state() == State.INIT || completed(entry.shards().values()))
|
||||||
).forEach(this::endSnapshot);
|
).forEach(this::endSnapshot);
|
||||||
}
|
}
|
||||||
if (newMaster) {
|
if (newMaster) {
|
||||||
|
|
|
@ -855,7 +855,6 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
|
||||||
assertEquals(0, snapshotInfo.failedShards());
|
assertEquals(0, snapshotInfo.failedShards());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testMasterAndDataShutdownDuringSnapshot() throws Exception {
|
public void testMasterAndDataShutdownDuringSnapshot() throws Exception {
|
||||||
logger.info("--> starting three master nodes and two data nodes");
|
logger.info("--> starting three master nodes and two data nodes");
|
||||||
internalCluster().startMasterOnlyNodes(3);
|
internalCluster().startMasterOnlyNodes(3);
|
||||||
|
|
Loading…
Reference in New Issue