Fix Issue with Concurrent Snapshot Init + Delete (#38518)

* Fix Issue with Concurrent Snapshot Init + Delete by ensuring that we're not finalizing a snapshot in the repository while it is initializing on another thread

* Closes #38489
This commit is contained in:
Armin Braun 2019-02-08 15:59:19 +01:00 committed by Tal Levy
parent 92756288b4
commit 238425e5e7
2 changed files with 14 additions and 4 deletions

View File

@ -331,7 +331,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
public TimeValue timeout() { public TimeValue timeout() {
return request.masterNodeTimeout(); return request.masterNodeTimeout();
} }
}); });
} }
@ -394,6 +393,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
boolean snapshotCreated; boolean snapshotCreated;
boolean hadAbortedInitializations;
@Override @Override
protected void doRun() { protected void doRun() {
assert initializingSnapshots.contains(snapshot.snapshot()); assert initializingSnapshots.contains(snapshot.snapshot());
@ -433,6 +434,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
if (entry.state() == State.ABORTED) { if (entry.state() == State.ABORTED) {
entries.add(entry); entries.add(entry);
assert entry.shards().isEmpty();
hadAbortedInitializations = true;
} else { } else {
// Replace the snapshot that was just initialized // Replace the snapshot that was just initialized
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards =
@ -491,6 +494,14 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
// completion listener in this method. For the snapshot completion to work properly, the snapshot // completion listener in this method. For the snapshot completion to work properly, the snapshot
// should still exist when listener is registered. // should still exist when listener is registered.
userCreateSnapshotListener.onResponse(snapshot.snapshot()); userCreateSnapshotListener.onResponse(snapshot.snapshot());
if (hadAbortedInitializations) {
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
assert entry != null;
endSnapshot(entry);
}
} }
}); });
} }
@ -701,8 +712,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
// 3. Snapshots in any other state that have all their shard tasks completed // 3. Snapshots in any other state that have all their shard tasks completed
snapshotsInProgress.entries().stream().filter( snapshotsInProgress.entries().stream().filter(
entry -> entry.state().completed() entry -> entry.state().completed()
|| entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false || initializingSnapshots.contains(entry.snapshot()) == false
|| entry.state() != State.INIT && completed(entry.shards().values()) && (entry.state() == State.INIT || completed(entry.shards().values()))
).forEach(this::endSnapshot); ).forEach(this::endSnapshot);
} }
if (newMaster) { if (newMaster) {

View File

@ -855,7 +855,6 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
assertEquals(0, snapshotInfo.failedShards()); assertEquals(0, snapshotInfo.failedShards());
} }
public void testMasterAndDataShutdownDuringSnapshot() throws Exception { public void testMasterAndDataShutdownDuringSnapshot() throws Exception {
logger.info("--> starting three master nodes and two data nodes"); logger.info("--> starting three master nodes and two data nodes");
internalCluster().startMasterOnlyNodes(3); internalCluster().startMasterOnlyNodes(3);