Fix Issue with Concurrent Snapshot Init + Delete (#38518)

* Fix Issue with Concurrent Snapshot Init + Delete by ensuring that we're not finalizing a snapshot in the repository while it is initializing on another thread

* Closes #38489
This commit is contained in:
Armin Braun 2019-02-08 15:59:19 +01:00 committed by Tal Levy
parent 92756288b4
commit 238425e5e7
2 changed files with 14 additions and 4 deletions

View File

@ -331,7 +331,6 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
public TimeValue timeout() {
return request.masterNodeTimeout();
}
});
}
@ -394,6 +393,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
boolean snapshotCreated;
boolean hadAbortedInitializations;
@Override
protected void doRun() {
assert initializingSnapshots.contains(snapshot.snapshot());
@ -433,6 +434,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
if (entry.state() == State.ABORTED) {
entries.add(entry);
assert entry.shards().isEmpty();
hadAbortedInitializations = true;
} else {
// Replace the snapshot that was just initialized
ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards =
@ -491,6 +494,14 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
// completion listener in this method. For the snapshot completion to work properly, the snapshot
// should still exist when listener is registered.
userCreateSnapshotListener.onResponse(snapshot.snapshot());
if (hadAbortedInitializations) {
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
assert entry != null;
endSnapshot(entry);
}
}
});
}
@ -701,8 +712,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
// 3. Snapshots in any other state that have all their shard tasks completed
snapshotsInProgress.entries().stream().filter(
entry -> entry.state().completed()
|| entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false
|| entry.state() != State.INIT && completed(entry.shards().values())
|| initializingSnapshots.contains(entry.snapshot()) == false
&& (entry.state() == State.INIT || completed(entry.shards().values()))
).forEach(this::endSnapshot);
}
if (newMaster) {

View File

@ -855,7 +855,6 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
assertEquals(0, snapshotInfo.failedShards());
}
public void testMasterAndDataShutdownDuringSnapshot() throws Exception {
logger.info("--> starting three master nodes and two data nodes");
internalCluster().startMasterOnlyNodes(3);