Remove Redundant Cluster State during Snapshot INIT + Master Failover (#54420) (#55208)

* Remove Redundant Cluster State during Snapshot INIT + Master Failover (#54420)

Similar to #54395, we know that a snapshot in INIT state has not
written anything to the repository yet. If we see one from a master
failover, there is no point in moving it to ABORTED before removing it
from the cluster state (CS) in a subsequent CS update.
Instead, we can simply remove its job from the CS the first time
we see it on master failover and be done with it.
This commit is contained in:
Armin Braun 2020-04-15 12:27:52 +02:00 committed by GitHub
parent d1123281b1
commit e164c9aaee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 27 additions and 29 deletions

View File

@ -335,10 +335,9 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
repository.initializeSnapshot(
snapshot.snapshot().getSnapshotId(), snapshot.indices(),
metadataForSnapshot(snapshot, clusterState.metadata()));
snapshotCreated = true;
}
snapshotCreated = true;
logger.info("snapshot [{}] started", snapshot.snapshot());
final Version version =
minCompatibleVersion(clusterState.nodes().getMinNodeVersion(), snapshot.repository(), repositoryData, null);
@ -408,7 +407,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
logger.warn(() -> new ParameterizedMessage("[{}] failed to create snapshot",
snapshot.snapshot().getSnapshotId()), e);
removeSnapshotFromClusterState(snapshot.snapshot(), null, e,
new CleanupAfterErrorListener(snapshot, true, userCreateSnapshotListener, e));
new CleanupAfterErrorListener(snapshot, snapshotCreated, userCreateSnapshotListener, e));
}
@Override
@ -471,9 +470,12 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
}
@Override
public void onFailure(Exception e) {
e.addSuppressed(this.e);
cleanupAfterError(e);
public void onFailure(@Nullable Exception e) {
if (snapshotCreated) {
cleanupAfterError(ExceptionsHelper.useOrSuppress(e, this.e));
} else {
userCreateSnapshotListener.onFailure(ExceptionsHelper.useOrSuppress(e, this.e));
}
}
public void onNoLongerMaster() {
@ -482,29 +484,25 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
private void cleanupAfterError(Exception exception) {
threadPool.generic().execute(() -> {
if (snapshotCreated) {
final Metadata metadata = clusterService.state().metadata();
repositoriesService.repository(snapshot.snapshot().getRepository())
final Metadata metadata = clusterService.state().metadata();
repositoriesService.repository(snapshot.snapshot().getRepository())
.finalizeSnapshot(snapshot.snapshot().getSnapshotId(),
buildGenerations(snapshot, metadata),
snapshot.startTime(),
ExceptionsHelper.stackTrace(exception),
0,
Collections.emptyList(),
snapshot.repositoryStateId(),
snapshot.includeGlobalState(),
metadataForSnapshot(snapshot, metadata),
snapshot.userMetadata(),
snapshot.version(),
ActionListener.runAfter(ActionListener.wrap(ignored -> {
}, inner -> {
inner.addSuppressed(exception);
logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot in repository",
snapshot.snapshot()), inner);
}), () -> userCreateSnapshotListener.onFailure(e)));
} else {
userCreateSnapshotListener.onFailure(e);
}
buildGenerations(snapshot, metadata),
snapshot.startTime(),
ExceptionsHelper.stackTrace(exception),
0,
Collections.emptyList(),
snapshot.repositoryStateId(),
snapshot.includeGlobalState(),
metadataForSnapshot(snapshot, metadata),
snapshot.userMetadata(),
snapshot.version(),
ActionListener.runAfter(ActionListener.wrap(ignored -> {
}, inner -> {
inner.addSuppressed(exception);
logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot in repository",
snapshot.snapshot()), inner);
}), () -> userCreateSnapshotListener.onFailure(e)));
});
}
}
@ -1016,7 +1014,7 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
endingSnapshots.remove(snapshot);
}
if (listener != null) {
listener.onResponse(snapshotInfo);
listener.onFailure(null);
}
}
});