Improve Snapshot Finalization Ex. Handling (#49995) (#50017)

* Improve Snapshot Finalization Ex. Handling

Like in #49989 we can get into a situation where the setting of
the repository generation (during snapshot finalization) in the cluster
state fails due to master failing over.
In this case we should not try to execute the next cluster state update
that will remove the snapshot from the cluster state.

Closes #49989
This commit is contained in:
Armin Braun 2019-12-10 13:01:51 +01:00 committed by GitHub
parent 3d8c2f9e18
commit ee4a8a08dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 12 additions and 2 deletions

View File

@ -35,6 +35,7 @@ import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateApplier;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.NotMasterException;
import org.elasticsearch.cluster.RepositoryCleanupInProgress;
import org.elasticsearch.cluster.RestoreInProgress;
import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
@ -42,6 +43,7 @@ import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus;
import org.elasticsearch.cluster.SnapshotsInProgress.ShardState;
import org.elasticsearch.cluster.SnapshotsInProgress.State;
import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.metadata.MetaData;
@ -1065,8 +1067,16 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
@Override
public void onFailure(final Exception e) {
Snapshot snapshot = entry.snapshot();
logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e);
removeSnapshotFromClusterState(snapshot, null, e);
if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) {
// Failure due to not being master any more, don't try to remove snapshot from cluster state the next master
// will try ending this snapshot again
logger.debug(() -> new ParameterizedMessage(
"[{}] failed to update cluster state during snapshot finalization", snapshot), e);
endingSnapshots.remove(snapshot);
} else {
logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e);
removeSnapshotFromClusterState(snapshot, null, e);
}
}
});
}