Remove Duplicate Shard Snapshot State Updates (#46862) (#46906)

We were repeatedly trying to send shard state updates for aborted snapshots on every cluster state update. This is simply dead code, since those updates are already safely sent in the callbacks passed to `SnapshotShardsService#snapshot`. On master failover, we ensure that the status update is resent via `SnapshotShardsService#syncShardStatsOnNewMaster`. Therefore, there is no need to keep trying to send these updates here on every cluster state update, and this logic can safely be removed.
2019-09-20 14:30:03 +02:00 · 2019-09-20 14:30:03 +02:00 · 938648fcff
parent 4a2cb05162
commit 938648fcff
2 changed files with 5 additions and 21 deletions
--- a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java
+++ b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java
@ -111,21 +111,19 @@ public class IndexShardSnapshotStatus {
        return asCopy();
    }

-    public synchronized Copy moveToDone(final long endTime) {
+    public synchronized void moveToDone(final long endTime) {
        if (stage.compareAndSet(Stage.FINALIZE, Stage.DONE)) {
            this.totalTime = Math.max(0L, endTime - startTime);
        } else {
            throw new IllegalStateException("Unable to move the shard snapshot status to [DONE]: " +
                "expecting [FINALIZE] but got [" + stage.get() + "]");
        }
-        return asCopy();
    }

-    public synchronized Copy abortIfNotCompleted(final String failure) {
+    public synchronized void abortIfNotCompleted(final String failure) {
        if (stage.compareAndSet(Stage.INIT, Stage.ABORTED) || stage.compareAndSet(Stage.STARTED, Stage.ABORTED)) {
            this.failure = failure;
        }
-        return asCopy();
    }

    public synchronized void moveToFailed(final long endTime, final String failure) {
--- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java
+++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java
@ -256,28 +256,14 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements
                Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap());
                for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                    final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
-                    if (snapshotStatus != null) {
-                        final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
-                            snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
-                        final Stage stage = lastSnapshotStatus.getStage();
-                        if (stage == Stage.FINALIZE) {
-                            logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " +
-                                "letting it finish", snapshot, shard.key);
-                        } else if (stage == Stage.DONE) {
-                            logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, " +
-                                "updating status on the master", snapshot, shard.key);
-                            notifySuccessfulSnapshotShard(snapshot, shard.key);
-                        } else if (stage == Stage.FAILURE) {
-                            logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, " +
-                                "updating status on the master", snapshot, shard.key);
-                            notifyFailedSnapshotShard(snapshot, shard.key, lastSnapshotStatus.getFailure());
-                        }
-                    } else {
+                    if (snapshotStatus == null) {
                        // due to CS batching we might have missed the INIT state and straight went into ABORTED
                        // notify master that abort has completed by moving to FAILED
                        if (shard.value.state() == ShardState.ABORTED) {
                            notifyFailedSnapshotShard(snapshot, shard.key, shard.value.reason());
                        }
+                    } else {
+                        snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
                    }
                }
            }