Remove Duplicate Shard Snapshot State Updates (#46862) (#46906)

We were repeatedly trying to send shard state updates for aborted
snapshots on every cluster state update.
This is simply dead code, since those updates are already safely
sent in the callbacks passed to `SnapshotShardsService#snapshot`.
On master failover, we ensure that the status update is resent
via `SnapshotShardsService#syncShardStatsOnNewMaster`.
Therefore, there is no need to keep trying to send updates here over
and over, and this logic can safely be removed.
This commit is contained in:
Armin Braun 2019-09-20 14:30:03 +02:00 committed by GitHub
parent 4a2cb05162
commit 938648fcff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 21 deletions

View File

@ -111,21 +111,19 @@ public class IndexShardSnapshotStatus {
return asCopy();
}
public synchronized Copy moveToDone(final long endTime) {
public synchronized void moveToDone(final long endTime) {
if (stage.compareAndSet(Stage.FINALIZE, Stage.DONE)) {
this.totalTime = Math.max(0L, endTime - startTime);
} else {
throw new IllegalStateException("Unable to move the shard snapshot status to [DONE]: " +
"expecting [FINALIZE] but got [" + stage.get() + "]");
}
return asCopy();
}
public synchronized Copy abortIfNotCompleted(final String failure) {
public synchronized void abortIfNotCompleted(final String failure) {
if (stage.compareAndSet(Stage.INIT, Stage.ABORTED) || stage.compareAndSet(Stage.STARTED, Stage.ABORTED)) {
this.failure = failure;
}
return asCopy();
}
public synchronized void moveToFailed(final long endTime, final String failure) {

View File

@ -256,28 +256,14 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements
Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap());
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
if (snapshotStatus != null) {
final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
final Stage stage = lastSnapshotStatus.getStage();
if (stage == Stage.FINALIZE) {
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " +
"letting it finish", snapshot, shard.key);
} else if (stage == Stage.DONE) {
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, " +
"updating status on the master", snapshot, shard.key);
notifySuccessfulSnapshotShard(snapshot, shard.key);
} else if (stage == Stage.FAILURE) {
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, " +
"updating status on the master", snapshot, shard.key);
notifyFailedSnapshotShard(snapshot, shard.key, lastSnapshotStatus.getFailure());
}
} else {
if (snapshotStatus == null) {
// due to CS batching we might have missed the INIT state and straight went into ABORTED
// notify master that abort has completed by moving to FAILED
if (shard.value.state() == ShardState.ABORTED) {
notifyFailedSnapshotShard(snapshot, shard.key, shard.value.reason());
}
} else {
snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
}
}
}