We were repeatedly trying to send shard state updates for aborted snapshots on every cluster state update. This is dead code: those updates are already sent reliably by the callbacks passed to `SnapshotShardsService#snapshot`, and on master failover the status update is resent via `SnapshotShardsService#syncShardStatsOnNewMaster`. There is therefore no need to retry sending these updates here over and over, and this logic can safely be removed.
parent 4a2cb05162
commit 938648fcff
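For context, here is a minimal, hypothetical sketch (plain Java; not the actual Elasticsearch classes — `snapshot`, `notifySuccessfulSnapshotShard`, and `notifyFailedSnapshotShard` are simplified stand-ins) of the callback pattern that makes the resend loop dead code: the listener handed to `SnapshotShardsService#snapshot` already reports the terminal shard status to the master when the shard task finishes.

import java.util.function.Consumer;

// Sketch only: simplified stand-ins for the real service and its master
// notification hooks, to illustrate why the terminal status never needs
// to be resent from the abort-handling loop.
public class ShardSnapshotCallbackSketch {

    static void notifySuccessfulSnapshotShard(String shardId) {
        System.out.println("master notified: shard " + shardId + " -> DONE");
    }

    static void notifyFailedSnapshotShard(String shardId, String reason) {
        System.out.println("master notified: shard " + shardId + " -> FAILED: " + reason);
    }

    // Simplified model of SnapshotShardsService#snapshot: the completion
    // callbacks report the terminal state exactly once, whether the shard
    // snapshot succeeds, fails, or is aborted mid-flight.
    static void snapshot(String shardId, Runnable work, Consumer<Exception> onFailure) {
        try {
            work.run();
            notifySuccessfulSnapshotShard(shardId);
        } catch (Exception e) {
            onFailure.accept(e);
        }
    }

    public static void main(String[] args) {
        snapshot("[index][0]", () -> { /* write segment files here */ },
            e -> notifyFailedSnapshotShard("[index][0]", String.valueOf(e.getMessage())));
    }
}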
@@ -111,21 +111,19 @@ public class IndexShardSnapshotStatus {
         return asCopy();
     }
 
-    public synchronized Copy moveToDone(final long endTime) {
+    public synchronized void moveToDone(final long endTime) {
         if (stage.compareAndSet(Stage.FINALIZE, Stage.DONE)) {
             this.totalTime = Math.max(0L, endTime - startTime);
         } else {
             throw new IllegalStateException("Unable to move the shard snapshot status to [DONE]: " +
                 "expecting [FINALIZE] but got [" + stage.get() + "]");
         }
-        return asCopy();
     }
 
-    public synchronized Copy abortIfNotCompleted(final String failure) {
+    public synchronized void abortIfNotCompleted(final String failure) {
         if (stage.compareAndSet(Stage.INIT, Stage.ABORTED) || stage.compareAndSet(Stage.STARTED, Stage.ABORTED)) {
             this.failure = failure;
         }
-        return asCopy();
     }
 
     public synchronized void moveToFailed(final long endTime, final String failure) {
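As a usage note, here is a minimal compilable sketch (assumed names; not the real `IndexShardSnapshotStatus`) of the narrowed API above: `abortIfNotCompleted` is now called purely for its side effect, and the compareAndSet pair guards against aborting a snapshot that has already reached FINALIZE or DONE.

import java.util.concurrent.atomic.AtomicReference;

// Sketch only: a stripped-down stage machine mirroring the diff above.
public class StageTransitionSketch {
    enum Stage { INIT, STARTED, FINALIZE, DONE, ABORTED, FAILURE }

    private final AtomicReference<Stage> stage = new AtomicReference<>(Stage.INIT);
    private String failure;

    // Now void: no caller consumes the returned Copy any more, and the
    // CAS pair ensures only INIT/STARTED snapshots move to ABORTED.
    public synchronized void abortIfNotCompleted(final String failure) {
        if (stage.compareAndSet(Stage.INIT, Stage.ABORTED)
                || stage.compareAndSet(Stage.STARTED, Stage.ABORTED)) {
            this.failure = failure;
        }
    }

    public static void main(String[] args) {
        StageTransitionSketch status = new StageTransitionSketch();
        status.abortIfNotCompleted("snapshot has been aborted");
        System.out.println(status.stage.get() + " / " + status.failure);
        // prints: ABORTED / snapshot has been aborted
    }
}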
@@ -256,28 +256,14 @@ public class SnapshotShardsService extends AbstractLifecycleComponent implements
                 Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap());
                 for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                     final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
-                    if (snapshotStatus != null) {
-                        final IndexShardSnapshotStatus.Copy lastSnapshotStatus =
-                            snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
-                        final Stage stage = lastSnapshotStatus.getStage();
-                        if (stage == Stage.FINALIZE) {
-                            logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, " +
-                                "letting it finish", snapshot, shard.key);
-                        } else if (stage == Stage.DONE) {
-                            logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, " +
-                                "updating status on the master", snapshot, shard.key);
-                            notifySuccessfulSnapshotShard(snapshot, shard.key);
-                        } else if (stage == Stage.FAILURE) {
-                            logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, " +
-                                "updating status on the master", snapshot, shard.key);
-                            notifyFailedSnapshotShard(snapshot, shard.key, lastSnapshotStatus.getFailure());
-                        }
-                    } else {
+                    if (snapshotStatus == null) {
                         // due to CS batching we might have missed the INIT state and straight went into ABORTED
                         // notify master that abort has completed by moving to FAILED
                         if (shard.value.state() == ShardState.ABORTED) {
                             notifyFailedSnapshotShard(snapshot, shard.key, shard.value.reason());
                         }
+                    } else {
+                        snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
                     }
                 }
             }
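To make the simplified control flow above concrete, here is a hedged, self-contained sketch (hypothetical `LocalStatus` class and plain strings instead of the real `ShardId`/`IndexShardSnapshotStatus` types) of what the abort loop now does: flip any local status to aborted, and only notify the master directly in the cluster-state-batching edge case where the shard never started locally.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;

// Sketch only: models the loop in the diff above with simplified types.
public class AbortHandlingSketch {
    enum ShardState { INIT, STARTED, ABORTED }

    // Stand-in for IndexShardSnapshotStatus: all the loop needs is the
    // abort side effect; the running task's callback does the reporting.
    static class LocalStatus {
        final AtomicBoolean aborted = new AtomicBoolean();
        void abortIfNotCompleted(String reason) {
            aborted.compareAndSet(false, true);
        }
    }

    public static void main(String[] args) {
        // Shard states as seen in the cluster state entry; a key missing from
        // the local map means this node never observed the INIT state.
        Map<String, ShardState> clusterStateShards = new HashMap<>();
        clusterStateShards.put("[idx][0]", ShardState.ABORTED);
        clusterStateShards.put("[idx][1]", ShardState.STARTED);
        Map<String, LocalStatus> localStatuses = new HashMap<>();
        localStatuses.put("[idx][1]", new LocalStatus());

        for (Map.Entry<String, ShardState> shard : clusterStateShards.entrySet()) {
            LocalStatus snapshotStatus = localStatuses.get(shard.getKey());
            if (snapshotStatus == null) {
                // CS batching may collapse INIT+ABORTED into one update, so the
                // shard never started here: report the abort as FAILED ourselves.
                if (shard.getValue() == ShardState.ABORTED) {
                    System.out.println("notify master: " + shard.getKey() + " FAILED (aborted before start)");
                }
            } else {
                // Otherwise just flip the local status; the in-flight task's
                // completion callback will send the terminal update.
                snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
            }
        }
    }
}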