Avoiding a number of noop updates that were observed to cause trouble (as in needless noop CS publishing) which can become an issue when working with a large number of concurrent snapshot operations. Also this sets up some simplifications made in the clone snapshot branch.
This commit is contained in:
parent
c8981ea93d
commit
7b941a18e9
|
@ -213,6 +213,34 @@ public class SnapshotsInProgress extends AbstractNamedDiffable<Custom> implement
|
||||||
failure, userMetadata, version);
|
failure, userMetadata, version);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new instance that has its shard assignments replaced by the given shard assignment map.
|
||||||
|
* If the given shard assignments show all shard snapshots in a completed state then the returned instance will be of state
|
||||||
|
* {@link State#SUCCESS}, otherwise the state remains unchanged.
|
||||||
|
*
|
||||||
|
* @param shards new shard snapshot states
|
||||||
|
* @return new snapshot entry
|
||||||
|
*/
|
||||||
|
public Entry withShardStates(ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards) {
|
||||||
|
if (completed(shards.values())) {
|
||||||
|
return new Entry(snapshot, includeGlobalState, partial, State.SUCCESS, indices, dataStreams, startTime, repositoryStateId,
|
||||||
|
shards, failure, userMetadata, version);
|
||||||
|
}
|
||||||
|
return withStartedShards(shards);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as {@link #withShardStates} but does not check if the snapshot completed and thus is only to be used when starting new
|
||||||
|
* shard snapshots on data nodes for a running snapshot.
|
||||||
|
*/
|
||||||
|
public Entry withStartedShards(ImmutableOpenMap<ShardId, ShardSnapshotStatus> shards) {
|
||||||
|
final SnapshotsInProgress.Entry updated = new Entry(snapshot, includeGlobalState, partial, state, indices, dataStreams,
|
||||||
|
startTime, repositoryStateId, shards, failure, userMetadata, version);
|
||||||
|
assert updated.state().completed() == false && completed(updated.shards().values()) == false
|
||||||
|
: "Only running snapshots allowed but saw [" + updated + "]";
|
||||||
|
return updated;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String repository() {
|
public String repository() {
|
||||||
return snapshot.getRepository();
|
return snapshot.getRepository();
|
||||||
|
|
|
@ -2458,63 +2458,77 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
||||||
public ClusterTasksResult<UpdateIndexShardSnapshotStatusRequest>
|
public ClusterTasksResult<UpdateIndexShardSnapshotStatusRequest>
|
||||||
execute(ClusterState currentState, List<UpdateIndexShardSnapshotStatusRequest> tasks) {
|
execute(ClusterState currentState, List<UpdateIndexShardSnapshotStatusRequest> tasks) {
|
||||||
int changedCount = 0;
|
int changedCount = 0;
|
||||||
|
int startedCount = 0;
|
||||||
final List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
|
final List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
|
||||||
final Map<String, Set<ShardId>> reusedShardIdsByRepo = new HashMap<>();
|
// Tasks to check for updates for running snapshots.
|
||||||
|
final List<UpdateIndexShardSnapshotStatusRequest> unconsumedTasks = new ArrayList<>(tasks);
|
||||||
|
// Tasks that were used to complete an existing in-progress shard snapshot
|
||||||
|
final Set<UpdateIndexShardSnapshotStatusRequest> executedTasks = new HashSet<>();
|
||||||
for (SnapshotsInProgress.Entry entry : currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) {
|
for (SnapshotsInProgress.Entry entry : currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) {
|
||||||
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
|
if (entry.state().completed()) {
|
||||||
boolean updated = false;
|
entries.add(entry);
|
||||||
|
continue;
|
||||||
for (UpdateIndexShardSnapshotStatusRequest updateSnapshotState : tasks) {
|
}
|
||||||
|
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = null;
|
||||||
|
for (Iterator<UpdateIndexShardSnapshotStatusRequest> iterator = unconsumedTasks.iterator(); iterator.hasNext(); ) {
|
||||||
|
final UpdateIndexShardSnapshotStatusRequest updateSnapshotState = iterator.next();
|
||||||
|
final Snapshot updatedSnapshot = updateSnapshotState.snapshot();
|
||||||
|
final String updatedRepository = updatedSnapshot.getRepository();
|
||||||
|
if (entry.repository().equals(updatedRepository) == false) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
final ShardId finishedShardId = updateSnapshotState.shardId();
|
final ShardId finishedShardId = updateSnapshotState.shardId();
|
||||||
if (entry.snapshot().equals(updateSnapshotState.snapshot())) {
|
if (entry.snapshot().getSnapshotId().equals(updatedSnapshot.getSnapshotId())) {
|
||||||
logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot(),
|
final ShardSnapshotStatus existing = entry.shards().get(finishedShardId);
|
||||||
|
if (existing == null) {
|
||||||
|
logger.warn("Received shard snapshot status update [{}] but this shard is not tracked in [{}]",
|
||||||
|
updateSnapshotState, entry);
|
||||||
|
assert false : "This should never happen, data nodes should only send updates for expected shards";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (existing.state().completed()) {
|
||||||
|
// No point in doing noop updates that might happen if data nodes resends shard status after a disconnect.
|
||||||
|
iterator.remove();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
logger.trace("[{}] Updating shard [{}] with status [{}]", updatedSnapshot,
|
||||||
finishedShardId, updateSnapshotState.status().state());
|
finishedShardId, updateSnapshotState.status().state());
|
||||||
if (updated == false) {
|
if (shards == null) {
|
||||||
shards.putAll(entry.shards());
|
shards = ImmutableOpenMap.builder(entry.shards());
|
||||||
updated = true;
|
|
||||||
}
|
}
|
||||||
shards.put(finishedShardId, updateSnapshotState.status());
|
shards.put(finishedShardId, updateSnapshotState.status());
|
||||||
|
executedTasks.add(updateSnapshotState);
|
||||||
changedCount++;
|
changedCount++;
|
||||||
} else {
|
} else if (executedTasks.contains(updateSnapshotState)) {
|
||||||
final String updatedRepository = updateSnapshotState.snapshot().getRepository();
|
// tasks that completed a shard might allow starting a new shard snapshot for the current snapshot
|
||||||
final Set<ShardId> reusedShardIds = reusedShardIdsByRepo.computeIfAbsent(updatedRepository, k -> new HashSet<>());
|
final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId);
|
||||||
if (entry.state().completed() == false && entry.repository().equals(updatedRepository)
|
if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) {
|
||||||
&& reusedShardIds.contains(finishedShardId) == false) {
|
continue;
|
||||||
final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId);
|
|
||||||
if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (updated == false) {
|
|
||||||
shards.putAll(entry.shards());
|
|
||||||
updated = true;
|
|
||||||
}
|
|
||||||
final ShardSnapshotStatus finishedStatus = updateSnapshotState.status();
|
|
||||||
logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId,
|
|
||||||
finishedStatus.nodeId(), finishedStatus.generation());
|
|
||||||
shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation()));
|
|
||||||
reusedShardIds.add(finishedShardId);
|
|
||||||
}
|
}
|
||||||
|
if (shards == null) {
|
||||||
|
shards = ImmutableOpenMap.builder(entry.shards());
|
||||||
|
}
|
||||||
|
final ShardSnapshotStatus finishedStatus = updateSnapshotState.status();
|
||||||
|
logger.trace("Starting [{}] on [{}] with generation [{}]", finishedShardId,
|
||||||
|
finishedStatus.nodeId(), finishedStatus.generation());
|
||||||
|
shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation()));
|
||||||
|
iterator.remove();
|
||||||
|
startedCount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (updated) {
|
if (shards == null) {
|
||||||
if (completed(shards.values()) == false) {
|
|
||||||
entries.add(new SnapshotsInProgress.Entry(entry, shards.build()));
|
|
||||||
} else {
|
|
||||||
// Snapshot is finished - mark it as done
|
|
||||||
// TODO: Add PARTIAL_SUCCESS status?
|
|
||||||
SnapshotsInProgress.Entry updatedEntry = new SnapshotsInProgress.Entry(entry, State.SUCCESS, shards.build());
|
|
||||||
entries.add(updatedEntry);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
entries.add(entry);
|
entries.add(entry);
|
||||||
|
} else {
|
||||||
|
entries.add(entry.withShardStates(shards.build()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (changedCount > 0) {
|
if (changedCount > 0) {
|
||||||
logger.trace("changed cluster state triggered by {} snapshot state updates", changedCount);
|
logger.trace("changed cluster state triggered by [{}] snapshot state updates and resulted in starting " +
|
||||||
|
"[{}] shard snapshots", changedCount, startedCount);
|
||||||
return ClusterTasksResult.<UpdateIndexShardSnapshotStatusRequest>builder().successes(tasks)
|
return ClusterTasksResult.<UpdateIndexShardSnapshotStatusRequest>builder().successes(tasks)
|
||||||
.build(ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE,
|
.build(ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE,
|
||||||
SnapshotsInProgress.of(unmodifiableList(entries))).build());
|
SnapshotsInProgress.of(entries)).build());
|
||||||
}
|
}
|
||||||
return ClusterTasksResult.<UpdateIndexShardSnapshotStatusRequest>builder().successes(tasks).build(currentState);
|
return ClusterTasksResult.<UpdateIndexShardSnapshotStatusRequest>builder().successes(tasks).build(currentState);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue