Fix Test Failure in testCorrectCountsForDoneShards (#60254) (#60286)

* Fix Test Failure in testCorrectCountsForDoneShards

Fixes a rare edge case where the node shard status request returns before
the data node was able to send the state update request to master and have the cluster state updated.
Without this change, the snapshot shard status would be reported as `DONE` as soon as the data node
has finished snapshotting the shard, even though the cluster state does not yet reflect this.
If the data node then drops out of the cluster before the state has been updated, then
the status will jump to `FAILURE` because the master updates the state once the data node
leaves the cluster.

Closes #60247
This commit is contained in:
Armin Braun 2020-07-28 15:46:18 +02:00 committed by GitHub
parent 92ce41cfaf
commit 9222070f22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 1 deletions

View File

@ -31,7 +31,7 @@ public enum SnapshotIndexShardStage {
*/ */
STARTED((byte)1, false), STARTED((byte)1, false),
/** /**
* Snapshot metadata is being written * Snapshot metadata is being written or this shard's status in the cluster state is being updated
*/ */
FINALIZE((byte)2, false), FINALIZE((byte)2, false),
/** /**

View File

@ -176,6 +176,16 @@ public class TransportSnapshotsStatusAction extends TransportMasterNodeAction<Sn
SnapshotIndexShardStatus shardStatus = shardStatues.get(shardEntry.key); SnapshotIndexShardStatus shardStatus = shardStatues.get(shardEntry.key);
if (shardStatus != null) { if (shardStatus != null) {
// We have full information about this shard // We have full information about this shard
if (shardStatus.getStage() == SnapshotIndexShardStage.DONE
&& shardEntry.value.state() != SnapshotsInProgress.ShardState.SUCCESS) {
// Unlikely edge case:
// Data node has finished snapshotting the shard but the cluster state has not yet been updated
// to reflect this. We adjust the status to show up as snapshot metadata being written because
// technically if the data node failed before successfully reporting DONE state to master, then
// this shard's state would jump to a failed state.
shardStatus = new SnapshotIndexShardStatus(shardEntry.key, SnapshotIndexShardStage.FINALIZE,
shardStatus.getStats(), shardStatus.getNodeId(), shardStatus.getFailure());
}
shardStatusBuilder.add(shardStatus); shardStatusBuilder.add(shardStatus);
continue; continue;
} }