Add reason to global checkpoint updates on replica

Updating the global checkpoint on a replica can occur for a few
different reasons:
 - from inlined global checkpoint updates
 - from a primary term transition
 - from finalizing recovery

Yet, the trace logging for a global checkpoint update does not present
this information that can be useful when tracing test failures. This
commit adds a reason for the global checkpoint update on a replica so
that we can trace these updates.

Relates #25612
This commit is contained in:
Jason Tedor 2017-07-08 17:05:24 -04:00 committed by GitHub
parent fe04b666ea
commit cb3674c5ee
5 changed files with 16 additions and 12 deletions

View File

@ -49,7 +49,8 @@ public class GlobalCheckpointTracker extends AbstractIndexShardComponent {
* The global checkpoint tracker can operate in two modes:
* - primary: this shard is in charge of collecting local checkpoint information from all shard copies and computing the global
* checkpoint based on the local checkpoints of all in-sync shard copies.
* - replica: this shard receives global checkpoint information from the primary (see {@link #updateGlobalCheckpointOnReplica}).
* - replica: this shard receives global checkpoint information from the primary (see
* {@link #updateGlobalCheckpointOnReplica(long, String)}).
*
* When a shard is initialized (be it a primary or replica), it initially operates in replica mode. The global checkpoint tracker is
* then switched to primary mode in the following three scenarios:
@ -245,8 +246,9 @@ public class GlobalCheckpointTracker extends AbstractIndexShardComponent {
* Updates the global checkpoint on a replica shard after it has been updated by the primary.
*
* @param globalCheckpoint the global checkpoint
* @param reason the reason the global checkpoint was updated
*/
public synchronized void updateGlobalCheckpointOnReplica(final long globalCheckpoint) {
public synchronized void updateGlobalCheckpointOnReplica(final long globalCheckpoint, final String reason) {
assert invariant();
assert primaryMode == false;
/*
@ -255,8 +257,8 @@ public class GlobalCheckpointTracker extends AbstractIndexShardComponent {
* replica shards). In these cases, the local knowledge of the global checkpoint could be higher than sync from the lagging primary.
*/
if (this.globalCheckpoint <= globalCheckpoint) {
logger.trace("updating global checkpoint from [{}] to [{}] due to [{}]", this.globalCheckpoint, globalCheckpoint, reason);
this.globalCheckpoint = globalCheckpoint;
logger.trace("global checkpoint updated from primary to [{}]", globalCheckpoint);
}
assert invariant();
}

View File

@ -182,9 +182,10 @@ public class SequenceNumbersService extends AbstractIndexShardComponent {
* Updates the global checkpoint on a replica shard after it has been updated by the primary.
*
* @param globalCheckpoint the global checkpoint
* @param reason the reason the global checkpoint was updated
*/
public void updateGlobalCheckpointOnReplica(final long globalCheckpoint) {
globalCheckpointTracker.updateGlobalCheckpointOnReplica(globalCheckpoint);
public void updateGlobalCheckpointOnReplica(final long globalCheckpoint, final String reason) {
globalCheckpointTracker.updateGlobalCheckpointOnReplica(globalCheckpoint, reason);
}
/**

View File

@ -1715,8 +1715,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
* Updates the global checkpoint on a replica shard after it has been updated by the primary.
*
* @param globalCheckpoint the global checkpoint
* @param reason the reason the global checkpoint was updated
*/
public void updateGlobalCheckpointOnReplica(final long globalCheckpoint) {
public void updateGlobalCheckpointOnReplica(final long globalCheckpoint, final String reason) {
verifyReplicationTarget();
final SequenceNumbersService seqNoService = getEngine().seqNoService();
final long localCheckpoint = seqNoService.getLocalCheckpoint();
@ -1733,7 +1734,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
*/
return;
}
seqNoService.updateGlobalCheckpointOnReplica(globalCheckpoint);
seqNoService.updateGlobalCheckpointOnReplica(globalCheckpoint, reason);
}
/**
@ -2099,7 +2100,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
assert operationPrimaryTerm > primaryTerm :
"shard term already update. op term [" + operationPrimaryTerm + "], shardTerm [" + primaryTerm + "]";
primaryTerm = operationPrimaryTerm;
updateGlobalCheckpointOnReplica(globalCheckpoint);
updateGlobalCheckpointOnReplica(globalCheckpoint, "primary term transition");
final long currentGlobalCheckpoint = getGlobalCheckpoint();
final long localCheckpoint;
if (currentGlobalCheckpoint == SequenceNumbersService.UNASSIGNED_SEQ_NO) {
@ -2146,7 +2147,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
} else {
if (globalCheckpointUpdated == false) {
try {
updateGlobalCheckpointOnReplica(globalCheckpoint);
updateGlobalCheckpointOnReplica(globalCheckpoint, "operation");
} catch (Exception e) {
releasable.close();
onPermitAcquired.onFailure(e);

View File

@ -368,7 +368,7 @@ public class RecoveryTarget extends AbstractRefCounted implements RecoveryTarget
@Override
public void finalizeRecovery(final long globalCheckpoint) {
indexShard().updateGlobalCheckpointOnReplica(globalCheckpoint);
indexShard().updateGlobalCheckpointOnReplica(globalCheckpoint, "finalizing recovery");
final IndexShard indexShard = indexShard();
indexShard.finalizeRecovery();
}

View File

@ -709,7 +709,7 @@ public class IndexShardTests extends IndexShardTestCase {
randomIntBetween(
Math.toIntExact(SequenceNumbersService.UNASSIGNED_SEQ_NO),
Math.toIntExact(indexShard.getLocalCheckpoint()));
indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica);
indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test");
final int globalCheckpoint =
randomIntBetween(
@ -764,7 +764,7 @@ public class IndexShardTests extends IndexShardTestCase {
randomIntBetween(
Math.toIntExact(SequenceNumbersService.UNASSIGNED_SEQ_NO),
Math.toIntExact(indexShard.getLocalCheckpoint()));
indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica);
indexShard.updateGlobalCheckpointOnReplica(globalCheckpointOnReplica, "test");
final int globalCheckpoint =
randomIntBetween(