Fix stalled send translog ops request (#57859)

Currently, the translog ops request is reentrant when there is a mapping
update. The impact of this is that a translog ops request ends up waiting on
the pre-existing listener and is never completed. This commit fixes the issue
by introducing a new code path that avoids the idempotency logic.
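For context, the stall comes from the retry path: when a translog batch hits a missing mapping, the old code waited for the next cluster state and then re-entered messageReceived, which ran back into the idempotency logic, found the listener registered by the first attempt, and never resubmitted the operations. The sketch below is a deliberately simplified model of that pattern, not the Elasticsearch code; the names StalledHandler, handle, and performWork are hypothetical stand-ins for the real handler, messageReceived, and the new performTranslogOps.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;

// Simplified model of the bug and the fix; not Elasticsearch code.
class StalledHandler {
    // Idempotency logic: one listener per in-flight request. A second delivery of
    // the same request just waits on the pre-existing listener instead of re-running.
    private final Map<Long, Consumer<Exception>> inFlight = new ConcurrentHashMap<>();

    void handle(long requestId, Consumer<Exception> listener, boolean mappingMissing) {
        if (inFlight.putIfAbsent(requestId, listener) != null) {
            return; // already in flight -> wait on the pre-existing listener
        }
        performWork(requestId, listener, mappingMissing);
    }

    private void performWork(long requestId, Consumer<Exception> listener, boolean mappingMissing) {
        if (mappingMissing) {
            // OLD retry: handle(requestId, listener, false) -- re-entrant; putIfAbsent sees
            // the listener registered above and returns, so the request never completes.
            // NEW retry: call performWork(...) directly, skipping the idempotency check,
            // so the retried batch is actually applied.
            performWork(requestId, listener, false);
            return;
        }
        inFlight.remove(requestId);
        listener.accept(null); // success
    }

    public static void main(String[] args) {
        new StalledHandler().handle(1L, e -> System.out.println(e == null ? "completed" : "failed: " + e), true);
    }
}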
Tim Brooks 2020-06-09 09:06:59 -06:00
parent 24a50eb3af
commit 8119b96517
1 changed file with 61 additions and 52 deletions


@@ -348,60 +348,69 @@ public class PeerRecoveryTargetService implements IndexEventListener {
                     return;
                 }
-                final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext());
-                final Consumer<Exception> retryOnMappingException = exception -> {
-                    // in very rare cases a translog replay from primary is processed before a mapping update on this node
-                    // which causes local mapping changes since the mapping (clusterstate) might not have arrived on this node.
-                    logger.debug("delaying recovery due to missing mapping changes", exception);
-                    // we do not need to use a timeout here since the entire recovery mechanism has an inactivity protection (it will be
-                    // canceled)
-                    observer.waitForNextChange(new ClusterStateObserver.Listener() {
-                        @Override
-                        public void onNewClusterState(ClusterState state) {
-                            try {
-                                messageReceived(request, channel, task);
-                            } catch (Exception e) {
-                                listener.onFailure(e);
-                            }
-                        }
-                        @Override
-                        public void onClusterServiceClose() {
-                            listener.onFailure(new ElasticsearchException(
-                                "cluster service was closed while waiting for mapping updates"));
-                        }
-                        @Override
-                        public void onTimeout(TimeValue timeout) {
-                            // note that we do not use a timeout (see comment above)
-                            listener.onFailure(new ElasticsearchTimeoutException("timed out waiting for mapping updates " +
-                                "(timeout [" + timeout + "])"));
-                        }
-                    });
-                };
-                final IndexMetadata indexMetadata = clusterService.state().metadata().index(request.shardId().getIndex());
-                final long mappingVersionOnTarget = indexMetadata != null ? indexMetadata.getMappingVersion() : 0L;
-                recoveryTarget.indexTranslogOperations(
-                    request.operations(),
-                    request.totalTranslogOps(),
-                    request.maxSeenAutoIdTimestampOnPrimary(),
-                    request.maxSeqNoOfUpdatesOrDeletesOnPrimary(),
-                    request.retentionLeases(),
-                    request.mappingVersionOnPrimary(),
-                    ActionListener.wrap(
-                        checkpoint -> listener.onResponse(null),
-                        e -> {
-                            // do not retry if the mapping on replica is at least as recent as the mapping
-                            // that the primary used to index the operations in the request.
-                            if (mappingVersionOnTarget < request.mappingVersionOnPrimary() && e instanceof MapperException) {
-                                retryOnMappingException.accept(e);
-                            } else {
-                                listener.onFailure(e);
-                            }
-                        })
-                );
+                performTranslogOps(request, listener, recoveryRef);
             }
         }
+
+        private void performTranslogOps(final RecoveryTranslogOperationsRequest request, final ActionListener<Void> listener,
+                                        final RecoveryRef recoveryRef) {
+            final RecoveryTarget recoveryTarget = recoveryRef.target();
+            final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext());
+            final Consumer<Exception> retryOnMappingException = exception -> {
+                // in very rare cases a translog replay from primary is processed before a mapping update on this node
+                // which causes local mapping changes since the mapping (clusterstate) might not have arrived on this node.
+                logger.debug("delaying recovery due to missing mapping changes", exception);
+                // we do not need to use a timeout here since the entire recovery mechanism has an inactivity protection (it will be
+                // canceled)
+                observer.waitForNextChange(new ClusterStateObserver.Listener() {
+                    @Override
+                    public void onNewClusterState(ClusterState state) {
+                        try {
+                            try (RecoveryRef recoveryRef = onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
+                                performTranslogOps(request, listener, recoveryRef);
+                            }
+                        } catch (Exception e) {
+                            listener.onFailure(e);
+                        }
+                    }
+                    @Override
+                    public void onClusterServiceClose() {
+                        listener.onFailure(new ElasticsearchException(
+                            "cluster service was closed while waiting for mapping updates"));
+                    }
+                    @Override
+                    public void onTimeout(TimeValue timeout) {
+                        // note that we do not use a timeout (see comment above)
+                        listener.onFailure(new ElasticsearchTimeoutException("timed out waiting for mapping updates " +
+                            "(timeout [" + timeout + "])"));
+                    }
+                });
+            };
+            final IndexMetadata indexMetadata = clusterService.state().metadata().index(request.shardId().getIndex());
+            final long mappingVersionOnTarget = indexMetadata != null ? indexMetadata.getMappingVersion() : 0L;
+            recoveryTarget.indexTranslogOperations(
+                request.operations(),
+                request.totalTranslogOps(),
+                request.maxSeenAutoIdTimestampOnPrimary(),
+                request.maxSeqNoOfUpdatesOrDeletesOnPrimary(),
+                request.retentionLeases(),
+                request.mappingVersionOnPrimary(),
+                ActionListener.wrap(
+                    checkpoint -> listener.onResponse(null),
+                    e -> {
+                        // do not retry if the mapping on replica is at least as recent as the mapping
+                        // that the primary used to index the operations in the request.
+                        if (mappingVersionOnTarget < request.mappingVersionOnPrimary() && e instanceof MapperException) {
+                            retryOnMappingException.accept(e);
+                        } else {
+                            listener.onFailure(e);
+                        }
+                    })
+            );
+        }
     }

     class FilesInfoRequestHandler implements TransportRequestHandler<RecoveryFilesInfoRequest> {
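One detail worth calling out from the diff: the retry is gated on the mapping version, so recovery only waits for a cluster state update when the local mapping is actually behind the one the primary used to index the operations and the failure is a mapping problem. Below is a standalone illustration with made-up version numbers, not Elasticsearch code; StaleMappingException stands in for MapperException and shouldRetry is a hypothetical helper.

// Standalone illustration of the retry gate; not Elasticsearch code.
class RetryGateDemo {
    static class StaleMappingException extends RuntimeException {}

    // Retry only when the target's mapping is older than the mapping the primary used
    // AND the failure is a mapping problem; everything else is surfaced to the listener.
    static boolean shouldRetry(long mappingVersionOnTarget, long mappingVersionOnPrimary, Exception e) {
        return mappingVersionOnTarget < mappingVersionOnPrimary && e instanceof StaleMappingException;
    }

    public static void main(String[] args) {
        Exception mappingFailure = new StaleMappingException();
        System.out.println(shouldRetry(3, 5, mappingFailure));              // true: wait for the mapping update, then retry
        System.out.println(shouldRetry(5, 5, mappingFailure));              // false: local mapping already current, fail instead
        System.out.println(shouldRetry(3, 5, new IllegalStateException())); // false: not a mapping problem, fail instead
    }
}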