Fix stalled send translog ops request (#57859)

Currently, the translog ops request is reentrant when there is a mapping
update. The impact of this is that a translog ops request ends up waiting on
the pre-existing listener and it is never completed. This commit fixes this
by introducing a new code path to avoid the idempotency logic.
This commit is contained in:
Tim Brooks 2020-06-09 09:06:59 -06:00
parent 24a50eb3af
commit 8119b96517
No known key found for this signature in database
GPG Key ID: C2AA3BB91A889E77
1 changed files with 61 additions and 52 deletions

View File

@ -348,6 +348,14 @@ public class PeerRecoveryTargetService implements IndexEventListener {
return;
}
performTranslogOps(request, listener, recoveryRef);
}
}
private void performTranslogOps(final RecoveryTranslogOperationsRequest request, final ActionListener<Void> listener,
final RecoveryRef recoveryRef) {
final RecoveryTarget recoveryTarget = recoveryRef.target();
final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext());
final Consumer<Exception> retryOnMappingException = exception -> {
// in very rare cases a translog replay from primary is processed before a mapping update on this node
@ -359,7 +367,9 @@ public class PeerRecoveryTargetService implements IndexEventListener {
@Override
public void onNewClusterState(ClusterState state) {
try {
messageReceived(request, channel, task);
try (RecoveryRef recoveryRef = onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
performTranslogOps(request, listener, recoveryRef);
}
} catch (Exception e) {
listener.onFailure(e);
}
@ -402,7 +412,6 @@ public class PeerRecoveryTargetService implements IndexEventListener {
);
}
}
}
class FilesInfoRequestHandler implements TransportRequestHandler<RecoveryFilesInfoRequest> {