Fix stalled send translog ops request (#57859)
Currently, the translog ops request is reentrant when there is a mapping update. As a result, a translog ops request ends up waiting on the pre-existing listener and is never completed. This commit fixes this by introducing a new code path that avoids the idempotency logic.
This commit is contained in:
parent
24a50eb3af
commit
8119b96517
|
@ -348,6 +348,14 @@ public class PeerRecoveryTargetService implements IndexEventListener {
|
|||
return;
|
||||
}
|
||||
|
||||
performTranslogOps(request, listener, recoveryRef);
|
||||
}
|
||||
}
|
||||
|
||||
private void performTranslogOps(final RecoveryTranslogOperationsRequest request, final ActionListener<Void> listener,
|
||||
final RecoveryRef recoveryRef) {
|
||||
final RecoveryTarget recoveryTarget = recoveryRef.target();
|
||||
|
||||
final ClusterStateObserver observer = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext());
|
||||
final Consumer<Exception> retryOnMappingException = exception -> {
|
||||
// in very rare cases a translog replay from primary is processed before a mapping update on this node
|
||||
|
@ -359,7 +367,9 @@ public class PeerRecoveryTargetService implements IndexEventListener {
|
|||
@Override
|
||||
public void onNewClusterState(ClusterState state) {
|
||||
try {
|
||||
messageReceived(request, channel, task);
|
||||
try (RecoveryRef recoveryRef = onGoingRecoveries.getRecoverySafe(request.recoveryId(), request.shardId())) {
|
||||
performTranslogOps(request, listener, recoveryRef);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
listener.onFailure(e);
|
||||
}
|
||||
|
@ -402,7 +412,6 @@ public class PeerRecoveryTargetService implements IndexEventListener {
|
|||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class FilesInfoRequestHandler implements TransportRequestHandler<RecoveryFilesInfoRequest> {
|
||||
|
||||
|
|
Loading…
Reference in New Issue