Fix Remote Recovery Being Retried for Removed Nodes (#57608) (#57913)

If a node is disconnected we retry. It does not make sense
to retry the recovery if the node is removed from the cluster though.
=> added a CS listener that cancels the recovery for removed nodes

Also, we were running the retry on the `SAME` pool which for each retry will
be the scheduler pool. Since the error path of the listener we use here
will do blocking operations when closing the resources used by the recovery
we can't use the `SAME` pool here since not all exceptions go to the `ActionListenerResponseHandler`
threading like e.g. `NodeNotConnectedException`.

Closes #57585
This commit is contained in:
Armin Braun 2020-06-10 09:41:52 +02:00 committed by GitHub
parent d579420452
commit fe85bdbe6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 71 additions and 18 deletions

View File

@@ -331,6 +331,10 @@ public class IndicesService extends AbstractLifecycleComponent
private static final String DANGLING_INDICES_UPDATE_THREAD_NAME = "DanglingIndices#updateTask"; private static final String DANGLING_INDICES_UPDATE_THREAD_NAME = "DanglingIndices#updateTask";
/**
 * Returns the {@link ClusterService} held by this indices service.
 * Added so that dependent components (e.g. {@code PeerRecoverySourceService}, which registers
 * itself as a cluster state listener in {@code doStart()}) can reach the cluster service
 * through the {@code IndicesService} they already depend on.
 */
public ClusterService clusterService() {
return clusterService;
}
@Override @Override
protected void doStop() { protected void doStop() {
ThreadPool.terminate(danglingIndicesThreadPoolExecutor, 10, TimeUnit.SECONDS); ThreadPool.terminate(danglingIndicesThreadPoolExecutor, 10, TimeUnit.SECONDS);

View File

@@ -26,8 +26,12 @@ import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.ChannelActionListener; import org.elasticsearch.action.support.ChannelActionListener;
import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
@@ -44,17 +48,18 @@ import org.elasticsearch.transport.TransportRequestHandler;
import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.TransportService;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
/** /**
* The source recovery accepts recovery requests from other peer shards and start the recovery process from this * The source recovery accepts recovery requests from other peer shards and start the recovery process from this
* source shard to the target shard. * source shard to the target shard.
*/ */
public class PeerRecoverySourceService extends AbstractLifecycleComponent implements IndexEventListener { public class PeerRecoverySourceService extends AbstractLifecycleComponent implements IndexEventListener, ClusterStateListener {
private static final Logger logger = LogManager.getLogger(PeerRecoverySourceService.class); private static final Logger logger = LogManager.getLogger(PeerRecoverySourceService.class);
@@ -88,11 +93,13 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
@Override @Override
protected void doStart() { protected void doStart() {
indicesService.clusterService().addListener(this);
} }
@Override @Override
protected void doStop() { protected void doStop() {
ongoingRecoveries.awaitEmpty(); ongoingRecoveries.awaitEmpty();
indicesService.clusterService().removeListener(this);
} }
@Override @Override
@@ -107,6 +114,15 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
} }
} }
@Override
/**
 * Reacts to cluster state changes: when nodes have left the cluster, cancels any ongoing
 * source-side recoveries that target one of the removed nodes, instead of letting the
 * recovery be retried against a node that no longer exists (see #57585).
 *
 * @param event the cluster changed event; only its removed-nodes delta is inspected
 */
public void clusterChanged(ClusterChangedEvent event) {
// Cheap guard: only walk the delta when at least one node was actually removed.
if (event.nodesRemoved()) {
for (DiscoveryNode removedNode : event.nodesDelta().removedNodes()) {
ongoingRecoveries.cancelOnNodeLeft(removedNode);
}
}
}
private void recover(StartRecoveryRequest request, ActionListener<RecoveryResponse> listener) { private void recover(StartRecoveryRequest request, ActionListener<RecoveryResponse> listener) {
final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex()); final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
final IndexShard shard = indexService.getShard(request.shardId().id()); final IndexShard shard = indexService.getShard(request.shardId().id());
@@ -162,15 +178,28 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
private final Map<IndexShard, ShardRecoveryContext> ongoingRecoveries = new HashMap<>(); private final Map<IndexShard, ShardRecoveryContext> ongoingRecoveries = new HashMap<>();
private final Map<DiscoveryNode, Collection<RemoteRecoveryTargetHandler>> nodeToHandlers = new HashMap<>();
@Nullable @Nullable
private List<ActionListener<Void>> emptyListeners; private List<ActionListener<Void>> emptyListeners;
synchronized RecoverySourceHandler addNewRecovery(StartRecoveryRequest request, IndexShard shard) { synchronized RecoverySourceHandler addNewRecovery(StartRecoveryRequest request, IndexShard shard) {
assert lifecycle.started(); assert lifecycle.started();
final ShardRecoveryContext shardContext = ongoingRecoveries.computeIfAbsent(shard, s -> new ShardRecoveryContext()); final ShardRecoveryContext shardContext = ongoingRecoveries.computeIfAbsent(shard, s -> new ShardRecoveryContext());
RecoverySourceHandler handler = shardContext.addNewRecovery(request, shard); final Tuple<RecoverySourceHandler, RemoteRecoveryTargetHandler> handlers = shardContext.addNewRecovery(request, shard);
final RemoteRecoveryTargetHandler recoveryTargetHandler = handlers.v2();
nodeToHandlers.computeIfAbsent(recoveryTargetHandler.targetNode(), k -> new HashSet<>()).add(recoveryTargetHandler);
shard.recoveryStats().incCurrentAsSource(); shard.recoveryStats().incCurrentAsSource();
return handler; return handlers.v1();
}
/**
 * Cancels every ongoing remote recovery whose target is the given node. Invoked from
 * {@code clusterChanged} when the node has been removed from the cluster, so that the
 * recovery is not pointlessly retried against a departed node.
 *
 * @param node the node that left the cluster
 */
synchronized void cancelOnNodeLeft(DiscoveryNode node) {
// Handlers are tracked per target node in nodeToHandlers (populated in addNewRecovery).
final Collection<RemoteRecoveryTargetHandler> handlers = nodeToHandlers.get(node);
if (handlers != null) {
for (RemoteRecoveryTargetHandler handler : handlers) {
handler.cancel();
}
}
} }
synchronized void reestablishRecovery(ReestablishRecoveryRequest request, IndexShard shard, synchronized void reestablishRecovery(ReestablishRecoveryRequest request, IndexShard shard,
@@ -186,10 +215,20 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
synchronized void remove(IndexShard shard, RecoverySourceHandler handler) { synchronized void remove(IndexShard shard, RecoverySourceHandler handler) {
final ShardRecoveryContext shardRecoveryContext = ongoingRecoveries.get(shard); final ShardRecoveryContext shardRecoveryContext = ongoingRecoveries.get(shard);
assert shardRecoveryContext != null : "Shard was not registered [" + shard + "]"; assert shardRecoveryContext != null : "Shard was not registered [" + shard + "]";
boolean remove = shardRecoveryContext.recoveryHandlers.remove(handler); final RemoteRecoveryTargetHandler removed = shardRecoveryContext.recoveryHandlers.remove(handler);
assert remove : "Handler was not registered [" + handler + "]"; assert removed != null : "Handler was not registered [" + handler + "]";
if (remove) { if (removed != null) {
shard.recoveryStats().decCurrentAsSource(); shard.recoveryStats().decCurrentAsSource();
removed.cancel();
assert nodeToHandlers.getOrDefault(removed.targetNode(), Collections.emptySet()).contains(removed)
: "Remote recovery was not properly tracked [" + removed + "]";
nodeToHandlers.computeIfPresent(removed.targetNode(), (k, handlersForNode) -> {
handlersForNode.remove(removed);
if (handlersForNode.isEmpty()) {
return null;
}
return handlersForNode;
});
} }
if (shardRecoveryContext.recoveryHandlers.isEmpty()) { if (shardRecoveryContext.recoveryHandlers.isEmpty()) {
ongoingRecoveries.remove(shard); ongoingRecoveries.remove(shard);
@@ -207,7 +246,7 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
final ShardRecoveryContext shardRecoveryContext = ongoingRecoveries.get(shard); final ShardRecoveryContext shardRecoveryContext = ongoingRecoveries.get(shard);
if (shardRecoveryContext != null) { if (shardRecoveryContext != null) {
final List<Exception> failures = new ArrayList<>(); final List<Exception> failures = new ArrayList<>();
for (RecoverySourceHandler handlers : shardRecoveryContext.recoveryHandlers) { for (RecoverySourceHandler handlers : shardRecoveryContext.recoveryHandlers.keySet()) {
try { try {
handlers.cancel(reason); handlers.cancel(reason);
} catch (Exception ex) { } catch (Exception ex) {
@@ -237,21 +276,22 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
} }
private final class ShardRecoveryContext { private final class ShardRecoveryContext {
final Set<RecoverySourceHandler> recoveryHandlers = new HashSet<>(); final Map<RecoverySourceHandler, RemoteRecoveryTargetHandler> recoveryHandlers = new HashMap<>();
/** /**
* Adds recovery source handler. * Adds recovery source handler.
*/ */
synchronized RecoverySourceHandler addNewRecovery(StartRecoveryRequest request, IndexShard shard) { synchronized Tuple<RecoverySourceHandler, RemoteRecoveryTargetHandler> addNewRecovery(StartRecoveryRequest request,
for (RecoverySourceHandler existingHandler : recoveryHandlers) { IndexShard shard) {
for (RecoverySourceHandler existingHandler : recoveryHandlers.keySet()) {
if (existingHandler.getRequest().targetAllocationId().equals(request.targetAllocationId())) { if (existingHandler.getRequest().targetAllocationId().equals(request.targetAllocationId())) {
throw new DelayRecoveryException("recovery with same target already registered, waiting for " + throw new DelayRecoveryException("recovery with same target already registered, waiting for " +
"previous recovery attempt to be cancelled or completed"); "previous recovery attempt to be cancelled or completed");
} }
} }
RecoverySourceHandler handler = createRecoverySourceHandler(request, shard); final Tuple<RecoverySourceHandler, RemoteRecoveryTargetHandler> handlers = createRecoverySourceHandler(request, shard);
recoveryHandlers.add(handler); recoveryHandlers.put(handlers.v1(), handlers.v2());
return handler; return handlers;
} }
/** /**
@@ -259,7 +299,7 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
*/ */
synchronized void reestablishRecovery(ReestablishRecoveryRequest request, ActionListener<RecoveryResponse> listener) { synchronized void reestablishRecovery(ReestablishRecoveryRequest request, ActionListener<RecoveryResponse> listener) {
RecoverySourceHandler handler = null; RecoverySourceHandler handler = null;
for (RecoverySourceHandler existingHandler : recoveryHandlers) { for (RecoverySourceHandler existingHandler : recoveryHandlers.keySet()) {
if (existingHandler.getRequest().recoveryId() == request.recoveryId() && if (existingHandler.getRequest().recoveryId() == request.recoveryId() &&
existingHandler.getRequest().targetAllocationId().equals(request.targetAllocationId())) { existingHandler.getRequest().targetAllocationId().equals(request.targetAllocationId())) {
handler = existingHandler; handler = existingHandler;
@@ -273,14 +313,15 @@ public class PeerRecoverySourceService extends AbstractLifecycleComponent implem
handler.addListener(listener); handler.addListener(listener);
} }
private RecoverySourceHandler createRecoverySourceHandler(StartRecoveryRequest request, IndexShard shard) { private Tuple<RecoverySourceHandler, RemoteRecoveryTargetHandler> createRecoverySourceHandler(StartRecoveryRequest request,
IndexShard shard) {
RecoverySourceHandler handler; RecoverySourceHandler handler;
final RemoteRecoveryTargetHandler recoveryTarget = final RemoteRecoveryTargetHandler recoveryTarget =
new RemoteRecoveryTargetHandler(request.recoveryId(), request.shardId(), transportService, new RemoteRecoveryTargetHandler(request.recoveryId(), request.shardId(), transportService,
request.targetNode(), recoverySettings, throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime)); request.targetNode(), recoverySettings, throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime));
handler = new RecoverySourceHandler(shard, recoveryTarget, shard.getThreadPool(), request, handler = new RecoverySourceHandler(shard, recoveryTarget, shard.getThreadPool(), request,
Math.toIntExact(recoverySettings.getChunkSize().getBytes()), recoverySettings.getMaxConcurrentFileChunks()); Math.toIntExact(recoverySettings.getChunkSize().getBytes()), recoverySettings.getMaxConcurrentFileChunks());
return handler; return Tuple.tuple(handler, recoveryTarget);
} }
} }
} }

View File

@@ -99,6 +99,10 @@ public class RemoteRecoveryTargetHandler implements RecoveryTargetHandler {
this.retriesSupported = targetNode.getVersion().onOrAfter(Version.V_7_9_0); this.retriesSupported = targetNode.getVersion().onOrAfter(Version.V_7_9_0);
} }
/**
 * Returns the {@link DiscoveryNode} this recovery is sending data to. Exposed so that
 * {@code PeerRecoverySourceService.OngoingRecoveries} can index handlers by target node
 * and cancel them when that node leaves the cluster.
 */
public DiscoveryNode targetNode() {
return targetNode;
}
@Override @Override
public void prepareForTranslogOperations(int totalTranslogOps, ActionListener<Void> listener) { public void prepareForTranslogOperations(int totalTranslogOps, ActionListener<Void> listener) {
final String action = PeerRecoveryTargetService.Actions.PREPARE_TRANSLOG; final String action = PeerRecoveryTargetService.Actions.PREPARE_TRANSLOG;

View File

@@ -19,6 +19,7 @@
package org.elasticsearch.indices.recovery; package org.elasticsearch.indices.recovery;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.seqno.SequenceNumbers; import org.elasticsearch.index.seqno.SequenceNumbers;
@@ -32,13 +33,16 @@ import java.io.IOException;
import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.containsString;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
public class PeerRecoverySourceServiceTests extends IndexShardTestCase { public class PeerRecoverySourceServiceTests extends IndexShardTestCase {
public void testDuplicateRecoveries() throws IOException { public void testDuplicateRecoveries() throws IOException {
IndexShard primary = newStartedShard(true); IndexShard primary = newStartedShard(true);
final IndicesService indicesService = mock(IndicesService.class);
when(indicesService.clusterService()).thenReturn(mock(ClusterService.class));
PeerRecoverySourceService peerRecoverySourceService = new PeerRecoverySourceService( PeerRecoverySourceService peerRecoverySourceService = new PeerRecoverySourceService(
mock(TransportService.class), mock(IndicesService.class), mock(TransportService.class), indicesService,
new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS))); new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)));
StartRecoveryRequest startRecoveryRequest = new StartRecoveryRequest(primary.shardId(), randomAlphaOfLength(10), StartRecoveryRequest startRecoveryRequest = new StartRecoveryRequest(primary.shardId(), randomAlphaOfLength(10),
getFakeDiscoNode("source"), getFakeDiscoNode("target"), Store.MetadataSnapshot.EMPTY, randomBoolean(), randomLong(), getFakeDiscoNode("source"), getFakeDiscoNode("target"), Store.MetadataSnapshot.EMPTY, randomBoolean(), randomLong(),