Free shard resources when recovery reset is cancelled

Resetting a recovery consists of resetting the old recovery target and replacing it by a new recovery target object. This is done on the Cancellable threads of
the new recovery target. If the new recovery target is already cancelled before or while this happens, for example due to shard closing or recovery source
changing, we have to make sure that the old recovery target object frees all shard resources.

Relates to #22325
This commit is contained in:
Yannick Welsch 2016-12-29 16:51:37 +01:00
parent 3999e5ba6b
commit 816e1c6cc4
2 changed files with 20 additions and 5 deletions

View File

@ -26,6 +26,7 @@ import org.elasticsearch.ElasticsearchTimeoutException;
import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.Callback; import org.elasticsearch.common.util.Callback;
import org.elasticsearch.common.util.CancellableThreads;
import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.IndexShard;
@ -106,9 +107,18 @@ public class RecoveriesCollection {
} }
// Closes the current recovery target // Closes the current recovery target
final RecoveryTarget finalOldRecoveryTarget = oldRecoveryTarget;
final AtomicBoolean successfulReset = new AtomicBoolean(); final AtomicBoolean successfulReset = new AtomicBoolean();
try {
final RecoveryTarget finalOldRecoveryTarget = oldRecoveryTarget;
newRecoveryTarget.CancellableThreads().executeIO(() -> successfulReset.set(finalOldRecoveryTarget.resetRecovery())); newRecoveryTarget.CancellableThreads().executeIO(() -> successfulReset.set(finalOldRecoveryTarget.resetRecovery()));
} catch (CancellableThreads.ExecutionCancelledException e) {
// new recovery target is already cancelled (probably due to shard closing or recovery source changing)
assert onGoingRecoveries.containsKey(newRecoveryTarget.recoveryId()) == false;
logger.trace("{} recovery reset cancelled, recovery from {}, id [{}], previous id [{}]", newRecoveryTarget.shardId(),
newRecoveryTarget.sourceNode(), newRecoveryTarget.recoveryId(), oldRecoveryTarget.recoveryId());
oldRecoveryTarget.cancel("recovery reset cancelled"); // if finalOldRecoveryTarget.resetRecovery did not even get to execute
return null;
}
if (successfulReset.get() == false) { if (successfulReset.get() == false) {
cancelRecovery(newRecoveryTarget.recoveryId(), "failed to reset recovery"); cancelRecovery(newRecoveryTarget.recoveryId(), "failed to reset recovery");
return null; return null;

View File

@ -184,9 +184,14 @@ public class RecoveryTarget extends AbstractRefCounted implements RecoveryTarget
*/ */
boolean resetRecovery() throws InterruptedException, IOException { boolean resetRecovery() throws InterruptedException, IOException {
if (finished.compareAndSet(false, true)) { if (finished.compareAndSet(false, true)) {
try {
// yes, this is just a logger call in a try-finally block. The reason for this is that resetRecovery is called from
// CancellableThreads and we have to make sure that all references to IndexShard are cleaned up before exiting this method
logger.debug("reset of recovery with shard {} and id [{}]", shardId, recoveryId); logger.debug("reset of recovery with shard {} and id [{}]", shardId, recoveryId);
// release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now } finally {
// release the initial reference. recovery files will be cleaned as soon as ref count goes to zero, potentially now.
decRef(); decRef();
}
closedLatch.await(); closedLatch.await();
RecoveryState.Stage stage = indexShard.recoveryState().getStage(); RecoveryState.Stage stage = indexShard.recoveryState().getStage();
if (indexShard.recoveryState().getPrimary() && (stage == RecoveryState.Stage.FINALIZE || stage == RecoveryState.Stage.DONE)) { if (indexShard.recoveryState().getPrimary() && (stage == RecoveryState.Stage.FINALIZE || stage == RecoveryState.Stage.DONE)) {