Never release store using CancellableThreads (#45409)
Today we can release a Store using CancellableThreads. If we are holding the last reference, then we will verify the node lock before deleting the store. Checking node lock performs some I/O on FileChannel. If the current thread is interrupted, then the channel will be closed and the node lock will also be invalid. Closes #45237
This commit is contained in:
parent
9b180314e3
commit
3029887451
|
@ -112,6 +112,11 @@ public class PeerRecoverySourceService implements IndexEventListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// exposed for testing
|
||||||
|
final int numberOfOngoingRecoveries() {
|
||||||
|
return ongoingRecoveries.ongoingRecoveries.size();
|
||||||
|
}
|
||||||
|
|
||||||
final class OngoingRecoveries {
|
final class OngoingRecoveries {
|
||||||
private final Map<IndexShard, ShardRecoveryContext> ongoingRecoveries = new HashMap<>();
|
private final Map<IndexShard, ShardRecoveryContext> ongoingRecoveries = new HashMap<>();
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,9 @@ import org.apache.lucene.util.SetOnce;
|
||||||
import org.elasticsearch.ExceptionsHelper;
|
import org.elasticsearch.ExceptionsHelper;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.action.ActionListener;
|
import org.elasticsearch.action.ActionListener;
|
||||||
|
import org.elasticsearch.action.ActionRunnable;
|
||||||
import org.elasticsearch.action.StepListener;
|
import org.elasticsearch.action.StepListener;
|
||||||
|
import org.elasticsearch.action.support.PlainActionFuture;
|
||||||
import org.elasticsearch.action.support.ThreadedActionListener;
|
import org.elasticsearch.action.support.ThreadedActionListener;
|
||||||
import org.elasticsearch.action.support.replication.ReplicationResponse;
|
import org.elasticsearch.action.support.replication.ReplicationResponse;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
|
@ -232,8 +234,7 @@ public class RecoverySourceHandler {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final int estimateNumOps = shard.estimateNumberOfHistoryOperations("peer-recovery", startingSeqNo);
|
final int estimateNumOps = shard.estimateNumberOfHistoryOperations("peer-recovery", startingSeqNo);
|
||||||
shard.store().incRef();
|
final Releasable releaseStore = acquireStore(shard.store());
|
||||||
final Releasable releaseStore = Releasables.releaseOnce(shard.store()::decRef);
|
|
||||||
resources.add(releaseStore);
|
resources.add(releaseStore);
|
||||||
sendFileStep.whenComplete(r -> IOUtils.close(safeCommitRef, releaseStore), e -> {
|
sendFileStep.whenComplete(r -> IOUtils.close(safeCommitRef, releaseStore), e -> {
|
||||||
try {
|
try {
|
||||||
|
@ -396,6 +397,25 @@ public class RecoverySourceHandler {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Increases the store reference and returns a {@link Releasable} that will decrease the store reference using the generic thread pool.
|
||||||
|
* We must never release the store using an interruptible thread as we can risk invalidating the node lock.
|
||||||
|
*/
|
||||||
|
private Releasable acquireStore(Store store) {
|
||||||
|
store.incRef();
|
||||||
|
return Releasables.releaseOnce(() -> {
|
||||||
|
final PlainActionFuture<Void> future = new PlainActionFuture<>();
|
||||||
|
threadPool.generic().execute(new ActionRunnable<Void>(future) {
|
||||||
|
@Override
|
||||||
|
protected void doRun() {
|
||||||
|
store.decRef();
|
||||||
|
listener.onResponse(null);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
FutureUtils.get(future);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
static final class SendFileResult {
|
static final class SendFileResult {
|
||||||
final List<String> phase1FileNames;
|
final List<String> phase1FileNames;
|
||||||
final List<Long> phase1FileSizes;
|
final List<Long> phase1FileSizes;
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.elasticsearch.action.admin.indices.stats.ShardStats;
|
||||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
import org.elasticsearch.action.index.IndexResponse;
|
import org.elasticsearch.action.index.IndexResponse;
|
||||||
import org.elasticsearch.action.search.SearchResponse;
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.action.support.ActiveShardCount;
|
||||||
import org.elasticsearch.action.support.PlainActionFuture;
|
import org.elasticsearch.action.support.PlainActionFuture;
|
||||||
import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
|
import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
|
||||||
import org.elasticsearch.action.support.replication.ReplicationResponse;
|
import org.elasticsearch.action.support.replication.ReplicationResponse;
|
||||||
|
@ -1488,4 +1489,20 @@ public class IndexRecoveryIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
ensureGreen(indexName);
|
ensureGreen(indexName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCancelRecoveryWithAutoExpandReplicas() throws Exception {
|
||||||
|
internalCluster().startMasterOnlyNode();
|
||||||
|
assertAcked(client().admin().indices().prepareCreate("test")
|
||||||
|
.setSettings(Settings.builder().put(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, "0-all"))
|
||||||
|
.setWaitForActiveShards(ActiveShardCount.NONE));
|
||||||
|
internalCluster().startNode();
|
||||||
|
internalCluster().startNode();
|
||||||
|
client().admin().cluster().prepareReroute().setRetryFailed(true).get();
|
||||||
|
assertAcked(client().admin().indices().prepareDelete("test")); // cancel recoveries
|
||||||
|
assertBusy(() -> {
|
||||||
|
for (PeerRecoverySourceService recoveryService : internalCluster().getDataNodeInstances(PeerRecoverySourceService.class)) {
|
||||||
|
assertThat(recoveryService.numberOfOngoingRecoveries(), equalTo(0));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue