Similar to #47507: with very low probability, concurrent snapshot status and snapshot delete operations throw a `SnapshotException` where you (and the SLM tests) would expect a `SnapshotMissingException`. This fixes the exception type and adds a test for the scenario.
commit 96b36b5a8c (parent f4c884450f)
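For context (not part of this commit), a minimal sketch of why the exception type matters to callers such as the SLM tests: `SnapshotMissingException` is a subclass of `SnapshotException` that signals the snapshot is simply gone, for example because a concurrent delete won the race, and can be handled as a benign condition, whereas any other `SnapshotException` still indicates a real repository failure. The helper class and the `Runnable` wrapper below are illustrative assumptions, not Elasticsearch API.

// Illustrative sketch only; not part of this commit. It shows how a caller can treat
// a missing snapshot as a benign race while still surfacing real repository failures.
import org.elasticsearch.snapshots.SnapshotException;
import org.elasticsearch.snapshots.SnapshotMissingException;

final class SnapshotStatusCaller { // hypothetical helper, not an Elasticsearch class

    /** Runs a snapshot-status style call; returns false if the snapshot has gone missing. */
    static boolean statusAvailable(Runnable statusCall) {
        try {
            statusCall.run();                 // e.g. a snapshot status or get-snapshots request
            return true;
        } catch (SnapshotMissingException e) {
            return false;                     // snapshot deleted concurrently: not an error
        } catch (SnapshotException e) {
            throw e;                          // anything else is a genuine repository problem
        }
    }
}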
BlobStoreRepository.java
@@ -1337,6 +1337,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
     private BlobStoreIndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotId snapshotId) {
         try {
             return indexShardSnapshotFormat.read(shardContainer, snapshotId.getUUID());
+        } catch (NoSuchFileException ex) {
+            throw new SnapshotMissingException(metadata.name(), snapshotId, ex);
         } catch (IOException ex) {
             throw new SnapshotException(metadata.name(), snapshotId,
                 "failed to read shard snapshot file for [" + shardContainer.path() + ']', ex);
AbstractSnapshotIntegTestCase.java
@@ -18,6 +18,7 @@
  */
 package org.elasticsearch.snapshots;
 
+import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.cluster.SnapshotsInProgress;
 import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
@@ -25,9 +26,12 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.repositories.RepositoriesService;
+import org.elasticsearch.repositories.Repository;
+import org.elasticsearch.repositories.RepositoryData;
 import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil;
 import org.elasticsearch.snapshots.mockstore.MockRepository;
 import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.threadpool.ThreadPool;
 import org.junit.After;
 
 import java.io.IOException;
@@ -40,6 +44,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -88,6 +93,19 @@ public abstract class AbstractSnapshotIntegTestCase extends ESIntegTestCase {
         skipRepoConsistencyCheckReason = reason;
     }
 
+    protected RepositoryData getRepositoryData(Repository repository) throws InterruptedException {
+        ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class, internalCluster().getMasterName());
+        final SetOnce<RepositoryData> repositoryData = new SetOnce<>();
+        final CountDownLatch latch = new CountDownLatch(1);
+        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> {
+            repositoryData.set(repository.getRepositoryData());
+            latch.countDown();
+        });
+
+        latch.await();
+        return repositoryData.get();
+    }
+
     public static long getFailureCount(String repository) {
         long failureCount = 0;
         for (RepositoriesService repositoriesService :
SharedClusterSnapshotRestoreIT.java
@@ -19,7 +19,6 @@
 
 package org.elasticsearch.snapshots;
 
-import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.Version;
@@ -91,7 +90,6 @@ import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.script.MockScriptEngine;
 import org.elasticsearch.script.StoredScriptsIT;
 import org.elasticsearch.snapshots.mockstore.MockRepository;
-import org.elasticsearch.threadpool.ThreadPool;
 
 import java.io.IOException;
 import java.nio.channels.SeekableByteChannel;
@@ -106,7 +104,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
-import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.function.Consumer;
@@ -3657,19 +3654,6 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
         }
     }
 
-    private RepositoryData getRepositoryData(Repository repository) throws InterruptedException {
-        ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class, internalCluster().getMasterName());
-        final SetOnce<RepositoryData> repositoryData = new SetOnce<>();
-        final CountDownLatch latch = new CountDownLatch(1);
-        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> {
-            repositoryData.set(repository.getRepositoryData());
-            latch.countDown();
-        });
-
-        latch.await();
-        return repositoryData.get();
-    }
-
     private void verifySnapshotInfo(final GetSnapshotsResponse response, final Map<String, List<String>> indicesPerSnapshot) {
         for (SnapshotInfo snapshotInfo : response.getSnapshots()) {
             final List<String> expected = snapshotInfo.indices();
SnapshotStatusApisIT.java
@@ -29,6 +29,8 @@ import org.elasticsearch.cluster.SnapshotsInProgress;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.core.internal.io.IOUtils;
+import org.elasticsearch.repositories.RepositoriesService;
+import org.elasticsearch.repositories.Repository;
 import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
 
 import java.io.IOException;
@@ -133,4 +135,36 @@ public class SnapshotStatusApisIT extends AbstractSnapshotIntegTestCase {
         expectThrows(SnapshotMissingException.class, () -> client().admin().cluster()
             .getSnapshots(new GetSnapshotsRequest("test-repo", new String[] {"test-snap"})).actionGet());
     }
+
+    public void testExceptionOnMissingShardLevelSnapBlob() throws IOException, InterruptedException {
+        disableRepoConsistencyCheck("This test intentionally corrupts the repository");
+
+        logger.info("--> creating repository");
+        final Path repoPath = randomRepoPath();
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo").setType("fs").setSettings(
+            Settings.builder().put("location", repoPath).build()));
+
+        createIndex("test-idx-1");
+        ensureGreen();
+
+        logger.info("--> indexing some data");
+        for (int i = 0; i < 100; i++) {
+            index("test-idx-1", "_doc", Integer.toString(i), "foo", "bar" + i);
+        }
+        refresh();
+
+        logger.info("--> snapshot");
+        final CreateSnapshotResponse response =
+            client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).get();
+
+        logger.info("--> delete shard-level snap-${uuid}.dat file for one shard in this snapshot to simulate concurrent delete");
+        final RepositoriesService service = internalCluster().getMasterNodeInstance(RepositoriesService.class);
+        final Repository repository = service.repository("test-repo");
+        final String indexRepoId = getRepositoryData(repository).resolveIndexId(response.getSnapshotInfo().indices().get(0)).getId();
+        IOUtils.rm(repoPath.resolve("indices").resolve(indexRepoId).resolve("0").resolve(
+            BlobStoreRepository.SNAPSHOT_PREFIX + response.getSnapshotInfo().snapshotId().getUUID() + ".dat"));
+
+        expectThrows(SnapshotMissingException.class, () -> client().admin().cluster()
+            .prepareSnapshotStatus("test-repo").setSnapshots("test-snap").execute().actionGet());
+    }
 }