Make loadShardSnapshot Exceptions Consistent (#47728) (#47735)

Similar to #47507. With very low probability, concurrent snapshot
status and snapshot delete operations could throw a `SnapshotException`
where callers (and the SLM tests) expect a `SnapshotMissingException`.
This commit fixes the exception type and adds a test for the scenario.
Author: Armin Braun, 2019-10-08 21:04:51 +02:00 (committed by GitHub)
Parent: f4c884450f
Commit: 96b36b5a8c
4 changed files with 54 additions and 16 deletions
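
For context, a rough sketch of what the fix means for callers of the snapshot status API: a snapshot whose data was deleted concurrently now surfaces as `SnapshotMissingException` instead of a generic `SnapshotException`. The helper class below is illustrative only and not part of this commit; note that `SnapshotMissingException` extends `SnapshotException`, so the more specific catch has to come first.

import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.snapshots.SnapshotException;
import org.elasticsearch.snapshots.SnapshotMissingException;

// Illustrative sketch, not part of this commit.
final class SnapshotStatusSketch {

    static boolean snapshotStillExists(Client client, String repositoryName, String snapshotName) {
        try {
            SnapshotsStatusResponse status = client.admin().cluster()
                .prepareSnapshotStatus(repositoryName).setSnapshots(snapshotName).get();
            return status.getSnapshots().isEmpty() == false;
        } catch (SnapshotMissingException e) {
            // The snapshot (or one of its shard-level snap-${uuid}.dat blobs) is gone,
            // e.g. because of a concurrent delete: treat it as missing, not as a failure.
            return false;
        } catch (SnapshotException e) {
            // Any other repository problem (e.g. an IOException while reading a blob)
            // still arrives as the generic SnapshotException and is rethrown here.
            throw e;
        }
    }
}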

File: BlobStoreRepository.java

@@ -1337,6 +1337,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
    private BlobStoreIndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotId snapshotId) {
        try {
            return indexShardSnapshotFormat.read(shardContainer, snapshotId.getUUID());
        } catch (NoSuchFileException ex) {
            throw new SnapshotMissingException(metadata.name(), snapshotId, ex);
        } catch (IOException ex) {
            throw new SnapshotException(metadata.name(), snapshotId,
                "failed to read shard snapshot file for [" + shardContainer.path() + ']', ex);

File: AbstractSnapshotIntegTestCase.java

@@ -18,6 +18,7 @@
*/
package org.elasticsearch.snapshots;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
@@ -25,9 +26,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoriesService;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.repositories.RepositoryData;
import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil;
import org.elasticsearch.snapshots.mockstore.MockRepository;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.After;
import java.io.IOException;
@@ -40,6 +44,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
@@ -88,6 +93,19 @@ public abstract class AbstractSnapshotIntegTestCase extends ESIntegTestCase {
        skipRepoConsistencyCheckReason = reason;
    }

    protected RepositoryData getRepositoryData(Repository repository) throws InterruptedException {
        ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class, internalCluster().getMasterName());
        final SetOnce<RepositoryData> repositoryData = new SetOnce<>();
        final CountDownLatch latch = new CountDownLatch(1);
        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> {
            repositoryData.set(repository.getRepositoryData());
            latch.countDown();
        });
        latch.await();
        return repositoryData.get();
    }

    public static long getFailureCount(String repository) {
        long failureCount = 0;
        for (RepositoriesService repositoriesService :
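
The getRepositoryData helper above was pulled up from SharedClusterSnapshotRestoreIT (see the next file) so that SnapshotStatusApisIT can reuse it. It hops onto the SNAPSHOT thread pool before calling `repository.getRepositoryData()`, presumably to satisfy the repository's thread-context assertions in tests. An equivalent hand-off could also be written with `PlainActionFuture` instead of `SetOnce` plus `CountDownLatch`; a sketch of that alternative (not what this commit does):

import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.repositories.RepositoryData;
import org.elasticsearch.threadpool.ThreadPool;

// Alternative sketch only; the commit keeps the SetOnce/CountDownLatch version above.
final class RepositoryDataSketch {

    static RepositoryData readRepositoryData(ThreadPool threadPool, Repository repository) {
        final PlainActionFuture<RepositoryData> future = PlainActionFuture.newFuture();
        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> {
            try {
                future.onResponse(repository.getRepositoryData());
            } catch (Exception e) {
                future.onFailure(e); // don't leave the caller blocked if the read fails
            }
        });
        // Blocks the calling (test) thread until the SNAPSHOT-pool task completes.
        return future.actionGet();
    }
}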

File: SharedClusterSnapshotRestoreIT.java

@@ -19,7 +19,6 @@
package org.elasticsearch.snapshots;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
@@ -91,7 +90,6 @@ import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.script.MockScriptEngine;
import org.elasticsearch.script.StoredScriptsIT;
import org.elasticsearch.snapshots.mockstore.MockRepository;
import org.elasticsearch.threadpool.ThreadPool;
import java.io.IOException;
import java.nio.channels.SeekableByteChannel;
@@ -106,7 +104,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
@@ -3657,19 +3654,6 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
        }
    }

    private RepositoryData getRepositoryData(Repository repository) throws InterruptedException {
        ThreadPool threadPool = internalCluster().getInstance(ThreadPool.class, internalCluster().getMasterName());
        final SetOnce<RepositoryData> repositoryData = new SetOnce<>();
        final CountDownLatch latch = new CountDownLatch(1);
        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> {
            repositoryData.set(repository.getRepositoryData());
            latch.countDown();
        });
        latch.await();
        return repositoryData.get();
    }

    private void verifySnapshotInfo(final GetSnapshotsResponse response, final Map<String, List<String>> indicesPerSnapshot) {
        for (SnapshotInfo snapshotInfo : response.getSnapshots()) {
            final List<String> expected = snapshotInfo.indices();

File: SnapshotStatusApisIT.java

@@ -29,6 +29,8 @@ import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.repositories.RepositoriesService;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
import java.io.IOException;
@@ -133,4 +135,36 @@ public class SnapshotStatusApisIT extends AbstractSnapshotIntegTestCase {
        expectThrows(SnapshotMissingException.class, () -> client().admin().cluster()
            .getSnapshots(new GetSnapshotsRequest("test-repo", new String[] {"test-snap"})).actionGet());
    }

    public void testExceptionOnMissingShardLevelSnapBlob() throws IOException, InterruptedException {
        disableRepoConsistencyCheck("This test intentionally corrupts the repository");

        logger.info("--> creating repository");
        final Path repoPath = randomRepoPath();
        assertAcked(client().admin().cluster().preparePutRepository("test-repo").setType("fs").setSettings(
            Settings.builder().put("location", repoPath).build()));

        createIndex("test-idx-1");
        ensureGreen();

        logger.info("--> indexing some data");
        for (int i = 0; i < 100; i++) {
            index("test-idx-1", "_doc", Integer.toString(i), "foo", "bar" + i);
        }
        refresh();

        logger.info("--> snapshot");
        final CreateSnapshotResponse response =
            client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).get();

        logger.info("--> delete shard-level snap-${uuid}.dat file for one shard in this snapshot to simulate concurrent delete");
        final RepositoriesService service = internalCluster().getMasterNodeInstance(RepositoriesService.class);
        final Repository repository = service.repository("test-repo");
        final String indexRepoId = getRepositoryData(repository).resolveIndexId(response.getSnapshotInfo().indices().get(0)).getId();
        IOUtils.rm(repoPath.resolve("indices").resolve(indexRepoId).resolve("0").resolve(
            BlobStoreRepository.SNAPSHOT_PREFIX + response.getSnapshotInfo().snapshotId().getUUID() + ".dat"));

        expectThrows(SnapshotMissingException.class, () -> client().admin().cluster()
            .prepareSnapshotStatus("test-repo").setSnapshots("test-snap").execute().actionGet());
    }
}
}