Do not fail snapshot when deleting a missing snapshotted file (#30332)

When deleting or creating a snapshot for a given shard, Elasticsearch 
usually starts by listing all the existing snapshotted files in the repository. 
It then computes a diff and deletes the snapshotted files that are no longer 
needed. During this deletion, an exception is thrown if a file to be deleted 
no longer exists.

This behavior is challenging with cloud-based repository implementations 
like S3, where a deleted file can still appear in the bucket listing for a 
few seconds or minutes (because the deletion can take some time to be fully 
replicated on S3). If the deleted file appears in the listing of files, the 
subsequent deletion fails with a NoSuchFileException and the snapshot is 
only partially created/deleted.
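
For example, a cleanup pass of the following shape is racy on an eventually 
consistent store. This is a simplified sketch, not the actual repository code; 
isNoLongerReferenced() is a hypothetical predicate standing in for the diff 
computed against the shard's snapshot index:

    // List, diff, delete: the listing may still contain blobs whose deletion
    // has not fully replicated yet, so deleteBlob() can fail with a
    // NoSuchFileException even though the blob is effectively gone.
    for (String blobName : container.listBlobs().keySet()) {
        if (isNoLongerReferenced(blobName)) {  // hypothetical predicate
            container.deleteBlob(blobName);    // throws if the blob is already gone
        }
    }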

This pull request makes the deletion of these files a bit less strict, i.e. it 
no longer fails if the file to delete does not exist anymore. It introduces a 
new BlobContainer.deleteBlobIgnoringIfNotExists() method that can be used 
at specific places where not failing when deleting a missing file is 
considered harmless.
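
Under the same assumptions as the sketch above, the cleanup pass becomes 
tolerant of blobs that have already disappeared:

    for (String blobName : container.listBlobs().keySet()) {
        if (isNoLongerReferenced(blobName)) {                  // hypothetical predicate
            container.deleteBlobIgnoringIfNotExists(blobName); // missing blob is a no-op
        }
    }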

Closes #28322
Tanguy Leroux 2018-05-07 09:35:55 +02:00 committed by Colin Goodheart-Smithe
parent eb51318b0f
commit a836e79610
6 changed files with 79 additions and 53 deletions

CHANGELOG.asciidoc

@@ -117,6 +117,7 @@ Fail snapshot operations early when creating or deleting a snapshot on a reposit
 written to by an older Elasticsearch after writing to it with a newer Elasticsearch version. ({pull}30140[#30140])
 Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641])
+Do not fail snapshot when deleting a missing snapshotted file ({pull}30332[#30332])
 //[float]
 //=== Regressions

BlobContainer.java

@@ -75,7 +75,8 @@ public interface BlobContainer {
     void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;

     /**
-     * Deletes a blob with giving name, if the blob exists. If the blob does not exist, this method throws an IOException.
+     * Deletes a blob with given name, if the blob exists. If the blob does not exist,
+     * this method throws a NoSuchFileException.
      *
      * @param blobName
      *          The name of the blob to delete.
@@ -84,6 +85,21 @@ public interface BlobContainer {
      */
     void deleteBlob(String blobName) throws IOException;

+    /**
+     * Deletes a blob with given name, ignoring if the blob does not exist.
+     *
+     * @param blobName
+     *          The name of the blob to delete.
+     * @throws IOException if the blob exists but could not be deleted.
+     */
+    default void deleteBlobIgnoringIfNotExists(String blobName) throws IOException {
+        try {
+            deleteBlob(blobName);
+        } catch (final NoSuchFileException ignored) {
+            // This exception is ignored
+        }
+    }
+
     /**
      * Lists all blobs in the container.
      *
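
Because the new method is a default method, every existing BlobContainer 
implementation inherits the try/catch fallback above unchanged. An 
implementation whose backing store already treats deleting a missing key as a 
no-op could override it to skip the exception round-trip; a minimal, 
hypothetical sketch for a filesystem-backed container (the path field and the 
use of java.nio.file.Files are illustrative, not part of this change):

    @Override
    public void deleteBlobIgnoringIfNotExists(String blobName) throws IOException {
        // Files.deleteIfExists is already a no-op for missing files.
        Files.deleteIfExists(path.resolve(blobName));
    }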

BlobStoreIndexShardSnapshots.java

@@ -102,13 +102,6 @@ public class BlobStoreIndexShardSnapshots implements Iterable<SnapshotFiles>, To
         this.physicalFiles = unmodifiableMap(mapBuilder);
     }

-    private BlobStoreIndexShardSnapshots() {
-        shardSnapshots = Collections.emptyList();
-        files = Collections.emptyMap();
-        physicalFiles = Collections.emptyMap();
-    }
-
     /**
      * Returns list of snapshots
      *

BlobStoreFormat.java

@@ -90,7 +90,6 @@ public abstract class BlobStoreFormat<T extends ToXContent> {
         return readBlob(blobContainer, blobName);
     }

-
     /**
      * Deletes obj in the blob container
      */

BlobStoreRepository.java

@@ -120,6 +120,7 @@ import java.util.stream.Collectors;

 import static java.util.Collections.emptyMap;
 import static java.util.Collections.unmodifiableMap;
+import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;

 /**
  * BlobStore - based implementation of Snapshot Repository
@@ -780,7 +781,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
         } catch (IOException ex) {
             // temporary blob creation or move failed - try cleaning up
             try {
-                snapshotsBlobContainer.deleteBlob(tempBlobName);
+                snapshotsBlobContainer.deleteBlobIgnoringIfNotExists(tempBlobName);
             } catch (IOException e) {
                 ex.addSuppressed(e);
             }
@@ -915,13 +916,13 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
             }
         }
         // finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
-        finalize(newSnapshotsList, fileListGeneration + 1, blobs);
+        finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot deletion [" + snapshotId + "]");
     }

     /**
      * Loads information about shard snapshot
      */
-    public BlobStoreIndexShardSnapshot loadSnapshot() {
+    BlobStoreIndexShardSnapshot loadSnapshot() {
         try {
             return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getUUID());
         } catch (IOException ex) {
@@ -930,54 +931,57 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
     }

     /**
-     * Removes all unreferenced files from the repository and writes new index file
+     * Writes a new index file for the shard and removes all unreferenced files from the repository.
      *
-     * We need to be really careful in handling index files in case of failures to make sure we have index file that
-     * points to files that were deleted.
+     * We need to be really careful in handling index files in case of failures to make sure we don't
+     * have index file that points to files that were deleted.
      *
      * @param snapshots          list of active snapshots in the container
      * @param fileListGeneration the generation number of the snapshot index file
      * @param blobs              list of blobs in the container
+     * @param reason             a reason explaining why the shard index file is written
      */
-    protected void finalize(List<SnapshotFiles> snapshots, int fileListGeneration, Map<String, BlobMetaData> blobs) {
-        BlobStoreIndexShardSnapshots newSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
-        // delete old index files first
-        for (String blobName : blobs.keySet()) {
-            if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
-                try {
-                    blobContainer.deleteBlob(blobName);
-                } catch (IOException e) {
-                    // We cannot delete index file - this is fatal, we cannot continue, otherwise we might end up
-                    // with references to non-existing files
-                    throw new IndexShardSnapshotFailedException(shardId, "error deleting index file ["
-                        + blobName + "] during cleanup", e);
-                }
-            }
-        }
-
-        // now go over all the blobs, and if they don't exist in a snapshot, delete them
-        for (String blobName : blobs.keySet()) {
-            // delete unused files
-            if (blobName.startsWith(DATA_BLOB_PREFIX)) {
-                if (newSnapshots.findNameFile(BlobStoreIndexShardSnapshot.FileInfo.canonicalName(blobName)) == null) {
-                    try {
-                        blobContainer.deleteBlob(blobName);
-                    } catch (IOException e) {
-                        // TODO: don't catch and let the user handle it?
-                        logger.debug(() -> new ParameterizedMessage("[{}] [{}] error deleting blob [{}] during cleanup", snapshotId, shardId, blobName), e);
-                    }
-                }
-            }
-        }
-
-        // If we deleted all snapshots - we don't need to create the index file
-        if (snapshots.size() > 0) {
-            try {
-                indexShardSnapshotsFormat.writeAtomic(newSnapshots, blobContainer, Integer.toString(fileListGeneration));
-            } catch (IOException e) {
-                throw new IndexShardSnapshotFailedException(shardId, "Failed to write file list", e);
-            }
-        }
-    }
+    protected void finalize(final List<SnapshotFiles> snapshots,
+                            final int fileListGeneration,
+                            final Map<String, BlobMetaData> blobs,
+                            final String reason) {
+        final String indexGeneration = Integer.toString(fileListGeneration);
+        final String currentIndexGen = indexShardSnapshotsFormat.blobName(indexGeneration);
+
+        final BlobStoreIndexShardSnapshots updatedSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
+        try {
+            // If we deleted all snapshots, we don't need to create a new index file
+            if (snapshots.size() > 0) {
+                indexShardSnapshotsFormat.writeAtomic(updatedSnapshots, blobContainer, indexGeneration);
+            }
+
+            // Delete old index files
+            for (final String blobName : blobs.keySet()) {
+                if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
+                    try {
+                        blobContainer.deleteBlobIgnoringIfNotExists(blobName);
+                    } catch (IOException e) {
+                        logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete index blob [{}] during finalization",
+                            snapshotId, shardId, blobName), e);
+                        throw e;
+                    }
+                }
+            }
+
+            // Delete all blobs that don't exist in a snapshot
+            for (final String blobName : blobs.keySet()) {
+                if (blobName.startsWith(DATA_BLOB_PREFIX) && (updatedSnapshots.findNameFile(canonicalName(blobName)) == null)) {
+                    try {
+                        blobContainer.deleteBlobIgnoringIfNotExists(blobName);
+                    } catch (IOException e) {
+                        logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete data blob [{}] during finalization",
+                            snapshotId, shardId, blobName), e);
+                    }
+                }
+            }
+        } catch (IOException e) {
+            String message = "Failed to finalize " + reason + " with shard index [" + currentIndexGen + "]";
+            throw new IndexShardSnapshotFailedException(shardId, message, e);
+        }
+    }
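
Note the reordering in this hunk: the new shard index file is now written 
before any stale blobs are deleted, so a failure partway through can no longer 
leave the repository with an index that references deleted files. Failures to 
delete a stale index blob remain fatal (logged, rethrown, and wrapped with the 
new reason string), while failures to delete unreferenced data blobs are only 
logged.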
@@ -1003,7 +1007,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
             if (!name.startsWith(DATA_BLOB_PREFIX)) {
                 continue;
             }
-            name = BlobStoreIndexShardSnapshot.FileInfo.canonicalName(name);
+            name = canonicalName(name);
             try {
                 long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
                 if (currentGen > generation) {
@@ -1217,7 +1221,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 newSnapshotsList.add(point);
             }
             // finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
-            finalize(newSnapshotsList, fileListGeneration + 1, blobs);
+            finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot creation [" + snapshotId + "]");
             snapshotStatus.moveToDone(System.currentTimeMillis());
         }

ESBlobStoreContainerTestCase.java

@@ -29,13 +29,14 @@ import org.elasticsearch.test.ESTestCase;

 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.NoSuchFileException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;

-import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
 import static org.elasticsearch.repositories.ESBlobStoreTestCase.randomBytes;
 import static org.elasticsearch.repositories.ESBlobStoreTestCase.readBlobFully;
+import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.notNullValue;
@@ -116,7 +117,7 @@ public abstract class ESBlobStoreContainerTestCase extends ESTestCase {
         try (BlobStore store = newBlobStore()) {
             final String blobName = "foobar";
             final BlobContainer container = store.blobContainer(new BlobPath());
-            expectThrows(IOException.class, () -> container.deleteBlob(blobName));
+            expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));

             byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
             final BytesArray bytesArray = new BytesArray(data);
@@ -124,7 +125,19 @@ public abstract class ESBlobStoreContainerTestCase extends ESTestCase {
             container.deleteBlob(blobName); // should not raise

             // blob deleted, so should raise again
-            expectThrows(IOException.class, () -> container.deleteBlob(blobName));
+            expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
+        }
+    }
+
+    public void testDeleteBlobIgnoringIfNotExists() throws IOException {
+        try (BlobStore store = newBlobStore()) {
+            BlobPath blobPath = new BlobPath();
+            if (randomBoolean()) {
+                blobPath = blobPath.add(randomAlphaOfLengthBetween(1, 10));
+            }
+
+            final BlobContainer container = store.blobContainer(blobPath);
+            container.deleteBlobIgnoringIfNotExists("does_not_exist");
         }
     }