Do not fail snapshot when deleting a missing snapshotted file (#30332)
When deleting or creating a snapshot for a given shard, elasticsearch usually starts by listing all the existing snapshotted files in the repository. Then it computes a diff and deletes the snapshotted files that are not needed anymore. During this deletion, an exception is thrown if the file to be deleted does not exist anymore. This behavior is challenging with cloud based repository implementations like S3 where a file that has been deleted can still appear in the bucket for few seconds/minutes (because the deletion can take some time to be fully replicated on S3). If the deleted file appears in the listing of files, then the following deletion will fail with a NoSuchFileException and the snapshot will be partially created/deleted. This pull request makes the deletion of these files a bit less strict, ie not failing if the file we want to delete does not exist anymore. It introduces a new BlobContainer.deleteIgnoringIfNotExists() method that can be used at some specific places where not failing when deleting a file is considered harmless. Closes #28322
This commit is contained in:
parent
eb51318b0f
commit
a836e79610
|
@ -117,6 +117,7 @@ Fail snapshot operations early when creating or deleting a snapshot on a reposit
|
||||||
written to by an older Elasticsearch after writing to it with a newer Elasticsearch version. ({pull}30140[#30140])
|
written to by an older Elasticsearch after writing to it with a newer Elasticsearch version. ({pull}30140[#30140])
|
||||||
|
|
||||||
Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641])
|
Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641])
|
||||||
|
Do not fail snapshot when deleting a missing snapshotted file ({pull}30332[#30332])
|
||||||
|
|
||||||
//[float]
|
//[float]
|
||||||
//=== Regressions
|
//=== Regressions
|
||||||
|
|
|
@ -75,7 +75,8 @@ public interface BlobContainer {
|
||||||
void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;
|
void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes a blob with giving name, if the blob exists. If the blob does not exist, this method throws an IOException.
|
* Deletes a blob with giving name, if the blob exists. If the blob does not exist,
|
||||||
|
* this method throws a NoSuchFileException.
|
||||||
*
|
*
|
||||||
* @param blobName
|
* @param blobName
|
||||||
* The name of the blob to delete.
|
* The name of the blob to delete.
|
||||||
|
@ -84,6 +85,21 @@ public interface BlobContainer {
|
||||||
*/
|
*/
|
||||||
void deleteBlob(String blobName) throws IOException;
|
void deleteBlob(String blobName) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes a blob with giving name, ignoring if the blob does not exist.
|
||||||
|
*
|
||||||
|
* @param blobName
|
||||||
|
* The name of the blob to delete.
|
||||||
|
* @throws IOException if the blob exists but could not be deleted.
|
||||||
|
*/
|
||||||
|
default void deleteBlobIgnoringIfNotExists(String blobName) throws IOException {
|
||||||
|
try {
|
||||||
|
deleteBlob(blobName);
|
||||||
|
} catch (final NoSuchFileException ignored) {
|
||||||
|
// This exception is ignored
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lists all blobs in the container.
|
* Lists all blobs in the container.
|
||||||
*
|
*
|
||||||
|
|
|
@ -102,13 +102,6 @@ public class BlobStoreIndexShardSnapshots implements Iterable<SnapshotFiles>, To
|
||||||
this.physicalFiles = unmodifiableMap(mapBuilder);
|
this.physicalFiles = unmodifiableMap(mapBuilder);
|
||||||
}
|
}
|
||||||
|
|
||||||
private BlobStoreIndexShardSnapshots() {
|
|
||||||
shardSnapshots = Collections.emptyList();
|
|
||||||
files = Collections.emptyMap();
|
|
||||||
physicalFiles = Collections.emptyMap();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns list of snapshots
|
* Returns list of snapshots
|
||||||
*
|
*
|
||||||
|
|
|
@ -90,7 +90,6 @@ public abstract class BlobStoreFormat<T extends ToXContent> {
|
||||||
return readBlob(blobContainer, blobName);
|
return readBlob(blobContainer, blobName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes obj in the blob container
|
* Deletes obj in the blob container
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -120,6 +120,7 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import static java.util.Collections.emptyMap;
|
import static java.util.Collections.emptyMap;
|
||||||
import static java.util.Collections.unmodifiableMap;
|
import static java.util.Collections.unmodifiableMap;
|
||||||
|
import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BlobStore - based implementation of Snapshot Repository
|
* BlobStore - based implementation of Snapshot Repository
|
||||||
|
@ -780,7 +781,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
// temporary blob creation or move failed - try cleaning up
|
// temporary blob creation or move failed - try cleaning up
|
||||||
try {
|
try {
|
||||||
snapshotsBlobContainer.deleteBlob(tempBlobName);
|
snapshotsBlobContainer.deleteBlobIgnoringIfNotExists(tempBlobName);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
ex.addSuppressed(e);
|
ex.addSuppressed(e);
|
||||||
}
|
}
|
||||||
|
@ -915,13 +916,13 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
|
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
|
||||||
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
|
finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot deletion [" + snapshotId + "]");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads information about shard snapshot
|
* Loads information about shard snapshot
|
||||||
*/
|
*/
|
||||||
public BlobStoreIndexShardSnapshot loadSnapshot() {
|
BlobStoreIndexShardSnapshot loadSnapshot() {
|
||||||
try {
|
try {
|
||||||
return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getUUID());
|
return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getUUID());
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
|
@ -930,54 +931,57 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes all unreferenced files from the repository and writes new index file
|
* Writes a new index file for the shard and removes all unreferenced files from the repository.
|
||||||
*
|
|
||||||
* We need to be really careful in handling index files in case of failures to make sure we have index file that
|
|
||||||
* points to files that were deleted.
|
|
||||||
*
|
*
|
||||||
|
* We need to be really careful in handling index files in case of failures to make sure we don't
|
||||||
|
* have index file that points to files that were deleted.
|
||||||
*
|
*
|
||||||
* @param snapshots list of active snapshots in the container
|
* @param snapshots list of active snapshots in the container
|
||||||
* @param fileListGeneration the generation number of the snapshot index file
|
* @param fileListGeneration the generation number of the snapshot index file
|
||||||
* @param blobs list of blobs in the container
|
* @param blobs list of blobs in the container
|
||||||
|
* @param reason a reason explaining why the shard index file is written
|
||||||
*/
|
*/
|
||||||
protected void finalize(List<SnapshotFiles> snapshots, int fileListGeneration, Map<String, BlobMetaData> blobs) {
|
protected void finalize(final List<SnapshotFiles> snapshots,
|
||||||
BlobStoreIndexShardSnapshots newSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
|
final int fileListGeneration,
|
||||||
// delete old index files first
|
final Map<String, BlobMetaData> blobs,
|
||||||
for (String blobName : blobs.keySet()) {
|
final String reason) {
|
||||||
|
final String indexGeneration = Integer.toString(fileListGeneration);
|
||||||
|
final String currentIndexGen = indexShardSnapshotsFormat.blobName(indexGeneration);
|
||||||
|
|
||||||
|
final BlobStoreIndexShardSnapshots updatedSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
|
||||||
|
try {
|
||||||
|
// If we deleted all snapshots, we don't need to create a new index file
|
||||||
|
if (snapshots.size() > 0) {
|
||||||
|
indexShardSnapshotsFormat.writeAtomic(updatedSnapshots, blobContainer, indexGeneration);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete old index files
|
||||||
|
for (final String blobName : blobs.keySet()) {
|
||||||
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
|
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
|
||||||
try {
|
try {
|
||||||
blobContainer.deleteBlob(blobName);
|
blobContainer.deleteBlobIgnoringIfNotExists(blobName);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// We cannot delete index file - this is fatal, we cannot continue, otherwise we might end up
|
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete index blob [{}] during finalization",
|
||||||
// with references to non-existing files
|
snapshotId, shardId, blobName), e);
|
||||||
throw new IndexShardSnapshotFailedException(shardId, "error deleting index file ["
|
throw e;
|
||||||
+ blobName + "] during cleanup", e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// now go over all the blobs, and if they don't exist in a snapshot, delete them
|
// Delete all blobs that don't exist in a snapshot
|
||||||
for (String blobName : blobs.keySet()) {
|
for (final String blobName : blobs.keySet()) {
|
||||||
// delete unused files
|
if (blobName.startsWith(DATA_BLOB_PREFIX) && (updatedSnapshots.findNameFile(canonicalName(blobName)) == null)) {
|
||||||
if (blobName.startsWith(DATA_BLOB_PREFIX)) {
|
|
||||||
if (newSnapshots.findNameFile(BlobStoreIndexShardSnapshot.FileInfo.canonicalName(blobName)) == null) {
|
|
||||||
try {
|
try {
|
||||||
blobContainer.deleteBlob(blobName);
|
blobContainer.deleteBlobIgnoringIfNotExists(blobName);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// TODO: don't catch and let the user handle it?
|
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete data blob [{}] during finalization",
|
||||||
logger.debug(() -> new ParameterizedMessage("[{}] [{}] error deleting blob [{}] during cleanup", snapshotId, shardId, blobName), e);
|
snapshotId, shardId, blobName), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// If we deleted all snapshots - we don't need to create the index file
|
|
||||||
if (snapshots.size() > 0) {
|
|
||||||
try {
|
|
||||||
indexShardSnapshotsFormat.writeAtomic(newSnapshots, blobContainer, Integer.toString(fileListGeneration));
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new IndexShardSnapshotFailedException(shardId, "Failed to write file list", e);
|
String message = "Failed to finalize " + reason + " with shard index [" + currentIndexGen + "]";
|
||||||
}
|
throw new IndexShardSnapshotFailedException(shardId, message, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1003,7 +1007,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
|
||||||
if (!name.startsWith(DATA_BLOB_PREFIX)) {
|
if (!name.startsWith(DATA_BLOB_PREFIX)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
name = BlobStoreIndexShardSnapshot.FileInfo.canonicalName(name);
|
name = canonicalName(name);
|
||||||
try {
|
try {
|
||||||
long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
|
long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
|
||||||
if (currentGen > generation) {
|
if (currentGen > generation) {
|
||||||
|
@ -1217,7 +1221,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
|
||||||
newSnapshotsList.add(point);
|
newSnapshotsList.add(point);
|
||||||
}
|
}
|
||||||
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
|
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
|
||||||
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
|
finalize(newSnapshotsList, fileListGeneration + 1, blobs, "snapshot creation [" + snapshotId + "]");
|
||||||
snapshotStatus.moveToDone(System.currentTimeMillis());
|
snapshotStatus.moveToDone(System.currentTimeMillis());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,13 +29,14 @@ import org.elasticsearch.test.ESTestCase;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.file.NoSuchFileException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
|
|
||||||
import static org.elasticsearch.repositories.ESBlobStoreTestCase.randomBytes;
|
import static org.elasticsearch.repositories.ESBlobStoreTestCase.randomBytes;
|
||||||
import static org.elasticsearch.repositories.ESBlobStoreTestCase.readBlobFully;
|
import static org.elasticsearch.repositories.ESBlobStoreTestCase.readBlobFully;
|
||||||
|
import static org.elasticsearch.repositories.ESBlobStoreTestCase.writeRandomBlob;
|
||||||
import static org.hamcrest.CoreMatchers.equalTo;
|
import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
import static org.hamcrest.CoreMatchers.notNullValue;
|
import static org.hamcrest.CoreMatchers.notNullValue;
|
||||||
|
|
||||||
|
@ -116,7 +117,7 @@ public abstract class ESBlobStoreContainerTestCase extends ESTestCase {
|
||||||
try (BlobStore store = newBlobStore()) {
|
try (BlobStore store = newBlobStore()) {
|
||||||
final String blobName = "foobar";
|
final String blobName = "foobar";
|
||||||
final BlobContainer container = store.blobContainer(new BlobPath());
|
final BlobContainer container = store.blobContainer(new BlobPath());
|
||||||
expectThrows(IOException.class, () -> container.deleteBlob(blobName));
|
expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
|
||||||
|
|
||||||
byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
|
byte[] data = randomBytes(randomIntBetween(10, scaledRandomIntBetween(1024, 1 << 16)));
|
||||||
final BytesArray bytesArray = new BytesArray(data);
|
final BytesArray bytesArray = new BytesArray(data);
|
||||||
|
@ -124,7 +125,19 @@ public abstract class ESBlobStoreContainerTestCase extends ESTestCase {
|
||||||
container.deleteBlob(blobName); // should not raise
|
container.deleteBlob(blobName); // should not raise
|
||||||
|
|
||||||
// blob deleted, so should raise again
|
// blob deleted, so should raise again
|
||||||
expectThrows(IOException.class, () -> container.deleteBlob(blobName));
|
expectThrows(NoSuchFileException.class, () -> container.deleteBlob(blobName));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDeleteBlobIgnoringIfNotExists() throws IOException {
|
||||||
|
try (BlobStore store = newBlobStore()) {
|
||||||
|
BlobPath blobPath = new BlobPath();
|
||||||
|
if (randomBoolean()) {
|
||||||
|
blobPath = blobPath.add(randomAlphaOfLengthBetween(1, 10));
|
||||||
|
}
|
||||||
|
|
||||||
|
final BlobContainer container = store.blobContainer(blobPath);
|
||||||
|
container.deleteBlobIgnoringIfNotExists("does_not_exist");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue