Fixes shard level snapshot metadata loading when index-N file is missing (#21813)
In making the snapshot changes for 5.0, a bug was introduced in the fallback path that is taken when an index-N file cannot be found for an individual shard: the repository is supposed to iterate over all snap-*.dat files in the shard folder to determine which snapshots contain that shard's data, but the fallback was passing the wrong blob name when reading the snap-*.dat files, thereby failing to load the files for a given snapshot whenever the index-N file is missing. This condition should be rare, as there is no reason for an index-N file to be absent (unless it was deleted, or corruption prevented reading it); nevertheless, it can be encountered, and this commit fixes the bug by reading each snap-*.dat blob in the shard data folder under its correct blob name.
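To make the fix concrete, here is a minimal, self-contained sketch of the fallback path. The class, the map-backed "container", and the readBlob helper are invented stand-ins for the actual repository internals (only SNAPSHOT_PREFIX and the loop shape mirror the real code); the point is the last argument of readBlob, which must be the blob name the loop is visiting rather than one fixed value.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Sketch only: hypothetical stand-ins for the Elasticsearch repository types.
public class ShardSnapshotFallbackSketch {
    static final String SNAPSHOT_PREFIX = "snap-";

    // Stand-in for indexShardSnapshotFormat.readBlob(blobContainer, name).
    static String readBlob(Map<String, String> container, String blobName) {
        return container.get(blobName);
    }

    public static void main(String[] args) {
        // Blob listing of a shard folder whose index-N file is missing;
        // keys are blob names, values stand in for the blob contents.
        Map<String, String> blobs = new LinkedHashMap<>();
        blobs.put("snap-uuid1.dat", "file list of snapshot 1");
        blobs.put("snap-uuid2.dat", "file list of snapshot 2");
        blobs.put("__0", "segment data"); // data blob, skipped by the fallback

        List<String> snapshots = new ArrayList<>();
        for (String name : blobs.keySet()) {
            if (name.startsWith(SNAPSHOT_PREFIX)) {
                // The buggy version read the same blob on every iteration,
                // derived from the UUID of the snapshot currently being taken;
                // the fix reads each snap-*.dat blob by its own listed name.
                snapshots.add(readBlob(blobs, name));
            }
        }
        // Prints both snapshots' file lists, which the fallback must recover.
        System.out.println(snapshots);
    }
}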
parent b7292a6005
commit db7362da67
@@ -1137,7 +1137,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
      */
     protected Tuple<BlobStoreIndexShardSnapshots, Integer> buildBlobStoreIndexShardSnapshots(Map<String, BlobMetaData> blobs) {
         int latest = -1;
-        for (String name : blobs.keySet()) {
+        Set<String> blobKeys = blobs.keySet();
+        for (String name : blobKeys) {
             if (name.startsWith(SNAPSHOT_INDEX_PREFIX)) {
                 try {
                     int gen = Integer.parseInt(name.substring(SNAPSHOT_INDEX_PREFIX.length()));
@@ -1158,15 +1159,17 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 final String file = SNAPSHOT_INDEX_PREFIX + latest;
                 logger.warn((Supplier<?>) () -> new ParameterizedMessage("failed to read index file [{}]", file), e);
             }
+        } else if (blobKeys.isEmpty() == false) {
+            logger.debug("Could not find a readable index-N file in a non-empty shard snapshot directory [{}]", blobContainer.path());
         }
 
         // We couldn't load the index file - falling back to loading individual snapshots
         List<SnapshotFiles> snapshots = new ArrayList<>();
-        for (String name : blobs.keySet()) {
+        for (String name : blobKeys) {
             try {
                 BlobStoreIndexShardSnapshot snapshot = null;
                 if (name.startsWith(SNAPSHOT_PREFIX)) {
-                    snapshot = indexShardSnapshotFormat.readBlob(blobContainer, snapshotId.getUUID());
+                    snapshot = indexShardSnapshotFormat.readBlob(blobContainer, name);
                 } else if (name.startsWith(LEGACY_SNAPSHOT_PREFIX)) {
                     snapshot = indexShardSnapshotLegacyFormat.readBlob(blobContainer, name);
                 }
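For orientation, the shard folder that both loops operate on looks roughly like the following in an fs repository. The layout is illustrative, not a verbatim listing: the index-0 and "0" shard-directory names come from the test below, while the snap-<uuid>.dat naming comes from the commit message; the __0 data-blob names are assumptions for the sketch.

indices/<index-id>/0/      # shard 0 of one index
    index-0                # the index-N generation file listing all snapshots of this shard
    snap-<uuid>.dat        # one per snapshot: the files that snapshot references
    __0, __1, ...          # the actual data blobs

When index-N is unreadable or absent, the fallback above rebuilds the shard-level metadata by reading every snap-*.dat blob it finds, which only works if each blob is read under its own name.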
@@ -69,7 +69,6 @@ import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.indices.InvalidIndexNameException;
-import org.elasticsearch.indices.recovery.RecoverySettings;
 import org.elasticsearch.ingest.IngestTestPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.repositories.IndexId;
@@ -95,6 +94,7 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
@@ -1066,6 +1066,44 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
         assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));
     }
 
+    public void testSnapshotWithMissingShardLevelIndexFile() throws Exception {
+        Path repo = randomRepoPath();
+        logger.info("--> creating repository at {}", repo.toAbsolutePath());
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo").setType("fs").setSettings(
+            Settings.builder().put("location", repo).put("compress", false)));
+
+        createIndex("test-idx-1", "test-idx-2");
+        logger.info("--> indexing some data");
+        indexRandom(true,
+            client().prepareIndex("test-idx-1", "doc").setSource("foo", "bar"),
+            client().prepareIndex("test-idx-2", "doc").setSource("foo", "bar"));
+
+        logger.info("--> creating snapshot");
+        client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1")
+            .setWaitForCompletion(true).setIndices("test-idx-*").get();
+
+        logger.info("--> deleting shard level index file");
+        try (Stream<Path> files = Files.list(repo.resolve("indices"))) {
+            files.forEach(indexPath ->
+                IOUtils.deleteFilesIgnoringExceptions(indexPath.resolve("0").resolve("index-0"))
+            );
+        }
+
+        logger.info("--> creating another snapshot");
+        CreateSnapshotResponse createSnapshotResponse =
+            client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-2")
+                .setWaitForCompletion(true).setIndices("test-idx-1").get();
+        assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
+        assertEquals(createSnapshotResponse.getSnapshotInfo().successfulShards(), createSnapshotResponse.getSnapshotInfo().totalShards());
+
+        logger.info("--> restoring the first snapshot, the repository should not have lost any shard data despite deleting index-N, " +
+                    "because it should have iterated over the snap-*.data files as backup");
+        client().admin().indices().prepareDelete("test-idx-1", "test-idx-2").get();
+        RestoreSnapshotResponse restoreSnapshotResponse =
+            client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-1").setWaitForCompletion(true).get();
+        assertEquals(0, restoreSnapshotResponse.getRestoreInfo().failedShards());
+    }
+
     public void testSnapshotClosedIndex() throws Exception {
         Client client = client();
 