Fixes shard level snapshot metadata loading when index-N file is missing (#21813)

In making changes for the 5.0 version of snapshots, a bug was
introduced: if an index-N file could not be found for an individual
shard, the fallback was to iterate over all snap-*.dat files in the
shard folder to determine which snapshots contain that shard's data.
In 5.0, however, this fallback was incorrectly passing in the
snapshot UUID instead of the blob name of the snap-*.dat file,
thereby failing to load the shard's files for a given snapshot
when the index-N file is missing.  This condition should be rare,
as there is no reason for an index-N file to be absent (unless it
was deleted or there was corruption reading the file), but it can
nevertheless be encountered, and this commit fixes the bug by
reading the snap-*.dat blobs in the shard data folder by their
correct blob names.
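
To make the mechanism described above concrete, here is a minimal, self-contained sketch. The class and helper names are hypothetical and only model the blob-name handling in the shard folder; this is not the actual BlobStoreRepository API shown in the diff below.

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

// Sketch of the two-step lookup described above: prefer the newest index-N blob;
// if none is readable, fall back to reading every snap-*.dat blob by its own name.
public class ShardSnapshotLookupSketch {
    private static final String SNAPSHOT_INDEX_PREFIX = "index-";
    private static final String SNAPSHOT_PREFIX = "snap-";

    /** Highest N among index-N blob names, or -1 if there is none. */
    static int latestIndexGeneration(List<String> blobNames) {
        int latest = -1;
        for (String name : blobNames) {
            if (name.startsWith(SNAPSHOT_INDEX_PREFIX)) {
                try {
                    latest = Math.max(latest, Integer.parseInt(name.substring(SNAPSHOT_INDEX_PREFIX.length())));
                } catch (NumberFormatException e) {
                    // not an index-N blob; ignore it
                }
            }
        }
        return latest;
    }

    /** Blob names to read in the fallback path: each snap-*.dat blob, by its own name. */
    static List<String> fallbackSnapshotBlobs(List<String> blobNames) {
        // The 5.0 bug was equivalent to looking these blobs up by the snapshot UUID,
        // which is not a blob name in the shard folder, so nothing could be loaded.
        return blobNames.stream()
            .filter(name -> name.startsWith(SNAPSHOT_PREFIX))
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> shardBlobs = Arrays.asList("snap-abc.dat", "snap-def.dat", "__0");
        if (latestIndexGeneration(shardBlobs) < 0) {
            System.out.println("no index-N blob, reading: " + fallbackSnapshotBlobs(shardBlobs));
        }
    }
}

In the real repository code the fallback then reads each of those blobs with indexShardSnapshotFormat.readBlob(blobContainer, name), which is exactly the one-line change in the diff below.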
Author: Ali Beyad
Date:   2016-11-28 10:46:33 -05:00 (committed by GitHub)
Commit: db7362da67 (parent b7292a6005)
2 changed files with 45 additions and 4 deletions

BlobStoreRepository.java

@@ -1137,7 +1137,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
      */
     protected Tuple<BlobStoreIndexShardSnapshots, Integer> buildBlobStoreIndexShardSnapshots(Map<String, BlobMetaData> blobs) {
         int latest = -1;
-        for (String name : blobs.keySet()) {
+        Set<String> blobKeys = blobs.keySet();
+        for (String name : blobKeys) {
             if (name.startsWith(SNAPSHOT_INDEX_PREFIX)) {
                 try {
                     int gen = Integer.parseInt(name.substring(SNAPSHOT_INDEX_PREFIX.length()));
@@ -1158,15 +1159,17 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
                 final String file = SNAPSHOT_INDEX_PREFIX + latest;
                 logger.warn((Supplier<?>) () -> new ParameterizedMessage("failed to read index file [{}]", file), e);
             }
+        } else if (blobKeys.isEmpty() == false) {
+            logger.debug("Could not find a readable index-N file in a non-empty shard snapshot directory [{}]", blobContainer.path());
         }
 
         // We couldn't load the index file - falling back to loading individual snapshots
         List<SnapshotFiles> snapshots = new ArrayList<>();
-        for (String name : blobs.keySet()) {
+        for (String name : blobKeys) {
             try {
                 BlobStoreIndexShardSnapshot snapshot = null;
                 if (name.startsWith(SNAPSHOT_PREFIX)) {
-                    snapshot = indexShardSnapshotFormat.readBlob(blobContainer, snapshotId.getUUID());
+                    snapshot = indexShardSnapshotFormat.readBlob(blobContainer, name);
                 } else if (name.startsWith(LEGACY_SNAPSHOT_PREFIX)) {
                     snapshot = indexShardSnapshotLegacyFormat.readBlob(blobContainer, name);
                 }

SharedClusterSnapshotRestoreIT.java

@@ -69,7 +69,6 @@ import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.indices.InvalidIndexNameException;
-import org.elasticsearch.indices.recovery.RecoverySettings;
 import org.elasticsearch.ingest.IngestTestPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.repositories.IndexId;
@@ -95,6 +94,7 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
 import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
@@ -1066,6 +1066,44 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
         assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));
     }
 
+    public void testSnapshotWithMissingShardLevelIndexFile() throws Exception {
+        Path repo = randomRepoPath();
+        logger.info("--> creating repository at {}", repo.toAbsolutePath());
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo").setType("fs").setSettings(
+            Settings.builder().put("location", repo).put("compress", false)));
+
+        createIndex("test-idx-1", "test-idx-2");
+        logger.info("--> indexing some data");
+        indexRandom(true,
+            client().prepareIndex("test-idx-1", "doc").setSource("foo", "bar"),
+            client().prepareIndex("test-idx-2", "doc").setSource("foo", "bar"));
+
+        logger.info("--> creating snapshot");
+        client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1")
+            .setWaitForCompletion(true).setIndices("test-idx-*").get();
+
+        logger.info("--> deleting shard level index file");
+        try (Stream<Path> files = Files.list(repo.resolve("indices"))) {
+            files.forEach(indexPath ->
+                IOUtils.deleteFilesIgnoringExceptions(indexPath.resolve("0").resolve("index-0"))
+            );
+        }
+
+        logger.info("--> creating another snapshot");
+        CreateSnapshotResponse createSnapshotResponse =
+            client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-2")
+                .setWaitForCompletion(true).setIndices("test-idx-1").get();
+        assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
+        assertEquals(createSnapshotResponse.getSnapshotInfo().successfulShards(), createSnapshotResponse.getSnapshotInfo().totalShards());
+
+        logger.info("--> restoring the first snapshot, the repository should not have lost any shard data despite deleting index-N, " +
+                        "because it should have iterated over the snap-*.data files as backup");
+        client().admin().indices().prepareDelete("test-idx-1", "test-idx-2").get();
+        RestoreSnapshotResponse restoreSnapshotResponse =
+            client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-1").setWaitForCompletion(true).get();
+        assertEquals(0, restoreSnapshotResponse.getRestoreInfo().failedShards());
+    }
+
     public void testSnapshotClosedIndex() throws Exception {
         Client client = client();