* The problem here is that if we run into a corrupted index-N file, instead of generating a new index-(N+1) file, we set the newest index generation to -1 and thus try to create `index-0` * If `index-0` is corrupt, this prevents us from ever creating a new snapshot using the broken shard, because we are unable to create `index-0` since it already exists * Fixed by still using the index generation for naming the next index file, even if it was a broken index file * Added test that makes sure restoring as well as snapshotting on top of the broken shard index file work as expected * closes #41304
This commit is contained in:
parent
37ebefb553
commit
1db9166ea0
|
@ -37,10 +37,10 @@ import java.util.Map;
|
|||
import static java.util.Collections.unmodifiableMap;
|
||||
|
||||
/**
|
||||
* Contains information about all snapshot for the given shard in repository
|
||||
* Contains information about all snapshots for the given shard in repository
|
||||
* <p>
|
||||
* This class is used to find files that were already snapshotted and clear out files that are no longer referenced by any
|
||||
* snapshots
|
||||
* snapshots.
|
||||
*/
|
||||
public class BlobStoreIndexShardSnapshots implements Iterable<SnapshotFiles>, ToXContentFragment {
|
||||
|
||||
|
|
|
@ -121,27 +121,28 @@ import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSna
|
|||
* <pre>
|
||||
* {@code
|
||||
* STORE_ROOT
|
||||
* |- index-N - list of all snapshot ids and the indices belonging to each snapshot, N is the generation of the file
|
||||
* |- index-N - JSON serialized {@link RepositoryData} containing a list of all snapshot ids and the indices belonging to
|
||||
* | each snapshot, N is the generation of the file
|
||||
* |- index.latest - contains the numeric value of the latest generation of the index file (i.e. N from above)
|
||||
* |- incompatible-snapshots - list of all snapshot ids that are no longer compatible with the current version of the cluster
|
||||
* |- snap-20131010 - JSON serialized Snapshot for snapshot "20131010"
|
||||
* |- meta-20131010.dat - JSON serialized MetaData for snapshot "20131010" (includes only global metadata)
|
||||
* |- snap-20131011 - JSON serialized Snapshot for snapshot "20131011"
|
||||
* |- meta-20131011.dat - JSON serialized MetaData for snapshot "20131011"
|
||||
* |- snap-20131010.dat - SMILE serialized {@link SnapshotInfo} for snapshot "20131010"
|
||||
* |- meta-20131010.dat - SMILE serialized {@link MetaData} for snapshot "20131010" (includes only global metadata)
|
||||
* |- snap-20131011.dat - SMILE serialized {@link SnapshotInfo} for snapshot "20131011"
|
||||
* |- meta-20131011.dat - SMILE serialized {@link MetaData} for snapshot "20131011"
|
||||
* .....
|
||||
* |- indices/ - data for all indices
|
||||
* |- Ac1342-B_x/ - data for index "foo" which was assigned the unique id of Ac1342-B_x in the repository
|
||||
* | |- meta-20131010.dat - JSON Serialized IndexMetaData for index "foo"
|
||||
* | |- meta-20131010.dat - JSON Serialized {@link IndexMetaData} for index "foo"
|
||||
* | |- 0/ - data for shard "0" of index "foo"
|
||||
* | | |- __1 \
|
||||
* | | |- __1 \ (files with numeric names were created by older ES versions)
|
||||
* | | |- __2 |
|
||||
* | | |- __3 |- files from different segments see snapshot-* for their mappings to real segment files
|
||||
* | | |- __4 |
|
||||
* | | |- __5 /
|
||||
* | | |- __VPO5oDMVT5y4Akv8T_AO_A |- files from different segments see snap-* for their mappings to real segment files
|
||||
* | | |- __1gbJy18wS_2kv1qI7FgKuQ |
|
||||
* | | |- __R8JvZAHlSMyMXyZc2SS8Zg /
|
||||
* | | .....
|
||||
* | | |- snap-20131010.dat - JSON serialized BlobStoreIndexShardSnapshot for snapshot "20131010"
|
||||
* | | |- snap-20131011.dat - JSON serialized BlobStoreIndexShardSnapshot for snapshot "20131011"
|
||||
* | | |- list-123 - JSON serialized BlobStoreIndexShardSnapshot for snapshot "20131011"
|
||||
* | | |- snap-20131010.dat - SMILE serialized {@link BlobStoreIndexShardSnapshot} for snapshot "20131010"
|
||||
* | | |- snap-20131011.dat - SMILE serialized {@link BlobStoreIndexShardSnapshot} for snapshot "20131011"
|
||||
* | | |- index-123 - SMILE serialized {@link BlobStoreIndexShardSnapshots} for the shard
|
||||
* | |
|
||||
* | |- 1/ - data for shard "1" of index "foo"
|
||||
* | | |- __1
|
||||
|
@ -1136,7 +1137,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
|
|||
logger.warn(() -> new ParameterizedMessage("failed to read commit point [{}]", name), e);
|
||||
}
|
||||
}
|
||||
return new Tuple<>(new BlobStoreIndexShardSnapshots(snapshots), -1);
|
||||
return new Tuple<>(new BlobStoreIndexShardSnapshots(snapshots), latest);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2963,6 +2963,108 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
|
|||
assertAcked(client().admin().cluster().prepareDeleteSnapshot("test-repo", snapshotInfo.snapshotId().getName()).get());
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that a shard snapshot with a corrupted shard index file can still be used for restore and incremental snapshots.
|
||||
*/
|
||||
public void testSnapshotWithCorruptedShardIndexFile() throws Exception {
|
||||
final Client client = client();
|
||||
final Path repo = randomRepoPath();
|
||||
final String indexName = "test-idx";
|
||||
final int nDocs = randomIntBetween(1, 10);
|
||||
|
||||
logger.info("--> creating index [{}] with [{}] documents in it", indexName, nDocs);
|
||||
assertAcked(prepareCreate(indexName).setSettings(Settings.builder()
|
||||
.put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0)));
|
||||
|
||||
final IndexRequestBuilder[] documents = new IndexRequestBuilder[nDocs];
|
||||
for (int j = 0; j < nDocs; j++) {
|
||||
documents[j] = client.prepareIndex(indexName, "_doc").setSource("foo", "bar");
|
||||
}
|
||||
indexRandom(true, documents);
|
||||
flushAndRefresh();
|
||||
|
||||
logger.info("--> creating repository");
|
||||
assertAcked(client().admin().cluster().preparePutRepository("test-repo")
|
||||
.setType("fs")
|
||||
.setSettings(Settings.builder()
|
||||
.put("location", repo)));
|
||||
|
||||
final String snapshot1 = "test-snap-1";
|
||||
logger.info("--> creating snapshot [{}]", snapshot1);
|
||||
final SnapshotInfo snapshotInfo = client().admin().cluster().prepareCreateSnapshot("test-repo", snapshot1)
|
||||
.setWaitForCompletion(true)
|
||||
.get()
|
||||
.getSnapshotInfo();
|
||||
assertThat(snapshotInfo.failedShards(), equalTo(0));
|
||||
assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards()));
|
||||
assertThat(snapshotInfo.indices(), hasSize(1));
|
||||
|
||||
RepositoriesService service = internalCluster().getInstance(RepositoriesService.class, internalCluster().getMasterName());
|
||||
Repository repository = service.repository("test-repo");
|
||||
|
||||
final RepositoryData repositoryData = getRepositoryData(repository);
|
||||
final Map<String, IndexId> indexIds = repositoryData.getIndices();
|
||||
assertThat(indexIds.size(), equalTo(1));
|
||||
|
||||
final IndexId corruptedIndex = indexIds.get(indexName);
|
||||
final Path shardIndexFile = repo.resolve("indices")
|
||||
.resolve(corruptedIndex.getId()).resolve("0")
|
||||
.resolve("index-0");
|
||||
|
||||
logger.info("--> truncating shard index file [{}]", shardIndexFile);
|
||||
try (SeekableByteChannel outChan = Files.newByteChannel(shardIndexFile, StandardOpenOption.WRITE)) {
|
||||
outChan.truncate(randomInt(10));
|
||||
}
|
||||
|
||||
logger.info("--> verifying snapshot state for [{}]", snapshot1);
|
||||
List<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").get().getSnapshots();
|
||||
assertThat(snapshotInfos.size(), equalTo(1));
|
||||
assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS));
|
||||
assertThat(snapshotInfos.get(0).snapshotId().getName(), equalTo(snapshot1));
|
||||
|
||||
logger.info("--> deleting index [{}]", indexName);
|
||||
assertAcked(client().admin().indices().prepareDelete(indexName));
|
||||
|
||||
logger.info("--> restoring snapshot [{}]", snapshot1);
|
||||
client().admin().cluster().prepareRestoreSnapshot("test-repo", snapshot1)
|
||||
.setRestoreGlobalState(randomBoolean())
|
||||
.setWaitForCompletion(true)
|
||||
.get();
|
||||
ensureGreen();
|
||||
|
||||
assertHitCount(client().prepareSearch(indexName).setSize(0).get(), nDocs);
|
||||
|
||||
logger.info("--> indexing [{}] more documents into [{}]", nDocs, indexName);
|
||||
for (int j = 0; j < nDocs; j++) {
|
||||
documents[j] = client.prepareIndex(indexName, "_doc").setSource("foo2", "bar2");
|
||||
}
|
||||
indexRandom(true, documents);
|
||||
|
||||
final String snapshot2 = "test-snap-2";
|
||||
logger.info("--> creating snapshot [{}]", snapshot2);
|
||||
final SnapshotInfo snapshotInfo2 = client().admin().cluster().prepareCreateSnapshot("test-repo", snapshot2)
|
||||
.setWaitForCompletion(true)
|
||||
.get()
|
||||
.getSnapshotInfo();
|
||||
assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS));
|
||||
assertThat(snapshotInfo2.failedShards(), equalTo(0));
|
||||
assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo.totalShards()));
|
||||
assertThat(snapshotInfo2.indices(), hasSize(1));
|
||||
|
||||
logger.info("--> deleting index [{}]", indexName);
|
||||
assertAcked(client().admin().indices().prepareDelete(indexName));
|
||||
|
||||
logger.info("--> restoring snapshot [{}]", snapshot2);
|
||||
client().admin().cluster().prepareRestoreSnapshot("test-repo", snapshot2)
|
||||
.setRestoreGlobalState(randomBoolean())
|
||||
.setWaitForCompletion(true)
|
||||
.get();
|
||||
|
||||
ensureGreen();
|
||||
|
||||
assertHitCount(client().prepareSearch(indexName).setSize(0).get(), 2 * nDocs);
|
||||
}
|
||||
|
||||
public void testCannotCreateSnapshotsWithSameName() throws Exception {
|
||||
final String repositoryName = "test-repo";
|
||||
final String snapshotName = "test-snap";
|
||||
|
|
Loading…
Reference in New Issue