* Add IT for Snapshot Issue in #47552 (#47627)

Adds a specific integration test that reproduces the problem fixed in #47552. The issue otherwise only reproduces in the snapshot resiliency tests, which are not available in 6.8, the branch the fix is also being backported to.
parent 6bd033931b
commit 1359ef73a3
@@ -1236,6 +1236,55 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCase {
        }, 60L, TimeUnit.SECONDS);
    }

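    // Reproduces the scenario from #47552: shard snapshots that fail because their data node
    // restarts must still be accounted for in the final snapshot result.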
    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
        logger.info("--> starting a master node and two data nodes");
        internalCluster().startMasterOnlyNode();
        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
        logger.info("--> creating repository");
        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
            .setType("mock").setSettings(Settings.builder()
                .put("location", randomRepoPath())
                .put("compress", randomBoolean())
                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
            .put("number_of_shards", 2).put("number_of_replicas", 0)));
        ensureGreen();
        logger.info("--> indexing some data");
        final int numdocs = randomIntBetween(50, 100);
        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
        for (int i = 0; i < builders.length; i++) {
            builders[i] = client().prepareIndex("test-idx", "type1",
                Integer.toString(i)).setSource("field1", "bar " + i);
        }
        indexRandom(true, builders);
        flushAndRefresh();
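        // block snapshot I/O on the mock repository so the snapshot stays in progress while the data nodes restart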
        blockAllDataNodes("test-repo");
        logger.info("--> snapshot");
        client(internalCluster().getMasterName()).admin().cluster()
            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
        logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
        internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);

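        // the restarted node drops out of the in-progress snapshot, so its shard snapshot is marked as failed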
logger.info("--> wait for shard snapshot of first primary to show as failed");
|
||||
assertBusy(() -> assertThat(
|
||||
client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
|
||||
.get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);
|
||||
|
||||
logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
|
||||
internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);
|
||||
|
||||
// check that snapshot completes with both failed shards being accounted for in the snapshot result
|
||||
assertBusy(() -> {
|
||||
GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
|
||||
.prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
|
||||
assertEquals(1, snapshotsStatusResponse.getSnapshots().size());
|
||||
SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
|
||||
assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
|
||||
assertThat(snapshotInfo.totalShards(), is(2));
|
||||
assertThat(snapshotInfo.shardFailures(), hasSize(2));
|
||||
}, 60L, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
public void testRetentionLeasesClearedOnRestore() throws Exception {
|
||||
final String repoName = "test-repo-retention-leases";
|
||||
assertAcked(client().admin().cluster().preparePutRepository(repoName)