Add IT for Snapshot Issue in 47552 (#47627) (#47634)

* Add IT for Snapshot Issue in 47552 (#47627)

Adding a specific integration test that reproduces the problem
fixed in #47552. Otherwise, the issue only reproduces in the
snapshot resiliency tests, which are not available in 6.8, to
which the fix is also being backported.
Armin Braun 2019-10-07 10:38:19 +02:00 committed by GitHub
parent 6bd033931b
commit 1359ef73a3
1 changed file with 49 additions and 0 deletions

@@ -1236,6 +1236,55 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
        }, 60L, TimeUnit.SECONDS);
    }

    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
        logger.info("--> starting a master node and two data nodes");
        internalCluster().startMasterOnlyNode();
        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
        logger.info("--> creating repository");
        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
            .setType("mock").setSettings(Settings.builder()
                .put("location", randomRepoPath())
                .put("compress", randomBoolean())
                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
            .put("number_of_shards", 2).put("number_of_replicas", 0)));
        ensureGreen();
        logger.info("--> indexing some data");
        final int numdocs = randomIntBetween(50, 100);
        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
        for (int i = 0; i < builders.length; i++) {
            builders[i] = client().prepareIndex("test-idx", "type1",
                Integer.toString(i)).setSource("field1", "bar " + i);
        }
        indexRandom(true, builders);
        flushAndRefresh();
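        // Block the mock repository on all data nodes so that the shard snapshots started below
        // hang instead of completing; restarting the nodes then fails those in-progress shard snapshots.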
        blockAllDataNodes("test-repo");
        logger.info("--> snapshot");
        client(internalCluster().getMasterName()).admin().cluster()
            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
        logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
        internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);
        logger.info("--> wait for shard snapshot of first primary to show as failed");
        assertBusy(() -> assertThat(
            client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
                .get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);
        logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
        internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);
        // check that snapshot completes with both failed shards being accounted for in the snapshot result
        assertBusy(() -> {
            GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
                .prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
            assertEquals(1, snapshotsStatusResponse.getSnapshots().size());
            SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
            assertThat(snapshotInfo.totalShards(), is(2));
            assertThat(snapshotInfo.shardFailures(), hasSize(2));
        }, 60L, TimeUnit.SECONDS);
    }

    public void testRetentionLeasesClearedOnRestore() throws Exception {
        final String repoName = "test-repo-retention-leases";
        assertAcked(client().admin().cluster().preparePutRepository(repoName)