Stabilize testSnapshotDeleteRelocatingPrimaryIndex (#61088) (#61096)

Use transport blocking to make relocation take forever instead of relying on the relocation to take long enough to clash with the snapshot.

Closes #61069
This commit is contained in:
Armin Braun 2020-08-13 16:26:56 +02:00 committed by GitHub
parent 843122ccce
commit 3143b5ea47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 33 deletions

View File

@ -69,6 +69,7 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.seqno.RetentionLeaseActions;
import org.elasticsearch.index.seqno.RetentionLeases;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.recovery.PeerRecoveryTargetService;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.node.Node;
import org.elasticsearch.plugins.Plugin;
@ -88,6 +89,7 @@ import org.elasticsearch.test.TestCustomMetadata;
import org.elasticsearch.test.disruption.BusyMasterServiceDisruption;
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
import org.elasticsearch.test.rest.FakeRestRequest;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.TransportMessageListener;
import org.elasticsearch.transport.TransportRequest;
import org.elasticsearch.transport.TransportRequestOptions;
@ -176,7 +178,8 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(MockRepository.Plugin.class, TestCustomMetadataPlugin.class, BrokenSettingPlugin.class);
return Arrays.asList(MockRepository.Plugin.class, TestCustomMetadataPlugin.class, BrokenSettingPlugin.class,
MockTransportService.TestPlugin.class);
}
public static class BrokenSettingPlugin extends Plugin {
@ -1310,6 +1313,51 @@ public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTest
assertThat(snapshot3.state(), is(SnapshotState.SUCCESS));
}
public void testSnapshotDeleteRelocatingPrimaryIndex() throws Exception {
internalCluster().startMasterOnlyNode();
final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
final String repoName = "test-repo";
createRepository(repoName, "fs");
// Create index on two nodes and make sure each node has a primary by setting no replicas
final String indexName = "test-idx";
assertAcked(prepareCreate(indexName, 2, indexSettingsNoReplicas(between(2, 10))));
ensureGreen(indexName);
indexRandomDocs(indexName, 100);
// Drop all file chunk requests so that below relocation takes forever and we're guaranteed to run the snapshot in parallel to it
for (String nodeName : dataNodes) {
((MockTransportService) internalCluster().getInstance(TransportService.class, nodeName)).addSendBehavior(
(connection, requestId, action, request, options) -> {
if (PeerRecoveryTargetService.Actions.FILE_CHUNK.equals(action)) {
return;
}
connection.sendRequest(requestId, action, request, options);
});
}
logger.info("--> start relocations");
allowNodes(indexName, 1);
logger.info("--> wait for relocations to start");
assertBusy(() -> assertThat(
client().admin().cluster().prepareHealth(indexName).execute().actionGet().getRelocatingShards(), greaterThan(0)),
1L, TimeUnit.MINUTES);
logger.info("--> snapshot");
client().admin().cluster().prepareCreateSnapshot(repoName, "test-snap")
.setWaitForCompletion(false).setPartial(true).setIndices(indexName).get();
assertAcked(client().admin().indices().prepareDelete(indexName));
logger.info("--> wait for snapshot to complete");
SnapshotInfo snapshotInfo = waitForCompletion(repoName, "test-snap", TimeValue.timeValueSeconds(600));
assertThat(snapshotInfo.state(), equalTo(SnapshotState.PARTIAL));
assertThat(snapshotInfo.shardFailures().size(), greaterThan(0));
logger.info("--> done");
}
private long calculateTotalFilesSize(List<Path> files) {
return files.stream().mapToLong(f -> {
try {

View File

@ -1937,38 +1937,6 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
logger.info("--> done");
}
public void testSnapshotDeleteRelocatingPrimaryIndex() throws Exception {
final String repoName = "test-repo";
createRepository(repoName, "fs");
// Create index on two nodes and make sure each node has a primary by setting no replicas
final String indexName = "test-idx";
assertAcked(prepareCreate(indexName, 2, indexSettingsNoReplicas(between(2, 10))));
ensureGreen(indexName);
indexRandomDocs(indexName, 100);
logger.info("--> start relocations");
allowNodes(indexName, 1);
logger.info("--> wait for relocations to start");
assertBusy(() -> assertThat(
client().admin().cluster().prepareHealth(indexName).execute().actionGet().getRelocatingShards(), greaterThan(0)),
1L, TimeUnit.MINUTES);
logger.info("--> snapshot");
client().admin().cluster().prepareCreateSnapshot(repoName, "test-snap")
.setWaitForCompletion(false).setPartial(true).setIndices(indexName).get();
assertAcked(client().admin().indices().prepareDelete(indexName));
logger.info("--> wait for snapshot to complete");
SnapshotInfo snapshotInfo = waitForCompletion(repoName, "test-snap", TimeValue.timeValueSeconds(600));
assertThat(snapshotInfo.state(), equalTo(SnapshotState.PARTIAL));
assertThat(snapshotInfo.shardFailures().size(), greaterThan(0));
logger.info("--> done");
}
public void testSnapshotMoreThanOnce() throws InterruptedException {
Client client = client();