Make mock repository blocking tests more deterministic and reproducible
This commit is contained in:
parent 904c0abb3e
commit c3e53f3889
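Summary of the change: the probabilistic random_data_file_blocking_rate and random_control_blocking_rate repository settings are replaced with explicit block_on_data / block_on_control flags and blockOnDataFiles(boolean) / blockOnControlFiles(boolean) setters on MockRepository, and the old waitForCompletionOrBlock helper is split into blockNodeWithIndex, waitForBlock, and unblockNode. A minimal sketch of the resulting test flow, condensed from the diffs below (repository and index names are taken from the changed tests; this is not a complete test method):

    // Deterministic blocking flow used by the updated tests (sketch only).
    String blockedNode = blockNodeWithIndex("test-idx");   // arm blockOnDataFiles on one node holding the index

    client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
            .setWaitForCompletion(false).setIndices("test-idx").get();

    waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60)); // poll until the repository reports blocked()
    unblockNode(blockedNode);                                               // release the blocked repository on that node

    SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60));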
AbstractSnapshotTests.java
@@ -31,7 +31,6 @@ import org.junit.Before;
 import org.junit.Ignore;
 
 import java.io.File;
-import java.util.Collection;
 
 import static org.hamcrest.Matchers.equalTo;
 
@@ -101,24 +100,17 @@ public abstract class AbstractSnapshotTests extends ElasticsearchIntegrationTest
         });
     }
 
-    public String waitForCompletionOrBlock(Collection<String> nodes, String repository, String snapshot, TimeValue timeout) throws InterruptedException {
+    public void waitForBlock(String node, String repository, TimeValue timeout) throws InterruptedException {
         long start = System.currentTimeMillis();
+        RepositoriesService repositoriesService = cluster().getInstance(RepositoriesService.class, node);
+        MockRepository mockRepository = (MockRepository) repositoriesService.repository(repository);
         while (System.currentTimeMillis() - start < timeout.millis()) {
-            ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots(repository).setSnapshots(snapshot).get().getSnapshots();
-            assertThat(snapshotInfos.size(), equalTo(1));
-            if (snapshotInfos.get(0).state().completed()) {
-                return null;
-            }
-            for (String node : nodes) {
-                RepositoriesService repositoriesService = cluster().getInstance(RepositoriesService.class, node);
-                if (((MockRepository) repositoriesService.repository(repository)).blocked()) {
-                    return node;
-                }
+            if (mockRepository.blocked()) {
+                return;
             }
             Thread.sleep(100);
         }
         fail("Timeout!!!");
-        return null;
     }
 
     public SnapshotInfo waitForCompletion(String repository, String snapshot, TimeValue timeout) throws InterruptedException {
@@ -135,9 +127,16 @@ public abstract class AbstractSnapshotTests extends ElasticsearchIntegrationTest
         return null;
     }
 
-    public static void unblock(String repository) {
-        for (RepositoriesService repositoriesService : cluster().getInstances(RepositoriesService.class)) {
-            ((MockRepository) repositoriesService.repository(repository)).unblock();
+    public static String blockNodeWithIndex(String index) {
+        for(String node : cluster().nodesInclude("test-idx")) {
+            ((MockRepository)cluster().getInstance(RepositoriesService.class, node).repository("test-repo")).blockOnDataFiles(true);
+            return node;
         }
+        fail("No nodes for the index " + index + " found");
+        return null;
+    }
+
+    public static void unblockNode(String node) {
+        ((MockRepository)cluster().getInstance(RepositoriesService.class, node).repository("test-repo")).unblock();
     }
 }
DedicatedClusterSnapshotRestoreTests.java
@@ -20,7 +20,6 @@
 package org.elasticsearch.snapshots;
 
 import com.carrotsearch.randomizedtesting.LifecycleScope;
-import com.google.common.collect.ImmutableList;
 import org.apache.lucene.util.LuceneTestCase;
 import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse;
 import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse;
@@ -106,31 +105,27 @@ public class DedicatedClusterSnapshotRestoreTests extends AbstractSnapshotTests
                 ImmutableSettings.settingsBuilder()
                         .put("location", newTempDir(LifecycleScope.TEST))
                         .put("random", randomAsciiOfLength(10))
-                        .put("random_data_file_blocking_rate", 0.1)
                         .put("wait_after_unblock", 200)
                 ).get();
         assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
 
+        // Pick one node and block it
+        String blockedNode = blockNodeWithIndex("test-idx");
+
         logger.info("--> snapshot");
         client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
 
-        String blockedNode = waitForCompletionOrBlock(nodes, "test-repo", "test-snap", TimeValue.timeValueSeconds(60));
-        if (blockedNode != null) {
-            logger.info("--> execution was blocked on node [{}], shutting it down", blockedNode);
-            unblock("test-repo");
-            logger.info("--> stopping node", blockedNode);
-            stopNode(blockedNode);
-            logger.info("--> waiting for completion");
-            SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60));
-            logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
-            logger.info("--> done");
-        } else {
-            logger.info("--> done without blocks");
-            ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots();
-            assertThat(snapshotInfos.size(), equalTo(1));
-            assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS));
-            assertThat(snapshotInfos.get(0).shardFailures().size(), equalTo(0));
+        logger.info("--> waiting for block to kick in");
+        waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60));
 
-        }
+        logger.info("--> execution was blocked on node [{}], shutting it down", blockedNode);
+        unblockNode(blockedNode);
+
+        logger.info("--> stopping node", blockedNode);
+        stopNode(blockedNode);
+        logger.info("--> waiting for completion");
+        SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60));
+        logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
+        logger.info("--> done");
     }
 }
SharedClusterSnapshotRestoreTests.java
@@ -665,7 +665,6 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
 
     @Test
     @TestLogging("cluster.routing.allocation.decider:TRACE")
-    // @LuceneTestCase.AwaitsFix(bugUrl="imotov is working on the fix")
     public void moveShardWhileSnapshottingTest() throws Exception {
         Client client = client();
         File repositoryLocation = newTempDir(LifecycleScope.TEST);
@@ -675,7 +674,6 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
                 ImmutableSettings.settingsBuilder()
                         .put("location", repositoryLocation)
                         .put("random", randomAsciiOfLength(10))
-                        .put("random_data_file_blocking_rate", 0.1)
                         .put("wait_after_unblock", 200)
                 ).get();
         assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
@@ -690,23 +688,28 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
         refresh();
         assertThat(client.prepareCount("test-idx").get().getCount(), equalTo(100L));
 
+        // Pick one node and block it
+        String blockedNode = blockNodeWithIndex("test-idx");
+
         logger.info("--> snapshot");
         client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
-        String blockedNode = waitForCompletionOrBlock(cluster().nodesInclude("test-idx"), "test-repo", "test-snap", TimeValue.timeValueSeconds(60));
-        if (blockedNode != null) {
-            logger.info("--> move shards away from the node");
-            ImmutableSettings.Builder excludeSettings = ImmutableSettings.builder().put("index.routing.allocation.exclude._name", blockedNode);
-            client().admin().indices().prepareUpdateSettings("test-idx").setSettings(excludeSettings).get();
-            logger.info("--> execution was blocked on node [{}], moving shards away from this node", blockedNode);
-            unblock("test-repo");
-            logger.info("--> waiting for completion");
-            SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60));
-            logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
-            logger.info("--> done");
-        } else {
-            logger.info("--> done without blocks");
-        }
+        logger.info("--> waiting for block to kick in");
+        waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60));
+
+        logger.info("--> execution was blocked on node [{}], moving shards away from this node", blockedNode);
+        ImmutableSettings.Builder excludeSettings = ImmutableSettings.builder().put("index.routing.allocation.exclude._name", blockedNode);
+        client().admin().indices().prepareUpdateSettings("test-idx").setSettings(excludeSettings).get();
+
+        logger.info("--> unblocking blocked node");
+        unblockNode(blockedNode);
+
+        logger.info("--> waiting for completion");
+        SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(600));
+        logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
+        logger.info("--> done");
 
         ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots();
 
         assertThat(snapshotInfos.size(), equalTo(1));
         assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS));
         assertThat(snapshotInfos.get(0).shardFailures().size(), equalTo(0));
MockRepository.java
@@ -46,10 +46,6 @@ public class MockRepository extends FsRepository {
 
     private final AtomicLong failureCounter = new AtomicLong();
 
-    private volatile boolean blockable = true;
-
-    private volatile boolean blocked = false;
-
     public void resetFailureCount() {
         failureCounter.set(0);
     }
@@ -62,23 +58,25 @@ public class MockRepository extends FsRepository {
 
     private final double randomDataFileIOExceptionRate;
 
-    private final double randomControlBlockingRate;
-
-    private final double randomDataFileBlockingRate;
-
     private final long waitAfterUnblock;
 
     private final MockBlobStore mockBlobStore;
 
     private final String randomPrefix;
 
+    private volatile boolean blockOnControlFiles;
+
+    private volatile boolean blockOnDataFiles;
+
+    private volatile boolean blocked = false;
+
     @Inject
     public MockRepository(RepositoryName name, RepositorySettings repositorySettings, IndexShardRepository indexShardRepository) throws IOException {
         super(name, repositorySettings, indexShardRepository);
         randomControlIOExceptionRate = repositorySettings.settings().getAsDouble("random_control_io_exception_rate", 0.0);
         randomDataFileIOExceptionRate = repositorySettings.settings().getAsDouble("random_data_file_io_exception_rate", 0.0);
-        randomControlBlockingRate = repositorySettings.settings().getAsDouble("random_control_blocking_rate", 0.0);
-        randomDataFileBlockingRate = repositorySettings.settings().getAsDouble("random_data_file_blocking_rate", 0.0);
+        blockOnControlFiles = repositorySettings.settings().getAsBoolean("block_on_control", false);
+        blockOnDataFiles = repositorySettings.settings().getAsBoolean("block_on_data", false);
         randomPrefix = repositorySettings.settings().get("random");
         waitAfterUnblock = repositorySettings.settings().getAsLong("wait_after_unblock", 0L);
         logger.info("starting mock repository with random prefix " + randomPrefix);
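With the constructor now reading boolean flags instead of blocking rates, a test can ask for deterministic blocking when it registers the repository. A hedged sketch follows: the setting names come from the constructor above, while the "mock" repository type and the preparePutRepository call are assumptions, since the registration code is not part of this diff.

    // Hypothetical registration sketch; only the setting names are taken from this commit.
    client.admin().cluster().preparePutRepository("test-repo")
            .setType("mock") // assumed type name under which MockRepository is registered
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("location", newTempDir(LifecycleScope.TEST))
                    .put("random", randomAsciiOfLength(10))
                    .put("block_on_data", true)       // block deterministically on the first data file
                    .put("wait_after_unblock", 200))
            .get();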
@@ -108,6 +106,14 @@ public class MockRepository extends FsRepository {
         mockBlobStore.unblockExecution();
     }
 
+    public void blockOnDataFiles(boolean blocked) {
+        blockOnDataFiles = blocked;
+    }
+
+    public void blockOnControlFiles(boolean blocked) {
+        blockOnControlFiles = blocked;
+    }
+
     public class MockBlobStore extends BlobStoreWrapper {
         ConcurrentMap<String, AtomicLong> accessCounts = new ConcurrentHashMap<String, AtomicLong>();
 
@@ -133,8 +139,10 @@ public class MockRepository extends FsRepository {
 
         public synchronized void unblockExecution() {
             if (blocked) {
-                blockable = false;
                 blocked = false;
+                // Clean blocking flags, so we wouldn't try to block again
+                blockOnDataFiles = false;
+                blockOnControlFiles = false;
                 this.notifyAll();
             }
         }
@@ -146,12 +154,13 @@ public class MockRepository extends FsRepository {
         private synchronized boolean blockExecution() {
             boolean wasBlocked = false;
             try {
-                while (blockable) {
+                while (blockOnDataFiles || blockOnControlFiles) {
                     blocked = true;
                     this.wait();
                     wasBlocked = true;
                 }
             } catch (InterruptedException ex) {
+                Thread.currentThread().interrupt();
             }
             return wasBlocked;
         }
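Design note on the two methods above: unblockExecution() clears blockOnDataFiles and blockOnControlFiles before calling notifyAll(), so the while condition in blockExecution() becomes false and a woken writer cannot immediately re-block, which keeps a single block/unblock cycle per test reproducible. Restoring the interrupt status with Thread.currentThread().interrupt() preserves the interruption for callers if the test framework interrupts a thread while it is parked in wait().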
@@ -190,7 +199,7 @@ public class MockRepository extends FsRepository {
                 logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path());
                 addFailure();
                 throw new IOException("Random IOException");
-            } else if (shouldFail(blobName, randomDataFileBlockingRate)) {
+            } else if (blockOnDataFiles) {
                 logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path());
                 if (blockExecution() && waitAfterUnblock > 0) {
                     try {
|
||||||
logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path());
|
logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path());
|
||||||
addFailure();
|
addFailure();
|
||||||
throw new IOException("Random IOException");
|
throw new IOException("Random IOException");
|
||||||
} else if (shouldFail(blobName, randomControlBlockingRate)) {
|
} else if (blockOnControlFiles) {
|
||||||
logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path());
|
logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path());
|
||||||
if (blockExecution() && waitAfterUnblock > 0) {
|
if (blockExecution() && waitAfterUnblock > 0) {
|
||||||
try {
|
try {
|
||||||
|
|