Make mock repository blocking tests more deterministic and reproducible

This commit is contained in:
Igor Motov 2013-11-12 13:29:51 -05:00
parent 904c0abb3e
commit c3e53f3889
4 changed files with 71 additions and 65 deletions

View File

@ -31,7 +31,6 @@ import org.junit.Before;
import org.junit.Ignore; import org.junit.Ignore;
import java.io.File; import java.io.File;
import java.util.Collection;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
@ -101,24 +100,17 @@ public abstract class AbstractSnapshotTests extends ElasticsearchIntegrationTest
}); });
} }
public String waitForCompletionOrBlock(Collection<String> nodes, String repository, String snapshot, TimeValue timeout) throws InterruptedException { public void waitForBlock(String node, String repository, TimeValue timeout) throws InterruptedException {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
RepositoriesService repositoriesService = cluster().getInstance(RepositoriesService.class, node);
MockRepository mockRepository = (MockRepository) repositoriesService.repository(repository);
while (System.currentTimeMillis() - start < timeout.millis()) { while (System.currentTimeMillis() - start < timeout.millis()) {
ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots(repository).setSnapshots(snapshot).get().getSnapshots(); if (mockRepository.blocked()) {
assertThat(snapshotInfos.size(), equalTo(1)); return;
if (snapshotInfos.get(0).state().completed()) {
return null;
}
for (String node : nodes) {
RepositoriesService repositoriesService = cluster().getInstance(RepositoriesService.class, node);
if (((MockRepository) repositoriesService.repository(repository)).blocked()) {
return node;
}
} }
Thread.sleep(100); Thread.sleep(100);
} }
fail("Timeout!!!"); fail("Timeout!!!");
return null;
} }
public SnapshotInfo waitForCompletion(String repository, String snapshot, TimeValue timeout) throws InterruptedException { public SnapshotInfo waitForCompletion(String repository, String snapshot, TimeValue timeout) throws InterruptedException {
@ -135,9 +127,16 @@ public abstract class AbstractSnapshotTests extends ElasticsearchIntegrationTest
return null; return null;
} }
public static void unblock(String repository) { public static String blockNodeWithIndex(String index) {
for (RepositoriesService repositoriesService : cluster().getInstances(RepositoriesService.class)) { for(String node : cluster().nodesInclude("test-idx")) {
((MockRepository) repositoriesService.repository(repository)).unblock(); ((MockRepository)cluster().getInstance(RepositoriesService.class, node).repository("test-repo")).blockOnDataFiles(true);
return node;
} }
fail("No nodes for the index " + index + " found");
return null;
}
public static void unblockNode(String node) {
((MockRepository)cluster().getInstance(RepositoriesService.class, node).repository("test-repo")).unblock();
} }
} }

View File

@ -20,7 +20,6 @@
package org.elasticsearch.snapshots; package org.elasticsearch.snapshots;
import com.carrotsearch.randomizedtesting.LifecycleScope; import com.carrotsearch.randomizedtesting.LifecycleScope;
import com.google.common.collect.ImmutableList;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse; import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse;
import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse;
@ -106,31 +105,27 @@ public class DedicatedClusterSnapshotRestoreTests extends AbstractSnapshotTests
ImmutableSettings.settingsBuilder() ImmutableSettings.settingsBuilder()
.put("location", newTempDir(LifecycleScope.TEST)) .put("location", newTempDir(LifecycleScope.TEST))
.put("random", randomAsciiOfLength(10)) .put("random", randomAsciiOfLength(10))
.put("random_data_file_blocking_rate", 0.1)
.put("wait_after_unblock", 200) .put("wait_after_unblock", 200)
).get(); ).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
// Pick one node and block it
String blockedNode = blockNodeWithIndex("test-idx");
logger.info("--> snapshot"); logger.info("--> snapshot");
client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get(); client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
String blockedNode = waitForCompletionOrBlock(nodes, "test-repo", "test-snap", TimeValue.timeValueSeconds(60)); logger.info("--> waiting for block to kick in");
if (blockedNode != null) { waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60));
logger.info("--> execution was blocked on node [{}], shutting it down", blockedNode);
unblock("test-repo");
logger.info("--> stopping node", blockedNode);
stopNode(blockedNode);
logger.info("--> waiting for completion");
SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60));
logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
logger.info("--> done");
} else {
logger.info("--> done without blocks");
ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots();
assertThat(snapshotInfos.size(), equalTo(1));
assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS));
assertThat(snapshotInfos.get(0).shardFailures().size(), equalTo(0));
} logger.info("--> execution was blocked on node [{}], shutting it down", blockedNode);
unblockNode(blockedNode);
logger.info("--> stopping node", blockedNode);
stopNode(blockedNode);
logger.info("--> waiting for completion");
SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60));
logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
logger.info("--> done");
} }
} }

View File

@ -665,7 +665,6 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
@Test @Test
@TestLogging("cluster.routing.allocation.decider:TRACE") @TestLogging("cluster.routing.allocation.decider:TRACE")
// @LuceneTestCase.AwaitsFix(bugUrl="imotov is working on the fix")
public void moveShardWhileSnapshottingTest() throws Exception { public void moveShardWhileSnapshottingTest() throws Exception {
Client client = client(); Client client = client();
File repositoryLocation = newTempDir(LifecycleScope.TEST); File repositoryLocation = newTempDir(LifecycleScope.TEST);
@ -675,7 +674,6 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
ImmutableSettings.settingsBuilder() ImmutableSettings.settingsBuilder()
.put("location", repositoryLocation) .put("location", repositoryLocation)
.put("random", randomAsciiOfLength(10)) .put("random", randomAsciiOfLength(10))
.put("random_data_file_blocking_rate", 0.1)
.put("wait_after_unblock", 200) .put("wait_after_unblock", 200)
).get(); ).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true)); assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
@ -690,23 +688,28 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
refresh(); refresh();
assertThat(client.prepareCount("test-idx").get().getCount(), equalTo(100L)); assertThat(client.prepareCount("test-idx").get().getCount(), equalTo(100L));
// Pick one node and block it
String blockedNode = blockNodeWithIndex("test-idx");
logger.info("--> snapshot"); logger.info("--> snapshot");
client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get(); client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
String blockedNode = waitForCompletionOrBlock(cluster().nodesInclude("test-idx"), "test-repo", "test-snap", TimeValue.timeValueSeconds(60));
if (blockedNode != null) { logger.info("--> waiting for block to kick in");
logger.info("--> move shards away from the node"); waitForBlock(blockedNode, "test-repo", TimeValue.timeValueSeconds(60));
ImmutableSettings.Builder excludeSettings = ImmutableSettings.builder().put("index.routing.allocation.exclude._name", blockedNode);
client().admin().indices().prepareUpdateSettings("test-idx").setSettings(excludeSettings).get(); logger.info("--> execution was blocked on node [{}], moving shards away from this node", blockedNode);
logger.info("--> execution was blocked on node [{}], moving shards away from this node", blockedNode); ImmutableSettings.Builder excludeSettings = ImmutableSettings.builder().put("index.routing.allocation.exclude._name", blockedNode);
unblock("test-repo"); client().admin().indices().prepareUpdateSettings("test-idx").setSettings(excludeSettings).get();
logger.info("--> waiting for completion");
SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(60)); logger.info("--> unblocking blocked node");
logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size()); unblockNode(blockedNode);
logger.info("--> done"); logger.info("--> waiting for completion");
} else { SnapshotInfo snapshotInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(600));
logger.info("--> done without blocks"); logger.info("Number of failed shards [{}]", snapshotInfo.shardFailures().size());
} logger.info("--> done");
ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots(); ImmutableList<SnapshotInfo> snapshotInfos = client().admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots();
assertThat(snapshotInfos.size(), equalTo(1)); assertThat(snapshotInfos.size(), equalTo(1));
assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS)); assertThat(snapshotInfos.get(0).state(), equalTo(SnapshotState.SUCCESS));
assertThat(snapshotInfos.get(0).shardFailures().size(), equalTo(0)); assertThat(snapshotInfos.get(0).shardFailures().size(), equalTo(0));

View File

@ -46,10 +46,6 @@ public class MockRepository extends FsRepository {
private final AtomicLong failureCounter = new AtomicLong(); private final AtomicLong failureCounter = new AtomicLong();
private volatile boolean blockable = true;
private volatile boolean blocked = false;
public void resetFailureCount() { public void resetFailureCount() {
failureCounter.set(0); failureCounter.set(0);
} }
@ -62,23 +58,25 @@ public class MockRepository extends FsRepository {
private final double randomDataFileIOExceptionRate; private final double randomDataFileIOExceptionRate;
private final double randomControlBlockingRate;
private final double randomDataFileBlockingRate;
private final long waitAfterUnblock; private final long waitAfterUnblock;
private final MockBlobStore mockBlobStore; private final MockBlobStore mockBlobStore;
private final String randomPrefix; private final String randomPrefix;
private volatile boolean blockOnControlFiles;
private volatile boolean blockOnDataFiles;
private volatile boolean blocked = false;
@Inject @Inject
public MockRepository(RepositoryName name, RepositorySettings repositorySettings, IndexShardRepository indexShardRepository) throws IOException { public MockRepository(RepositoryName name, RepositorySettings repositorySettings, IndexShardRepository indexShardRepository) throws IOException {
super(name, repositorySettings, indexShardRepository); super(name, repositorySettings, indexShardRepository);
randomControlIOExceptionRate = repositorySettings.settings().getAsDouble("random_control_io_exception_rate", 0.0); randomControlIOExceptionRate = repositorySettings.settings().getAsDouble("random_control_io_exception_rate", 0.0);
randomDataFileIOExceptionRate = repositorySettings.settings().getAsDouble("random_data_file_io_exception_rate", 0.0); randomDataFileIOExceptionRate = repositorySettings.settings().getAsDouble("random_data_file_io_exception_rate", 0.0);
randomControlBlockingRate = repositorySettings.settings().getAsDouble("random_control_blocking_rate", 0.0); blockOnControlFiles = repositorySettings.settings().getAsBoolean("block_on_control", false);
randomDataFileBlockingRate = repositorySettings.settings().getAsDouble("random_data_file_blocking_rate", 0.0); blockOnDataFiles = repositorySettings.settings().getAsBoolean("block_on_data", false);
randomPrefix = repositorySettings.settings().get("random"); randomPrefix = repositorySettings.settings().get("random");
waitAfterUnblock = repositorySettings.settings().getAsLong("wait_after_unblock", 0L); waitAfterUnblock = repositorySettings.settings().getAsLong("wait_after_unblock", 0L);
logger.info("starting mock repository with random prefix " + randomPrefix); logger.info("starting mock repository with random prefix " + randomPrefix);
@ -108,6 +106,14 @@ public class MockRepository extends FsRepository {
mockBlobStore.unblockExecution(); mockBlobStore.unblockExecution();
} }
/**
 * Sets the {@code blockOnDataFiles} flag of this mock repository.
 * <p>
 * The flag is read by the blob-store branch that logs
 * "blocking I/O operation for file" and then calls {@code blockExecution()},
 * and it is reset to {@code false} by {@code unblockExecution()}.
 *
 * @param blocked new value of the flag; {@code true} requests blocking
 */
public void blockOnDataFiles(boolean blocked) {
blockOnDataFiles = blocked;
}
/**
 * Sets the {@code blockOnControlFiles} flag of this mock repository.
 * <p>
 * The flag is read by the blob-store branch that logs
 * "blocking I/O operation for file" and then calls {@code blockExecution()},
 * and it is reset to {@code false} by {@code unblockExecution()}.
 *
 * @param blocked new value of the flag; {@code true} requests blocking
 */
public void blockOnControlFiles(boolean blocked) {
blockOnControlFiles = blocked;
}
public class MockBlobStore extends BlobStoreWrapper { public class MockBlobStore extends BlobStoreWrapper {
ConcurrentMap<String, AtomicLong> accessCounts = new ConcurrentHashMap<String, AtomicLong>(); ConcurrentMap<String, AtomicLong> accessCounts = new ConcurrentHashMap<String, AtomicLong>();
@ -133,8 +139,10 @@ public class MockRepository extends FsRepository {
/**
 * Wakes every thread waiting on this blob store's monitor.
 * <p>
 * Does nothing unless {@code blocked} is currently set. Otherwise it clears
 * {@code blocked} and the blocking flags before calling {@code notifyAll()},
 * so the loop condition in {@code blockExecution()} is false when the
 * waiters re-check it.
 */
public synchronized void unblockExecution() { public synchronized void unblockExecution() {
if (blocked) { if (blocked) {
blockable = false;
blocked = false; blocked = false;
// Clean blocking flags, so we wouldn't try to block again
blockOnDataFiles = false;
blockOnControlFiles = false;
this.notifyAll(); this.notifyAll();
} }
} }
@ -146,12 +154,13 @@ public class MockRepository extends FsRepository {
/**
 * Makes the calling thread wait on this blob store's monitor for as long as
 * the blocking condition holds, marking the repository as {@code blocked}
 * before each wait.
 *
 * @return {@code true} if the thread returned from at least one
 *         {@code wait()} call, {@code false} if the condition was already
 *         clear or the first wait was interrupted
 */
private synchronized boolean blockExecution() { private synchronized boolean blockExecution() {
boolean wasBlocked = false; boolean wasBlocked = false;
try { try {
// Loop rather than a single wait(): the condition is re-checked after every wake-up.
while (blockable) { while (blockOnDataFiles || blockOnControlFiles) {
blocked = true; blocked = true;
this.wait(); this.wait();
wasBlocked = true; wasBlocked = true;
} }
} catch (InterruptedException ex) { } catch (InterruptedException ex) {
// Restore the interrupt status instead of swallowing it.
Thread.currentThread().interrupt();
} }
return wasBlocked; return wasBlocked;
} }
@ -190,7 +199,7 @@ public class MockRepository extends FsRepository {
logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path()); logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path());
addFailure(); addFailure();
throw new IOException("Random IOException"); throw new IOException("Random IOException");
} else if (shouldFail(blobName, randomDataFileBlockingRate)) { } else if (blockOnDataFiles) {
logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path()); logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path());
if (blockExecution() && waitAfterUnblock > 0) { if (blockExecution() && waitAfterUnblock > 0) {
try { try {
@ -207,7 +216,7 @@ public class MockRepository extends FsRepository {
logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path()); logger.info("throwing random IOException for file [{}] at path [{}]", blobName, path());
addFailure(); addFailure();
throw new IOException("Random IOException"); throw new IOException("Random IOException");
} else if (shouldFail(blobName, randomControlBlockingRate)) { } else if (blockOnControlFiles) {
logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path()); logger.info("blocking I/O operation for file [{}] at path [{}]", blobName, path());
if (blockExecution() && waitAfterUnblock > 0) { if (blockExecution() && waitAfterUnblock > 0) {
try { try {