Tests: always run Lucene's CheckIndex when shards are closed in tests and fail the test if corruption is detected

Today we only run 10% of the time, and the test doesn't fail when
corruption is detected.

I think it's better to always run and fail the test, so we can catch
any possible resiliency bugs in Lucene/Elasticsearch causing corruption.

For known tests that create corrupted indices, it's easy to set
MockFSDirectoryService.CHECK_INDEX_ON_CLOSE to false...

Closes #7730
This commit is contained in:
Michael McCandless 2014-09-25 16:50:48 -04:00 committed by mikemccand
parent 091578d117
commit 637c6d1606
10 changed files with 37 additions and 26 deletions

View File

@ -453,7 +453,7 @@ public class InternalIndexService extends AbstractIndexComponent implements Inde
}
// call this before we close the store, so we can release resources for it
indicesLifecycle.afterIndexShardClosed(sId);
indicesLifecycle.afterIndexShardClosed(sId, indexShard);
// if we delete or have no gateway or the store is not persistent, clean the store...
Store store = shardInjector.getInstance(Store.class);
// and close it

View File

@ -130,7 +130,7 @@ public interface IndicesLifecycle {
*
* @param shardId The shard id
*/
public void afterIndexShardClosed(ShardId shardId) {
public void afterIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard) {
}

View File

@ -152,10 +152,10 @@ public class InternalIndicesLifecycle extends AbstractComponent implements Indic
}
}
public void afterIndexShardClosed(ShardId shardId) {
public void afterIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard) {
for (Listener listener : listeners) {
try {
listener.afterIndexShardClosed(shardId);
listener.afterIndexShardClosed(shardId, indexShard);
} catch (Throwable t) {
logger.warn("{} failed to invoke after shard closed callback", t, shardId);
}

View File

@ -371,12 +371,16 @@ public abstract class AbstractRandomizedTest extends RandomizedTest {
// Suite and test case setup/ cleanup.
// -----------------------------------------------------------------
/** MockFSDirectoryService sets this: */
public static boolean checkIndexFailed;
/**
* For subclasses to override. Overrides must call {@code super.setUp()}.
*/
@Before
public void setUp() throws Exception {
parentChainCallRule.setupCalled = true;
checkIndexFailed = false;
}
/**
@ -385,6 +389,7 @@ public abstract class AbstractRandomizedTest extends RandomizedTest {
@After
public void tearDown() throws Exception {
parentChainCallRule.teardownCalled = true;
assertFalse("at least one shard failed CheckIndex", checkIndexFailed);
}

View File

@ -19,9 +19,6 @@
package org.elasticsearch.cluster.ack;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
@ -49,7 +46,11 @@ import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.warmer.IndexWarmersMetaData;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.elasticsearch.test.store.MockFSDirectoryService;
import org.junit.Test;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import static org.elasticsearch.cluster.metadata.IndexMetaData.*;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
@ -395,7 +396,11 @@ public class AckTests extends ElasticsearchIntegrationTest {
@Test
public void testOpenIndexNoAcknowledgement() {
createIndex("test");
// TODO: this test fails CheckIndex test for some reason ... seems like the index is being deleted while we run CheckIndex??
assertAcked(client().admin().indices().prepareCreate("test").setSettings(
ImmutableSettings.settingsBuilder()
// Never run CheckIndex in the end:
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, false).build()));
ensureGreen();
CloseIndexResponse closeIndexResponse = client().admin().indices().prepareClose("test").execute().actionGet();

View File

@ -317,6 +317,8 @@ public class CorruptedFileTest extends ElasticsearchIntegrationTest {
assertAcked(prepareCreate("test").setSettings(ImmutableSettings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
.put(InternalEngine.INDEX_FAIL_ON_CORRUPTION, true)
// This does corrupt files on the replica, so we can't check:
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, false)
.put("index.routing.allocation.include._name", primariesNode.getNode().name())
.put("indices.recovery.concurrent_streams", 10)
));

View File

@ -38,10 +38,7 @@ public class FlushTest extends ElasticsearchIntegrationTest {
@Test
public void testWaitIfOngoing() throws InterruptedException {
assertAcked(client().admin().indices().prepareCreate("test").setSettings(
ImmutableSettings.settingsBuilder()
// Always run CheckIndex in the end:
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, true).build()));
createIndex("test");
ensureGreen("test");
final int numIters = scaledRandomIntBetween(10, 30);
for (int i = 0; i < numIters; i++) {

View File

@ -92,7 +92,6 @@ public class SearchWithRandomExceptionsTests extends ElasticsearchIntegrationTes
if (createIndexWithoutErrors) {
Builder settings = settingsBuilder()
.put("index.number_of_replicas", randomIntBetween(0, 1))
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, true)
.put("gateway.type", "local");
logger.info("creating index: [test] using settings: [{}]", settings.build().getAsMap());
client().admin().indices().prepareCreate("test")
@ -107,7 +106,6 @@ public class SearchWithRandomExceptionsTests extends ElasticsearchIntegrationTes
client().admin().indices().prepareFlush("test").setWaitIfOngoing(true).execute().get();
client().admin().indices().prepareClose("test").execute().get();
client().admin().indices().prepareUpdateSettings("test").setSettings(settingsBuilder()
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, true)
.put(MockDirectoryHelper.RANDOM_IO_EXCEPTION_RATE, exceptionRate)
.put(MockDirectoryHelper.RANDOM_IO_EXCEPTION_RATE_ON_OPEN, exceptionOnOpenRate));
client().admin().indices().prepareOpen("test").execute().get();

View File

@ -21,9 +21,11 @@ package org.elasticsearch.test.store;
import com.google.common.base.Charsets;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.StoreRateLimiting;
import org.apache.lucene.util.AbstractRandomizedTest;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
@ -60,15 +62,15 @@ public class MockFSDirectoryService extends FsDirectoryService {
final long seed = indexSettings.getAsLong(ElasticsearchIntegrationTest.SETTING_INDEX_SEED, 0l);
Random random = new Random(seed);
helper = new MockDirectoryHelper(shardId, indexSettings, logger, random, seed);
checkIndexOnClose = indexSettings.getAsBoolean(CHECK_INDEX_ON_CLOSE, random.nextDouble() < 0.1);
checkIndexOnClose = indexSettings.getAsBoolean(CHECK_INDEX_ON_CLOSE, true);
delegateService = helper.randomDirectorService(indexStore);
if (checkIndexOnClose) {
final IndicesLifecycle.Listener listener = new IndicesLifecycle.Listener() {
@Override
public void beforeIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard) {
public void afterIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard) {
if (shardId.equals(sid) && indexShard != null) {
checkIndex(((InternalIndexShard) indexShard).store());
checkIndex(((InternalIndexShard) indexShard).store(), sid);
}
service.indicesLifecycle().removeListener(this);
}
@ -87,19 +89,24 @@ public class MockFSDirectoryService extends FsDirectoryService {
throw new UnsupportedOperationException();
}
public void checkIndex(Store store) throws IndexShardException {
public void checkIndex(Store store, ShardId shardId) throws IndexShardException {
try {
if (!Lucene.indexExists(store.directory())) {
Directory dir = store.directory();
if (!Lucene.indexExists(dir)) {
return;
}
CheckIndex checkIndex = new CheckIndex(store.directory());
if (IndexWriter.isLocked(dir)) {
AbstractRandomizedTest.checkIndexFailed = true;
throw new IllegalStateException("IndexWriter is still open on shard " + shardId);
}
CheckIndex checkIndex = new CheckIndex(dir);
BytesStreamOutput os = new BytesStreamOutput();
PrintStream out = new PrintStream(os, false, Charsets.UTF_8.name());
checkIndex.setInfoStream(out);
out.flush();
CheckIndex.Status status = checkIndex.checkIndex();
if (!status.clean) {
AbstractRandomizedTest.checkIndexFailed = true;
logger.warn("check index [failure]\n{}", new String(os.bytes().toBytes(), Charsets.UTF_8));
throw new IndexShardException(shardId, "index check failure");
} else {

View File

@ -526,10 +526,7 @@ public class SimpleVersioningTests extends ElasticsearchIntegrationTest {
@Test
@Slow
public void testRandomIDsAndVersions() throws Exception {
assertAcked(client().admin().indices().prepareCreate("test").setSettings(
ImmutableSettings.settingsBuilder()
// Always run CheckIndex in the end:
.put(MockFSDirectoryService.CHECK_INDEX_ON_CLOSE, true).build()));
createIndex("test");
ensureGreen();
// TODO: sometimes use _bulk API