Internal: Mark store as corrupted instead of deleting state file on engine failure

Currently, we delete the shard _state file on engine failure.
This behaviour does not persist the engine failure reason for later inspection.

This commit marks the shard store as corrupted instead of deleting
the _state file, ensuring that the store index cannot be opened afterwards
and that the engine failure reason is persisted.
This commit is contained in:
Areek Zillur 2015-06-18 17:28:31 -04:00
parent f67ae63d88
commit a8c2886b3f
2 changed files with 22 additions and 3 deletions

View File

@ -1272,10 +1272,10 @@ public class IndexShard extends AbstractIndexShardComponent {
@Override @Override
public void onFailedEngine(ShardId shardId, String reason, @Nullable Throwable failure) { public void onFailedEngine(ShardId shardId, String reason, @Nullable Throwable failure) {
try { try {
// delete the shard state so this folder will not be reused // mark as corrupted, so opening the store will fail
MetaDataStateFormat.deleteMetaState(nodeEnv.availableShardPaths(shardId)); store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])", failure));
} catch (IOException e) { } catch (IOException e) {
logger.warn("failed to delete shard state", e); logger.warn("failed to mark shard store as corrupted", e);
} finally { } finally {
for (Engine.FailedEngineListener listener : delegates) { for (Engine.FailedEngineListener listener : delegates) {
try { try {

View File

@ -33,6 +33,7 @@ import org.elasticsearch.env.ShardLock;
import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.query.QueryParsingException; import org.elasticsearch.index.query.QueryParsingException;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.translog.Translog; import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogConfig; import org.elasticsearch.index.translog.TranslogConfig;
import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.IndicesService;
@ -206,6 +207,24 @@ public class IndexShardTests extends ElasticsearchSingleNodeTest {
} }
/**
 * Checks what is left on disk after a shard has been failed: the persisted
 * shard state must still load and equal the state derived from the shard,
 * the shard path must still resolve, and the store index must no longer be
 * openable.
 */
public void testFailShard() throws Exception {
    createIndex("test");
    ensureGreen();
    IndicesService indicesService = getInstanceFromNode(IndicesService.class);
    NodeEnvironment env = getInstanceFromNode(NodeEnvironment.class);
    IndexService test = indicesService.indexService("test");
    IndexShard shard = test.shard(0);

    // fail shard
    shard.failShard("test shard fail", new IOException("corrupted"));

    // check state file still exists; the shard id is read through the accessor
    // in both lookups so they cannot diverge from each other
    ShardStateMetaData shardStateMetaData = load(logger, env.availableShardPaths(shard.shardId()));
    assertEquals(shardStateMetaData, getShardStateMetadata(shard));
    ShardPath shardPath = ShardPath.loadShardPath(logger, env, shard.shardId(), test.getIndexSettings());
    assertNotNull("shard path should still be loadable after failing the shard", shardPath);

    // but index can't be opened for a failed shard
    assertThat("store index should be corrupted", Store.canOpenIndex(logger, shardPath.resolveIndex()), equalTo(false));
}
ShardStateMetaData getShardStateMetadata(IndexShard shard) { ShardStateMetaData getShardStateMetadata(IndexShard shard) {
ShardRouting shardRouting = shard.routingEntry(); ShardRouting shardRouting = shard.routingEntry();
if (shardRouting == null) { if (shardRouting == null) {