[ENGINE] Mark store as corrupted before sending failed shard

We have to mark a shard as corrupted, if necessary, before the
shard-failed event is fired, i.e. before we call the corresponding
listener in the engine. Otherwise the shard might be re-allocated
on the same node and started up without being marked as corrupted.

Relates to #5924
This commit is contained in:
Simon Willnauer 2014-07-14 09:56:50 +02:00
parent e8ff007852
commit 86bc79202d
2 changed files with 24 additions and 20 deletions

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.*; import org.apache.lucene.index.*;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
@ -396,6 +395,10 @@ public class Lucene {
return DirectoryReader.indexExists(directory); return DirectoryReader.indexExists(directory);
} }
/**
* Returns <tt>true</tt> iff the given exception or
* one of its causes is an instance of {@link CorruptIndexException}, otherwise <tt>false</tt>.
*/
public static boolean isCorruptionException(Throwable t) { public static boolean isCorruptionException(Throwable t) {
return ExceptionsHelper.unwrap(t, CorruptIndexException.class) != null; return ExceptionsHelper.unwrap(t, CorruptIndexException.class) != null;
} }

View File

@ -1275,34 +1275,35 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
public void failEngine(String reason, Throwable failure) { public void failEngine(String reason, Throwable failure) {
assert failure != null; assert failure != null;
if (failEngineLock.tryLock()) { if (failEngineLock.tryLock()) {
assert !readLock.assertLockIsHeld() : "readLock is held by a thread that tries to fail the engine";
if (failedEngine != null) {
logger.debug("tried to fail engine but engine is already failed. ignoring. [{}]", reason, failure);
return;
}
try { try {
logger.warn("failed engine [{}]", reason, failure); // we first mark the store as corrupted before we notify any listeners
// we must set a failure exception, generate one if not supplied // this must happen first otherwise we might try to reallocate so quickly
failedEngine = failure; // on the same node that we don't see the corrupted marker file when
for (FailedEngineListener listener : failedEngineListeners) { // the shard is initializing
listener.onFailedEngine(shardId, reason, failure); if (Lucene.isCorruptionException(failure)) {
try {
store.markStoreCorrupted(ExceptionsHelper.unwrap(failure, CorruptIndexException.class));
} catch (IOException e) {
logger.warn("Couldn't marks store corrupted", e);
}
} }
} finally { } finally {
assert !readLock.assertLockIsHeld() : "readLock is held by a thread that tries to fail the engine";
if (failedEngine != null) {
logger.debug("tried to fail engine but engine is already failed. ignoring. [{}]", reason, failure);
return;
}
try { try {
if (Lucene.isCorruptionException(failure)) { logger.warn("failed engine [{}]", reason, failure);
try { // we must set a failure exception, generate one if not supplied
store.markStoreCorrupted(ExceptionsHelper.unwrap(failure, CorruptIndexException.class)); failedEngine = failure;
} catch (IOException e) { for (FailedEngineListener listener : failedEngineListeners) {
logger.warn("Couldn't marks store corrupted", e); listener.onFailedEngine(shardId, reason, failure);
}
} }
} finally { } finally {
// close the engine whatever happens...
close(); close();
} }
} }
} else { } else {
logger.debug("tried to fail engine but could not acquire lock - engine should be failed by now [{}]", reason, failure); logger.debug("tried to fail engine but could not acquire lock - engine should be failed by now [{}]", reason, failure);
} }