Expose tragic event to translog, close translog once we hit a tragic even and fail engine if we hit one too

This commit is contained in:
Simon Willnauer 2015-12-14 15:01:55 +01:00
parent c6003b6f13
commit 6cefdc82f6
4 changed files with 59 additions and 26 deletions

View File

@ -782,9 +782,13 @@ public class InternalEngine extends Engine {
// but we are double-checking it's failed and closed // but we are double-checking it's failed and closed
if (indexWriter.isOpen() == false && indexWriter.getTragicException() != null) { if (indexWriter.isOpen() == false && indexWriter.getTragicException() != null) {
failEngine("already closed by tragic event", indexWriter.getTragicException()); failEngine("already closed by tragic event", indexWriter.getTragicException());
} else if (translog.isOpen() == false && translog.getTragicException() != null) {
failEngine("already closed by tragic event", translog.getTragicException());
} }
return true; return true;
} else if (t != null && indexWriter.isOpen() == false && indexWriter.getTragicException() == t) { } else if (t != null &&
((indexWriter.isOpen() == false && indexWriter.getTragicException() == t)
|| (translog.isOpen() == false && translog.getTragicException() == t))) {
// this spot on - we are handling the tragic event exception here so we have to fail the engine // this spot on - we are handling the tragic event exception here so we have to fail the engine
// right away // right away
failEngine(source, t); failEngine(source, t);

View File

@ -279,7 +279,8 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
} }
} }
boolean isOpen() { /** Returns {@code true} if this {@code Translog} is still open. */
public boolean isOpen() {
return closed.get() == false; return closed.get() == false;
} }
@ -397,7 +398,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
* @see Index * @see Index
* @see org.elasticsearch.index.translog.Translog.Delete * @see org.elasticsearch.index.translog.Translog.Delete
*/ */
public Location add(Operation operation) throws TranslogException { public Location add(Operation operation) throws IOException {
final ReleasableBytesStreamOutput out = new ReleasableBytesStreamOutput(bigArrays); final ReleasableBytesStreamOutput out = new ReleasableBytesStreamOutput(bigArrays);
try { try {
final BufferedChecksumStreamOutput checksumStreamOutput = new BufferedChecksumStreamOutput(out); final BufferedChecksumStreamOutput checksumStreamOutput = new BufferedChecksumStreamOutput(out);
@ -419,7 +420,14 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
assert current.assertBytesAtLocation(location, bytes); assert current.assertBytesAtLocation(location, bytes);
return location; return location;
} }
} catch (AlreadyClosedException ex) { } catch (AlreadyClosedException | IOException ex) {
if (current.getTragicException() != null) {
try {
close();
} catch (Exception inner) {
ex.addSuppressed(inner);
}
}
throw ex; throw ex;
} catch (Throwable e) { } catch (Throwable e) {
throw new TranslogException(shardId, "Failed to write operation [" + operation + "]", e); throw new TranslogException(shardId, "Failed to write operation [" + operation + "]", e);
@ -433,6 +441,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
* Snapshots are fixed in time and will not be updated with future operations. * Snapshots are fixed in time and will not be updated with future operations.
*/ */
public Snapshot newSnapshot() { public Snapshot newSnapshot() {
ensureOpen();
try (ReleasableLock lock = readLock.acquire()) { try (ReleasableLock lock = readLock.acquire()) {
ArrayList<TranslogReader> toOpen = new ArrayList<>(); ArrayList<TranslogReader> toOpen = new ArrayList<>();
toOpen.addAll(recoveredTranslogs); toOpen.addAll(recoveredTranslogs);
@ -497,6 +506,15 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
if (closed.get() == false) { if (closed.get() == false) {
current.sync(); current.sync();
} }
} catch (AlreadyClosedException | IOException ex) {
if (current.getTragicException() != null) {
try {
close();
} catch (Exception inner) {
ex.addSuppressed(inner);
}
}
throw ex;
} }
} }
@ -1296,6 +1314,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
throw new IllegalStateException("already committing a translog with generation: " + currentCommittingTranslog.getGeneration()); throw new IllegalStateException("already committing a translog with generation: " + currentCommittingTranslog.getGeneration());
} }
final TranslogWriter oldCurrent = current; final TranslogWriter oldCurrent = current;
oldCurrent.ensureOpen();
oldCurrent.sync(); oldCurrent.sync();
currentCommittingTranslog = current.immutableReader(); currentCommittingTranslog = current.immutableReader();
Path checkpoint = location.resolve(CHECKPOINT_FILE_NAME); Path checkpoint = location.resolve(CHECKPOINT_FILE_NAME);
@ -1391,7 +1410,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
private void ensureOpen() { private void ensureOpen() {
if (closed.get()) { if (closed.get()) {
throw new AlreadyClosedException("translog is already closed"); throw new AlreadyClosedException("translog is already closed", current.getTragicException());
} }
} }
@ -1406,4 +1425,11 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
return TranslogWriter.ChannelFactory.DEFAULT; return TranslogWriter.ChannelFactory.DEFAULT;
} }
/** If this {@code Translog} was closed as a side-effect of a tragic exception,
* e.g. disk full while flushing a new segment, this returns the root cause exception.
* Otherwise (no tragic exception has occurred) it returns null. */
public Throwable getTragicException() {
return current.getTragicException();
}
} }

View File

@ -56,7 +56,7 @@ public class TranslogWriter extends TranslogReader {
/* the offset in bytes written to the file */ /* the offset in bytes written to the file */
protected volatile long writtenOffset; protected volatile long writtenOffset;
/* if we hit an exception that we can't recover from we assign it to this var and ship it with every AlreadyClosedException we throw */ /* if we hit an exception that we can't recover from we assign it to this var and ship it with every AlreadyClosedException we throw */
private volatile Throwable tragicEvent; private volatile Throwable tragedy;
public TranslogWriter(ShardId shardId, long generation, ChannelReference channelReference) throws IOException { public TranslogWriter(ShardId shardId, long generation, ChannelReference channelReference) throws IOException {
@ -94,6 +94,12 @@ public class TranslogWriter extends TranslogReader {
throw throwable; throw throwable;
} }
} }
/** If this {@code TranslogWriter} was closed as a side-effect of a tragic exception,
* e.g. disk full while flushing a new segment, this returns the root cause exception.
* Otherwise (no tragic exception has occurred) it returns null. */
public Throwable getTragicException() {
return tragedy;
}
public enum Type { public enum Type {
@ -125,10 +131,10 @@ public class TranslogWriter extends TranslogReader {
protected final void closeWithTragicEvent(Throwable throwable) throws IOException { protected final void closeWithTragicEvent(Throwable throwable) throws IOException {
try (ReleasableLock lock = writeLock.acquire()) { try (ReleasableLock lock = writeLock.acquire()) {
if (throwable != null) { if (throwable != null) {
if (tragicEvent == null) { if (tragedy == null) {
tragicEvent = throwable; tragedy = throwable;
} else { } else {
tragicEvent.addSuppressed(throwable); tragedy.addSuppressed(throwable);
} }
} }
close(); close();
@ -316,7 +322,7 @@ public class TranslogWriter extends TranslogReader {
protected final void ensureOpen() { protected final void ensureOpen() {
if (isClosed()) { if (isClosed()) {
throw new AlreadyClosedException("translog [" + getGeneration() + "] is already closed", tragicEvent); throw new AlreadyClosedException("translog [" + getGeneration() + "] is already closed", tragedy);
} }
} }
} }

View File

@ -124,7 +124,7 @@ public class TranslogTests extends ESTestCase {
return new TranslogConfig(shardId, path, IndexSettingsModule.newIndexSettings(shardId.index(), build), Translog.Durabilty.REQUEST, BigArrays.NON_RECYCLING_INSTANCE, null); return new TranslogConfig(shardId, path, IndexSettingsModule.newIndexSettings(shardId.index(), build), Translog.Durabilty.REQUEST, BigArrays.NON_RECYCLING_INSTANCE, null);
} }
protected void addToTranslogAndList(Translog translog, ArrayList<Translog.Operation> list, Translog.Operation op) { protected void addToTranslogAndList(Translog translog, ArrayList<Translog.Operation> list, Translog.Operation op) throws IOException {
list.add(op); list.add(op);
translog.add(op); translog.add(op);
} }
@ -335,7 +335,7 @@ public class TranslogTests extends ESTestCase {
} }
} }
public void testSnapshot() { public void testSnapshot() throws IOException {
ArrayList<Translog.Operation> ops = new ArrayList<>(); ArrayList<Translog.Operation> ops = new ArrayList<>();
Translog.Snapshot snapshot = translog.newSnapshot(); Translog.Snapshot snapshot = translog.newSnapshot();
assertThat(snapshot, SnapshotMatchers.size(0)); assertThat(snapshot, SnapshotMatchers.size(0));
@ -394,7 +394,7 @@ public class TranslogTests extends ESTestCase {
Translog.Snapshot snapshot = translog.newSnapshot(); Translog.Snapshot snapshot = translog.newSnapshot();
fail("translog is closed"); fail("translog is closed");
} catch (AlreadyClosedException ex) { } catch (AlreadyClosedException ex) {
assertThat(ex.getMessage(), containsString("translog-1.tlog is already closed can't increment")); assertEquals(ex.getMessage(), "translog is already closed");
} }
} }
@ -639,7 +639,7 @@ public class TranslogTests extends ESTestCase {
final String threadId = "writer_" + i; final String threadId = "writer_" + i;
writers[i] = new Thread(new AbstractRunnable() { writers[i] = new Thread(new AbstractRunnable() {
@Override @Override
public void doRun() throws BrokenBarrierException, InterruptedException { public void doRun() throws BrokenBarrierException, InterruptedException, IOException {
barrier.await(); barrier.await();
int counter = 0; int counter = 0;
while (run.get()) { while (run.get()) {
@ -1287,7 +1287,6 @@ public class TranslogTests extends ESTestCase {
public void testFailFlush() throws IOException { public void testFailFlush() throws IOException {
Path tempDir = createTempDir(); Path tempDir = createTempDir();
final AtomicBoolean failWrite = new AtomicBoolean();
final AtomicBoolean simulateDiskFull = new AtomicBoolean(); final AtomicBoolean simulateDiskFull = new AtomicBoolean();
TranslogConfig config = getTranslogConfig(tempDir); TranslogConfig config = getTranslogConfig(tempDir);
Translog translog = new Translog(config) { Translog translog = new Translog(config) {
@ -1303,9 +1302,6 @@ public class TranslogTests extends ESTestCase {
@Override @Override
public int write(ByteBuffer src) throws IOException { public int write(ByteBuffer src) throws IOException {
if (failWrite.get()) {
throw new IOException("boom");
}
if (simulateDiskFull.get()) { if (simulateDiskFull.get()) {
if (src.limit() > 1) { if (src.limit() > 1) {
final int pos = src.position(); final int pos = src.position();
@ -1337,11 +1333,8 @@ public class TranslogTests extends ESTestCase {
opsSynced++; opsSynced++;
} catch (IOException ex) { } catch (IOException ex) {
failed = true; failed = true;
assertFalse(translog.isOpen());
assertEquals("no space left on device", ex.getMessage()); assertEquals("no space left on device", ex.getMessage());
} catch (TranslogException ex) {
// we catch IOExceptions in Translog#add -- that's how we got here
failed = true;
assertTrue(ex.toString(), ex.getMessage().startsWith("Failed to write operation"));
} }
simulateDiskFull.set(randomBoolean()); simulateDiskFull.set(randomBoolean());
} }
@ -1362,16 +1355,20 @@ public class TranslogTests extends ESTestCase {
fail("already closed"); fail("already closed");
} catch (AlreadyClosedException ex) { } catch (AlreadyClosedException ex) {
// all is well // all is well
assertNotNull(ex.getCause());
assertSame(translog.getTragicException(), ex.getCause());
} }
try { try {
translog.close(); translog.commit();
if (opsAdded != opsSynced) { fail("already closed");
fail("already closed");
}
} catch (AlreadyClosedException ex) { } catch (AlreadyClosedException ex) {
assertNotNull(ex.getCause()); assertNotNull(ex.getCause());
assertSame(translog.getTragicException(), ex.getCause());
} }
assertFalse(translog.isOpen());
translog.close(); // we are closed
config.setTranslogGeneration(translogGeneration); config.setTranslogGeneration(translogGeneration);
try (Translog tlog = new Translog(config)){ try (Translog tlog = new Translog(config)){
assertEquals("lastCommitted must be 1 less than current", translogGeneration.translogFileGeneration + 1, tlog.currentFileGeneration()); assertEquals("lastCommitted must be 1 less than current", translogGeneration.translogFileGeneration + 1, tlog.currentFileGeneration());