From cb3295b21268d1d5fba5a356d66df4a5ca7dcbd5 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Fri, 6 Apr 2018 10:33:21 -0400 Subject: [PATCH] Close translog writer if exception on write channel (#29401) Today we close the translog write tragically if we experience any I/O exception on a write. These tragic closes lead to use closing the translog and failing the engine. Yet, there is one case that is missed which is when we touch the write channel during a read (checking if reading from the writer would put us past what has been flushed). This commit addresses this by closing the writer tragically if we encounter an I/O exception on the write channel while reading. This becomes interesting when we consider that this method is invoked from the engine through the translog as part of getting a document from the translog. This means we have to consider closing the translog here as well which will cascade up into us finally failing the engine. Note that there is no semantic change to, for example, primary/replica resync and recovery. These actions will take a snapshot of the translog which syncs the translog to disk. If an I/O exception occurs during the sync we already close the writer tragically and once we have synced we do not ever read past the position that was synced while taking the snapshot. --- .../index/translog/Translog.java | 11 +++++++- .../index/translog/TranslogWriter.java | 25 +++++++++++++------ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/translog/Translog.java b/server/src/main/java/org/elasticsearch/index/translog/Translog.java index b6b6f656be4..ec1ae17d353 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/Translog.java +++ b/server/src/main/java/org/elasticsearch/index/translog/Translog.java @@ -584,7 +584,16 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC if (current.generation == location.generation) { // no need to fsync here the read operation will ensure that buffers are written to disk // if they are still in RAM and we are reading onto that position - return current.read(location); + try { + return current.read(location); + } catch (final IOException e) { + try { + closeOnTragicEvent(e); + } catch (final Exception inner) { + e.addSuppressed(inner); + } + throw e; + } } else { // read backwards - it's likely we need to read on that is recent for (int i = readers.size() - 1; i >= 0; i--) { diff --git a/server/src/main/java/org/elasticsearch/index/translog/TranslogWriter.java b/server/src/main/java/org/elasticsearch/index/translog/TranslogWriter.java index 4aeb30d20c3..4846fdb4e46 100644 --- a/server/src/main/java/org/elasticsearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/elasticsearch/index/translog/TranslogWriter.java @@ -380,16 +380,25 @@ public class TranslogWriter extends BaseTranslogReader implements Closeable { @Override protected void readBytes(ByteBuffer targetBuffer, long position) throws IOException { - if (position + targetBuffer.remaining() > getWrittenOffset()) { - synchronized (this) { - // we only flush here if it's really really needed - try to minimize the impact of the read operation - // in some cases ie. a tragic event we might still be able to read the relevant value - // which is not really important in production but some test can make most strict assumptions - // if we don't fail in this call unless absolutely necessary. - if (position + targetBuffer.remaining() > getWrittenOffset()) { - outputStream.flush(); + try { + if (position + targetBuffer.remaining() > getWrittenOffset()) { + synchronized (this) { + // we only flush here if it's really really needed - try to minimize the impact of the read operation + // in some cases ie. a tragic event we might still be able to read the relevant value + // which is not really important in production but some test can make most strict assumptions + // if we don't fail in this call unless absolutely necessary. + if (position + targetBuffer.remaining() > getWrittenOffset()) { + outputStream.flush(); + } } } + } catch (final IOException e) { + try { + closeWithTragicEvent(e); + } catch (final IOException inner) { + e.addSuppressed(inner); + } + throw e; } // we don't have to have a lock here because we only write ahead to the file, so all writes has been complete // for the requested location.