From a8b063d61b1646d595a8f4ea5a54ea7867292779 Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Sat, 29 Aug 2015 13:16:16 -0400 Subject: [PATCH] NIFI-902: Ensure that we close the underlying file stream when we roll over a partition instead of the bufferedoutputstream, which could cause corruption of there was a failure to flush the entire buffer previously. --- .../org/wali/MinimalLockingWriteAheadLog.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java index f354f69be4..501c3304d7 100644 --- a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java +++ b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java @@ -26,6 +26,7 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; @@ -720,7 +721,33 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor public void rollover() throws IOException { lock.lock(); try { - final DataOutputStream out = dataOut; + // Note that here we are closing fileOut and NOT dataOut. + // This is very much intentional, not an oversight. This is done because of + // the way that the OutputStreams are structured. dataOut wraps a BufferedOutputStream, + // which then wraps the FileOutputStream. If we close 'dataOut', then this will call + // the flush() method of BufferedOutputStream. Under normal conditions, this is fine. + // However, there is a very important corner case to consider: + // + // If we are writing to the DataOutputStream in the update() method and that + // call to write() then results in the BufferedOutputStream calling flushBuffer() - + // or if we finish the call to update() and call flush() ourselves - it is possible + // that the internal buffer of the BufferedOutputStream can get partially written to + // to the FileOutputStream and then an IOException occurs. If this occurs, we have + // written a partial record to disk. This still is okay, as we have logic to handle + // the condition where we have a partial record and then an unexpected End-of-File. + // But if we then call close() on 'dataOut', this will call the flush() method of the + // underlying BufferedOutputStream. As a result, we will end up again writing the internal + // buffer of the BufferedOutputStream to the underlying file. At this point, we are left + // not with an unexpected/premature End-of-File but instead a bunch of seemingly random + // bytes that happened to be residing in that internal buffer, and this will result in + // a corrupt and unrecoverable Write-Ahead Log. + // + // Additionally, we are okay not ever calling close on the wrapping BufferedOutputStream and + // DataOutputStream because they don't actually hold any resources that need to be reclaimed, + // and after each update to the Write-Ahead Log, we call flush() ourselves to ensure that we don't + // leave arbitrary data in the BufferedOutputStream that hasn't been flushed to the underlying + // FileOutputStream. + final OutputStream out = fileOut; if (out != null) { try { out.close();