From 2b61b205fc76b25091a0078cf4ca27cd2e530aff Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 16 Jun 2020 12:04:41 +0200 Subject: [PATCH] LUCENE-9396: Improve truncation detection for points. (#1557) --- lucene/CHANGES.txt | 2 + .../org/apache/lucene/codecs/CodecUtil.java | 20 ++++++++- .../codecs/lucene86/Lucene86PointsReader.java | 9 +++- .../codecs/lucene86/Lucene86PointsWriter.java | 4 +- .../apache/lucene/codecs/TestCodecUtil.java | 45 +++++++++++++++++++ 5 files changed, 76 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index da609cec0fd..db589536f34 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -210,6 +210,8 @@ Improvements * LUCENE-9397: UniformSplit supports encodable fields metadata. (Bruno Roustant) +* LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir) + Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java index 2e736c1fda5..8c40e2adcd6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java @@ -490,7 +490,25 @@ public final class CodecUtil { validateFooter(in); return readCRC(in); } - + + /** + * Returns (but does not validate) the checksum previously written by {@link #checkFooter}. + * @return actual checksum value + * @throws IOException if the footer is invalid + */ + public static long retrieveChecksum(IndexInput in, long expectedLength) throws IOException { + if (expectedLength < footerLength()) { + throw new IllegalArgumentException("expectedLength cannot be less than the footer length"); + } + if (in.length() < expectedLength) { + throw new CorruptIndexException("truncated file: length=" + in.length() + " but expectedLength==" + expectedLength, in); + } else if (in.length() > expectedLength) { + throw new CorruptIndexException("file too long: length=" + in.length() + " but expectedLength==" + expectedLength, in); + } + + return retrieveChecksum(in); + } + private static void validateFooter(IndexInput in) throws IOException { long remaining = in.length() - in.getFilePointer(); long expected = footerLength(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java index 9aabc97372a..fdc3cbd78b1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java @@ -63,7 +63,6 @@ public class Lucene86PointsReader extends PointsReader implements Closeable { Lucene86PointsFormat.VERSION_CURRENT, readState.segmentInfo.getId(), readState.segmentSuffix); - CodecUtil.retrieveChecksum(indexIn); dataIn = readState.directory.openInput(dataFileName, readState.context); CodecUtil.checkIndexHeader(dataIn, @@ -72,8 +71,8 @@ public class Lucene86PointsReader extends PointsReader implements Closeable { Lucene86PointsFormat.VERSION_CURRENT, readState.segmentInfo.getId(), readState.segmentSuffix); - CodecUtil.retrieveChecksum(dataIn); + long indexLength = -1, dataLength = -1; try (ChecksumIndexInput metaIn = readState.directory.openChecksumInput(metaFileName, readState.context)) { Throwable priorE = null; try { @@ -94,12 +93,18 @@ public class Lucene86PointsReader extends PointsReader implements Closeable { BKDReader reader = new BKDReader(metaIn, indexIn, dataIn); readers.put(fieldNumber, reader); } + indexLength = metaIn.readLong(); + dataLength = metaIn.readLong(); } catch (Throwable t) { priorE = t; } finally { CodecUtil.checkFooter(metaIn, priorE); } } + // At this point, checksums of the meta file have been validated so we + // know that indexLength and dataLength are very likely correct. + CodecUtil.retrieveChecksum(indexIn, indexLength); + CodecUtil.retrieveChecksum(dataIn, dataLength); success = true; } finally { if (success == false) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java index 4fceeccd786..6fe35710c50 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsWriter.java @@ -251,9 +251,11 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable { } finished = true; metaOut.writeInt(-1); - CodecUtil.writeFooter(metaOut); CodecUtil.writeFooter(indexOut); CodecUtil.writeFooter(dataOut); + metaOut.writeLong(indexOut.getFilePointer()); + metaOut.writeLong(dataOut.getFilePointer()); + CodecUtil.writeFooter(metaOut); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java index 94c82aa34f3..ea0972d5a2a 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java @@ -26,6 +26,8 @@ import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexInput; import org.apache.lucene.store.ByteBuffersIndexOutput; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.LuceneTestCase; @@ -319,4 +321,47 @@ public class TestCodecUtil extends LuceneTestCase { () -> CodecUtil.retrieveChecksum(input)); assertTrue(e.getMessage(), e.getMessage().contains("misplaced codec footer (file truncated?): length=0 but footerLength==16 (resource")); } + + public void testRetrieveChecksum() throws IOException { + Directory dir = newDirectory(); + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + out.writeByte((byte) 42); + CodecUtil.writeFooter(out); + } + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + CodecUtil.retrieveChecksum(in, in.length()); // no exception + + CorruptIndexException exception = expectThrows(CorruptIndexException.class, + () -> CodecUtil.retrieveChecksum(in, in.length() - 1)); + assertTrue(exception.getMessage().contains("too long")); + assertArrayEquals(new Throwable[0], exception.getSuppressed()); + + exception = expectThrows(CorruptIndexException.class, + () -> CodecUtil.retrieveChecksum(in, in.length() + 1)); + assertTrue(exception.getMessage().contains("truncated")); + assertArrayEquals(new Throwable[0], exception.getSuppressed()); + } + + try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) { + for (int i = 0; i <= CodecUtil.footerLength(); ++i) { + out.writeByte((byte) i); + } + } + try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) { + CorruptIndexException exception = expectThrows(CorruptIndexException.class, + () -> CodecUtil.retrieveChecksum(in, in.length())); + assertTrue(exception.getMessage().contains("codec footer mismatch")); + assertArrayEquals(new Throwable[0], exception.getSuppressed()); + + exception = expectThrows(CorruptIndexException.class, + () -> CodecUtil.retrieveChecksum(in, in.length() - 1)); + assertTrue(exception.getMessage().contains("too long")); + + exception = expectThrows(CorruptIndexException.class, + () -> CodecUtil.retrieveChecksum(in, in.length() + 1)); + assertTrue(exception.getMessage().contains("truncated")); + } + + dir.close(); + } }