LUCENE-9396: Improve truncation detection for points. (#1557)

This commit is contained in:
Adrien Grand 2020-06-16 12:04:41 +02:00 committed by GitHub
parent 75491ab381
commit 2b61b205fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 76 additions and 4 deletions

View File

@ -210,6 +210,8 @@ Improvements
* LUCENE-9397: UniformSplit supports encodable fields metadata. (Bruno Roustant)
* LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir)
Optimizations
---------------------

View File

@ -490,7 +490,25 @@ public final class CodecUtil {
validateFooter(in);
return readCRC(in);
}
/**
 * Returns (but does not validate) the checksum previously written by {@link #writeFooter},
 * after first verifying that the file has exactly the expected length.
 *
 * @param in input whose footer holds the checksum
 * @param expectedLength the length in bytes that {@code in} is expected to have; must be at
 *        least {@link #footerLength()}
 * @return actual checksum value
 * @throws IllegalArgumentException if {@code expectedLength} is less than the footer length
 * @throws CorruptIndexException if {@code in} is shorter or longer than {@code expectedLength}
 * @throws IOException if the footer is invalid
 */
public static long retrieveChecksum(IndexInput in, long expectedLength) throws IOException {
  // An expected length that cannot even hold a footer is a caller error, not file corruption.
  if (expectedLength < footerLength()) {
    throw new IllegalArgumentException("expectedLength cannot be less than the footer length");
  }

  final long actualLength = in.length();
  if (actualLength == expectedLength) {
    // Length matches: delegate to the single-argument overload to read the footer.
    return retrieveChecksum(in);
  }

  if (actualLength < expectedLength) {
    throw new CorruptIndexException("truncated file: length=" + actualLength + " but expectedLength==" + expectedLength, in);
  }
  throw new CorruptIndexException("file too long: length=" + actualLength + " but expectedLength==" + expectedLength, in);
}
private static void validateFooter(IndexInput in) throws IOException {
long remaining = in.length() - in.getFilePointer();
long expected = footerLength();

View File

@ -63,7 +63,6 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
Lucene86PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
CodecUtil.retrieveChecksum(indexIn);
dataIn = readState.directory.openInput(dataFileName, readState.context);
CodecUtil.checkIndexHeader(dataIn,
@ -72,8 +71,8 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
Lucene86PointsFormat.VERSION_CURRENT,
readState.segmentInfo.getId(),
readState.segmentSuffix);
CodecUtil.retrieveChecksum(dataIn);
long indexLength = -1, dataLength = -1;
try (ChecksumIndexInput metaIn = readState.directory.openChecksumInput(metaFileName, readState.context)) {
Throwable priorE = null;
try {
@ -94,12 +93,18 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
BKDReader reader = new BKDReader(metaIn, indexIn, dataIn);
readers.put(fieldNumber, reader);
}
indexLength = metaIn.readLong();
dataLength = metaIn.readLong();
} catch (Throwable t) {
priorE = t;
} finally {
CodecUtil.checkFooter(metaIn, priorE);
}
}
// At this point, the checksum of the meta file has been validated so we
// know that indexLength and dataLength are very likely correct.
CodecUtil.retrieveChecksum(indexIn, indexLength);
CodecUtil.retrieveChecksum(dataIn, dataLength);
success = true;
} finally {
if (success == false) {

View File

@ -251,9 +251,11 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
}
finished = true;
metaOut.writeInt(-1);
CodecUtil.writeFooter(metaOut);
CodecUtil.writeFooter(indexOut);
CodecUtil.writeFooter(dataOut);
metaOut.writeLong(indexOut.getFilePointer());
metaOut.writeLong(dataOut.getFilePointer());
CodecUtil.writeFooter(metaOut);
}
@Override

View File

@ -26,6 +26,8 @@ import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ByteBuffersIndexInput;
import org.apache.lucene.store.ByteBuffersIndexOutput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LuceneTestCase;
@ -319,4 +321,47 @@ public class TestCodecUtil extends LuceneTestCase {
() -> CodecUtil.retrieveChecksum(input));
assertTrue(e.getMessage(), e.getMessage().contains("misplaced codec footer (file truncated?): length=0 but footerLength==16 (resource"));
}
/**
 * Checks the length validation done by {@code CodecUtil.retrieveChecksum(IndexInput, long)}:
 * a matching length succeeds on a file with a valid footer, while a shorter or longer
 * expected length is reported as "too long" / "truncated" respectively, both for a file
 * with a valid footer and for a file whose trailing bytes are not a footer.
 */
public void testRetrieveChecksum() throws IOException {
  // try-with-resources so that the directory is closed even if an assertion below fails.
  try (Directory dir = newDirectory()) {
    // "foo": one payload byte followed by a well-formed footer.
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      out.writeByte((byte) 42);
      CodecUtil.writeFooter(out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      CodecUtil.retrieveChecksum(in, in.length()); // no exception

      // The file is longer than the expected length.
      CorruptIndexException exception = expectThrows(CorruptIndexException.class,
          () -> CodecUtil.retrieveChecksum(in, in.length() - 1));
      assertTrue(exception.getMessage(), exception.getMessage().contains("too long"));
      assertArrayEquals(new Throwable[0], exception.getSuppressed());

      // The file is shorter than the expected length.
      exception = expectThrows(CorruptIndexException.class,
          () -> CodecUtil.retrieveChecksum(in, in.length() + 1));
      assertTrue(exception.getMessage(), exception.getMessage().contains("truncated"));
      assertArrayEquals(new Throwable[0], exception.getSuppressed());
    }

    // "bar": footerLength() + 1 arbitrary bytes and no call to writeFooter.
    try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
      for (int i = 0; i <= CodecUtil.footerLength(); ++i) {
        out.writeByte((byte) i);
      }
    }
    try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
      // The length matches, so the failure has to come from reading the footer itself.
      CorruptIndexException exception = expectThrows(CorruptIndexException.class,
          () -> CodecUtil.retrieveChecksum(in, in.length()));
      assertTrue(exception.getMessage(), exception.getMessage().contains("codec footer mismatch"));
      assertArrayEquals(new Throwable[0], exception.getSuppressed());

      // Length mismatches are reported before the footer bytes are looked at.
      exception = expectThrows(CorruptIndexException.class,
          () -> CodecUtil.retrieveChecksum(in, in.length() - 1));
      assertTrue(exception.getMessage(), exception.getMessage().contains("too long"));

      exception = expectThrows(CorruptIndexException.class,
          () -> CodecUtil.retrieveChecksum(in, in.length() + 1));
      assertTrue(exception.getMessage(), exception.getMessage().contains("truncated"));
    }
  }
}
}