mirror of https://github.com/apache/lucene.git
LUCENE-5842: Validate checksum footers for postings lists/docvalues/storedfields/vectors on init
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1612845 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
430c264afa
commit
26e5273658
|
@ -129,6 +129,11 @@ New Features
|
||||||
* LUCENE-5825: Benchmark module can use custom postings format, e.g.:
|
* LUCENE-5825: Benchmark module can use custom postings format, e.g.:
|
||||||
codec.postingsFormat=Memory (Varun Shenoy, David Smiley)
|
codec.postingsFormat=Memory (Varun Shenoy, David Smiley)
|
||||||
|
|
||||||
|
* LUCENE-5842: When opening large files (where it's too expensive to compare
|
||||||
|
checksum against all the bytes), retrieve checksum to validate structure
|
||||||
|
of the footer; this can detect some forms of corruption such as truncation.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-5752: Simplified Automaton API to be immutable. (Mike McCandless)
|
* LUCENE-5752: Simplified Automaton API to be immutable. (Mike McCandless)
|
||||||
|
|
|
@ -128,6 +128,14 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
// Have PostingsReader init itself
|
// Have PostingsReader init itself
|
||||||
postingsReader.init(in);
|
postingsReader.init(in);
|
||||||
|
|
||||||
|
if (version >= BlockTermsWriter.VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(in);
|
||||||
|
}
|
||||||
|
|
||||||
// Read per-field details
|
// Read per-field details
|
||||||
seekDir(in, dirOffset);
|
seekDir(in, dirOffset);
|
||||||
|
|
||||||
|
|
|
@ -104,6 +104,13 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer {
|
||||||
// Have PostingsReader init itself
|
// Have PostingsReader init itself
|
||||||
postingsReader.init(in);
|
postingsReader.init(in);
|
||||||
|
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(in);
|
||||||
|
|
||||||
// Read per-field details
|
// Read per-field details
|
||||||
seekDir(in, dirOffset);
|
seekDir(in, dirOffset);
|
||||||
seekDir(indexIn, indexDirOffset);
|
seekDir(indexIn, indexDirOffset);
|
||||||
|
|
|
@ -118,6 +118,12 @@ class DirectDocValuesProducer extends DocValuesProducer {
|
||||||
throw new CorruptIndexException("Format versions mismatch");
|
throw new CorruptIndexException("Format versions mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(data);
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -137,6 +137,12 @@ class MemoryDocValuesProducer extends DocValuesProducer {
|
||||||
throw new CorruptIndexException("Format versions mismatch");
|
throw new CorruptIndexException("Format versions mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(data);
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -123,6 +123,15 @@ public final class BlockTreeTermsReader extends FieldsProducer {
|
||||||
// Have PostingsReader init itself
|
// Have PostingsReader init itself
|
||||||
postingsReader.init(in);
|
postingsReader.init(in);
|
||||||
|
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
|
||||||
|
CodecUtil.retrieveChecksum(in);
|
||||||
|
}
|
||||||
|
|
||||||
// Read per-field details
|
// Read per-field details
|
||||||
seekDir(in, dirOffset);
|
seekDir(in, dirOffset);
|
||||||
seekDir(indexIn, indexDirOffset);
|
seekDir(indexIn, indexDirOffset);
|
||||||
|
|
|
@ -52,7 +52,6 @@ import org.apache.lucene.store.BufferedChecksumIndexInput;
|
||||||
import org.apache.lucene.store.ByteArrayDataInput;
|
import org.apache.lucene.store.ByteArrayDataInput;
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.DataInput;
|
import org.apache.lucene.store.DataInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
@ -155,6 +154,14 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
decompressor = compressionMode.newDecompressor();
|
decompressor = compressionMode.newDecompressor();
|
||||||
this.bytes = new BytesRef();
|
this.bytes = new BytesRef();
|
||||||
|
|
||||||
|
if (version >= VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(fieldsStream);
|
||||||
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -132,6 +132,16 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
||||||
}
|
}
|
||||||
assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
|
assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
|
||||||
|
|
||||||
|
long pos = vectorsStream.getFilePointer();
|
||||||
|
if (version >= VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(vectorsStream);
|
||||||
|
vectorsStream.seek(pos);
|
||||||
|
}
|
||||||
|
|
||||||
packedIntsVersion = vectorsStream.readVInt();
|
packedIntsVersion = vectorsStream.readVInt();
|
||||||
chunkSize = vectorsStream.readVInt();
|
chunkSize = vectorsStream.readVInt();
|
||||||
decompressor = compressionMode.newDecompressor();
|
decompressor = compressionMode.newDecompressor();
|
||||||
|
|
|
@ -80,15 +80,39 @@ public final class Lucene41PostingsReader extends PostingsReaderBase {
|
||||||
Lucene41PostingsWriter.VERSION_CURRENT);
|
Lucene41PostingsWriter.VERSION_CURRENT);
|
||||||
forUtil = new ForUtil(docIn);
|
forUtil = new ForUtil(docIn);
|
||||||
|
|
||||||
|
if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(docIn);
|
||||||
|
}
|
||||||
|
|
||||||
if (fieldInfos.hasProx()) {
|
if (fieldInfos.hasProx()) {
|
||||||
posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
|
posIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.POS_EXTENSION),
|
||||||
ioContext);
|
ioContext);
|
||||||
CodecUtil.checkHeader(posIn, Lucene41PostingsWriter.POS_CODEC, version, version);
|
CodecUtil.checkHeader(posIn, Lucene41PostingsWriter.POS_CODEC, version, version);
|
||||||
|
|
||||||
|
if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(posIn);
|
||||||
|
}
|
||||||
|
|
||||||
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
|
if (fieldInfos.hasPayloads() || fieldInfos.hasOffsets()) {
|
||||||
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
|
payIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION),
|
||||||
ioContext);
|
ioContext);
|
||||||
CodecUtil.checkHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, version, version);
|
CodecUtil.checkHeader(payIn, Lucene41PostingsWriter.PAY_CODEC, version, version);
|
||||||
|
|
||||||
|
if (version >= Lucene41PostingsWriter.VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(payIn);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -139,6 +139,14 @@ class Lucene42DocValuesProducer extends DocValuesProducer {
|
||||||
throw new CorruptIndexException("Format versions mismatch");
|
throw new CorruptIndexException("Format versions mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (version >= VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(data);
|
||||||
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -139,6 +139,14 @@ class Lucene45DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
throw new CorruptIndexException("Format versions mismatch");
|
throw new CorruptIndexException("Format versions mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (version >= Lucene45DocValuesFormat.VERSION_CHECKSUM) {
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(data);
|
||||||
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -119,6 +119,12 @@ class Lucene49DocValuesProducer extends DocValuesProducer implements Closeable {
|
||||||
throw new CorruptIndexException("Format versions mismatch");
|
throw new CorruptIndexException("Format versions mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(data);
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -93,6 +93,12 @@ class Lucene49NormsProducer extends DocValuesProducer {
|
||||||
throw new CorruptIndexException("Format versions mismatch");
|
throw new CorruptIndexException("Format versions mismatch");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(data);
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -104,6 +104,14 @@ public final class CompoundFileDirectory extends BaseDirectory {
|
||||||
handle = directory.openInput(fileName, context);
|
handle = directory.openInput(fileName, context);
|
||||||
try {
|
try {
|
||||||
this.entries = readEntries(directory, fileName);
|
this.entries = readEntries(directory, fileName);
|
||||||
|
if (version >= CompoundFileWriter.VERSION_CHECKSUM) {
|
||||||
|
CodecUtil.checkHeader(handle, CompoundFileWriter.DATA_CODEC, version, version);
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(handle);
|
||||||
|
}
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -97,6 +97,12 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer {
|
||||||
// Have PostingsReader init itself
|
// Have PostingsReader init itself
|
||||||
postingsReader.init(in);
|
postingsReader.init(in);
|
||||||
|
|
||||||
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(in);
|
||||||
|
|
||||||
// Read per-field details
|
// Read per-field details
|
||||||
seekDir(in, dirOffset);
|
seekDir(in, dirOffset);
|
||||||
seekDir(indexIn, indexDirOffset);
|
seekDir(indexIn, indexDirOffset);
|
||||||
|
|
Loading…
Reference in New Issue