LUCENE-9272: Move checksum verification of the `.tip` file to `checkIntegrity()`. (#1339)

This commit is contained in:
Adrien Grand 2020-03-11 18:15:29 +01:00 committed by GitHub
parent 32a2076c60
commit ed59c3eb33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 9 additions and 18 deletions

View File

@@ -89,10 +89,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
final static String TERMS_CODEC_NAME = "BlockTreeTermsDict";
/** Initial terms format. */
public static final int VERSION_START = 2;
/** Auto-prefix terms have been superseded by points. */
public static final int VERSION_AUTO_PREFIX_TERMS_REMOVED = 3;
public static final int VERSION_START = 3;
/** The long[] + byte[] metadata has been replaced with a single byte[]. */
public static final int VERSION_META_LONGS_REMOVED = 4;
@@ -137,26 +134,17 @@ public final class BlockTreeTermsReader extends FieldsProducer {
termsIn = state.directory.openInput(termsName, state.context);
version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
if (version < VERSION_AUTO_PREFIX_TERMS_REMOVED) {
// pre-6.2 index, records whether auto-prefix terms are enabled in the header
byte b = termsIn.readByte();
if (b != 0) {
throw new CorruptIndexException("Index header pretends the index has auto-prefix terms: " + b, termsIn);
}
}
String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
indexIn = state.directory.openInput(indexName, state.context);
CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix);
CodecUtil.checksumEntireFile(indexIn);
// Have PostingsReader init itself
postingsReader.init(termsIn, state);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
// Verifying the checksum against all bytes would be too costly, but for now we at least
// verify proper structure of the checksum footer. This is cheap and can detect some forms
// of corruption such as file truncation.
CodecUtil.retrieveChecksum(indexIn);
CodecUtil.retrieveChecksum(termsIn);
// Read per-field details
@@ -307,6 +295,9 @@ public final class BlockTreeTermsReader extends FieldsProducer {
@Override
public void checkIntegrity() throws IOException {
// terms index
CodecUtil.checksumEntireFile(indexIn);
// term dictionary
CodecUtil.checksumEntireFile(termsIn);