LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)

This makes the test more robust and gives a good sense of whether file formats
are implementing `checkIntegrity` correctly.
Adrien Grand 2022-05-17 14:29:51 +02:00 committed by GitHub
parent 8921b23bcd
commit e65c0c777b
4 changed files with 130 additions and 112 deletions
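
For context on what the rewritten test exercises: Lucene file formats end every file with a codec footer that stores a CRC-32 checksum, and a correct `checkIntegrity` implementation re-reads the file and verifies that footer. The standalone sketch below is not part of this commit; the "DemoCodec" name, the "demo.bin" file, and the class name are made up for illustration. It shows the write/verify round trip using the standard CodecUtil helpers: corrupting a byte of such a file causes CodecUtil.checksumEntireFile to throw CorruptIndexException, which is what the new test asserts at open or checkIntegrity time.

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

public class ChecksumRoundTripSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      // Write a tiny file framed the way Lucene formats frame theirs:
      // header (magic + codec name + version), payload, footer (magic + CRC-32).
      try (IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT)) {
        CodecUtil.writeHeader(out, "DemoCodec", 0);
        out.writeLong(42L);
        CodecUtil.writeFooter(out);
      }
      // This is the check that checkIntegrity implementations typically delegate to:
      // re-read the whole file and compare the computed checksum against the footer.
      // A mismatched byte (including one in the footer itself) raises CorruptIndexException.
      try (IndexInput in = dir.openInput("demo.bin", IOContext.DEFAULT)) {
        CodecUtil.checksumEntireFile(in);
      }
    }
  }
}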

Lucene90HnswVectorsReader.java

@@ -128,26 +128,34 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
     String fileName =
         IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
     IndexInput in = state.directory.openInput(fileName, state.context);
-    int versionVectorData =
-        CodecUtil.checkIndexHeader(
-            in,
-            codecName,
-            Lucene90HnswVectorsFormat.VERSION_START,
-            Lucene90HnswVectorsFormat.VERSION_CURRENT,
-            state.segmentInfo.getId(),
-            state.segmentSuffix);
-    if (versionMeta != versionVectorData) {
-      throw new CorruptIndexException(
-          "Format versions mismatch: meta="
-              + versionMeta
-              + ", "
-              + codecName
-              + "="
-              + versionVectorData,
-          in);
+    boolean success = false;
+    try {
+      int versionVectorData =
+          CodecUtil.checkIndexHeader(
+              in,
+              codecName,
+              Lucene90HnswVectorsFormat.VERSION_START,
+              Lucene90HnswVectorsFormat.VERSION_CURRENT,
+              state.segmentInfo.getId(),
+              state.segmentSuffix);
+      if (versionMeta != versionVectorData) {
+        throw new CorruptIndexException(
+            "Format versions mismatch: meta="
+                + versionMeta
+                + ", "
+                + codecName
+                + "="
+                + versionVectorData,
+            in);
+      }
+      checksumRef[0] = CodecUtil.retrieveChecksum(in);
+      success = true;
+      return in;
+    } finally {
+      if (success == false) {
+        IOUtils.closeWhileHandlingException(in);
+      }
     }
-    checksumRef[0] = CodecUtil.retrieveChecksum(in);
-    return in;
   }
 
   private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {

Lucene91HnswVectorsReader.java

@@ -119,26 +119,34 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
     String fileName =
         IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
     IndexInput in = state.directory.openInput(fileName, state.context);
-    int versionVectorData =
-        CodecUtil.checkIndexHeader(
-            in,
-            codecName,
-            Lucene91HnswVectorsFormat.VERSION_START,
-            Lucene91HnswVectorsFormat.VERSION_CURRENT,
-            state.segmentInfo.getId(),
-            state.segmentSuffix);
-    if (versionMeta != versionVectorData) {
-      throw new CorruptIndexException(
-          "Format versions mismatch: meta="
-              + versionMeta
-              + ", "
-              + codecName
-              + "="
-              + versionVectorData,
-          in);
+    boolean success = false;
+    try {
+      int versionVectorData =
+          CodecUtil.checkIndexHeader(
+              in,
+              codecName,
+              Lucene91HnswVectorsFormat.VERSION_START,
+              Lucene91HnswVectorsFormat.VERSION_CURRENT,
+              state.segmentInfo.getId(),
+              state.segmentSuffix);
+      if (versionMeta != versionVectorData) {
+        throw new CorruptIndexException(
+            "Format versions mismatch: meta="
+                + versionMeta
+                + ", "
+                + codecName
+                + "="
+                + versionVectorData,
+            in);
+      }
+      CodecUtil.retrieveChecksum(in);
+      success = true;
+      return in;
+    } finally {
+      if (success == false) {
+        IOUtils.closeWhileHandlingException(in);
+      }
     }
-    CodecUtil.retrieveChecksum(in);
-    return in;
   }
 
   private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {

Lucene92HnswVectorsReader.java

@@ -115,26 +115,34 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
     String fileName =
         IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
     IndexInput in = state.directory.openInput(fileName, state.context);
-    int versionVectorData =
-        CodecUtil.checkIndexHeader(
-            in,
-            codecName,
-            Lucene92HnswVectorsFormat.VERSION_START,
-            Lucene92HnswVectorsFormat.VERSION_CURRENT,
-            state.segmentInfo.getId(),
-            state.segmentSuffix);
-    if (versionMeta != versionVectorData) {
-      throw new CorruptIndexException(
-          "Format versions mismatch: meta="
-              + versionMeta
-              + ", "
-              + codecName
-              + "="
-              + versionVectorData,
-          in);
+    boolean success = false;
+    try {
+      int versionVectorData =
+          CodecUtil.checkIndexHeader(
+              in,
+              codecName,
+              Lucene92HnswVectorsFormat.VERSION_START,
+              Lucene92HnswVectorsFormat.VERSION_CURRENT,
+              state.segmentInfo.getId(),
+              state.segmentSuffix);
+      if (versionMeta != versionVectorData) {
+        throw new CorruptIndexException(
+            "Format versions mismatch: meta="
+                + versionMeta
+                + ", "
+                + codecName
+                + "="
+                + versionVectorData,
+            in);
+      }
+      CodecUtil.retrieveChecksum(in);
+      success = true;
+      return in;
+    } finally {
+      if (success == false) {
+        IOUtils.closeWhileHandlingException(in);
+      }
     }
-    CodecUtil.retrieveChecksum(in);
-    return in;
   }
 
   private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {

TestAllFilesDetectBitFlips.java renamed to TestAllFilesDetectMismatchedChecksum.java

@@ -20,6 +20,16 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
@@ -27,62 +37,59 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.tests.store.BaseDirectoryWrapper;
-import org.apache.lucene.tests.util.LineFileDocs;
 import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix;
 import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
 import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.BytesRef;
 
-/** Test that the default codec detects bit flips at open or checkIntegrity time. */
+/** Test that the default codec detects mismatched checksums at open or checkIntegrity time. */
 @SuppressFileSystems("ExtrasFS")
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9356")
-public class TestAllFilesDetectBitFlips extends LuceneTestCase {
+public class TestAllFilesDetectMismatchedChecksum extends LuceneTestCase {
 
   public void test() throws Exception {
-    doTest(false);
-  }
-
-  public void testCFS() throws Exception {
-    doTest(true);
-  }
-
-  public void doTest(boolean cfs) throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
     conf.setCodec(TestUtil.getDefaultCodec());
-    if (cfs == false) {
-      conf.setUseCompoundFile(false);
-      conf.getMergePolicy().setNoCFSRatio(0.0);
-    }
+    // Disable CFS, which makes it harder to test due to its double checksumming
+    conf.setUseCompoundFile(false);
+    conf.getMergePolicy().setNoCFSRatio(0.0);
     RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
-    // Use LineFileDocs so we (hopefully) get most Lucene features
-    // tested, e.g. IntPoint was recently added to it:
-    LineFileDocs docs = new LineFileDocs(random());
+    Document doc = new Document();
+    FieldType textWithTermVectorsType = new FieldType(TextField.TYPE_STORED);
+    textWithTermVectorsType.setStoreTermVectors(true);
+    Field text = new Field("text", "", textWithTermVectorsType);
+    doc.add(text);
+    Field termString = new StringField("string", "", Store.YES);
+    doc.add(termString);
+    Field dvString = new SortedDocValuesField("string", new BytesRef());
+    doc.add(dvString);
+    Field pointNumber = new LongPoint("long", 0L);
+    doc.add(pointNumber);
+    Field dvNumber = new NumericDocValuesField("long", 0L);
+    doc.add(dvNumber);
+    KnnVectorField vector = new KnnVectorField("vector", new float[16]);
+    doc.add(vector);
     for (int i = 0; i < 100; i++) {
-      riw.addDocument(docs.nextDoc());
-      if (random().nextInt(7) == 0) {
-        riw.commit();
-      }
-      if (random().nextInt(20) == 0) {
-        riw.deleteDocuments(new Term("docid", Integer.toString(i)));
-      }
-      if (random().nextInt(15) == 0) {
-        riw.updateNumericDocValue(
-            new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
-      }
-    }
-    if (TEST_NIGHTLY == false) {
-      riw.forceMerge(1);
+      text.setStringValue(TestUtil.randomAnalysisString(random(), 20, true));
+      String randomString = TestUtil.randomSimpleString(random(), 5);
+      termString.setStringValue(randomString);
+      dvString.setBytesValue(new BytesRef(randomString));
+      long number = random().nextInt(10);
+      pointNumber.setLongValue(number);
+      dvNumber.setLongValue(number);
+      Arrays.fill(vector.vectorValue(), i % 4);
+      riw.addDocument(doc);
     }
+    riw.deleteDocuments(LongPoint.newRangeQuery("long", 0, 2));
     riw.close();
-    checkBitFlips(dir);
+    checkMismatchedChecksum(dir);
     dir.close();
   }
 
-  private void checkBitFlips(Directory dir) throws IOException {
+  private void checkMismatchedChecksum(Directory dir) throws IOException {
     for (String name : dir.listAll()) {
       if (name.equals(IndexWriter.WRITE_LOCK_NAME) == false) {
         corruptFile(dir, name);
@@ -95,7 +102,9 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
       dirCopy.setCheckIndexOnClose(false);
 
       long victimLength = dir.fileLength(victim);
-      long flipOffset = TestUtil.nextLong(random(), 0, victimLength - 1);
+      long flipOffset =
+          TestUtil.nextLong(
+              random(), Math.max(0, victimLength - CodecUtil.footerLength()), victimLength - 1);
 
       if (VERBOSE) {
         System.out.println(
@@ -118,28 +127,13 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
             out.writeByte((byte) (in.readByte() + TestUtil.nextInt(random(), 0x01, 0xFF)));
             out.copyBytes(in, victimLength - flipOffset - 1);
           }
-          try (IndexInput in = dirCopy.openInput(name, IOContext.DEFAULT)) {
-            try {
-              CodecUtil.checksumEntireFile(in);
-              System.out.println(
-                  "TEST: changing a byte in " + victim + " did not update the checksum)");
-              return;
-            } catch (
-                @SuppressWarnings("unused")
-                CorruptIndexException e) {
-              // ok
-            }
-          }
         }
         dirCopy.sync(Collections.singleton(name));
       }
 
       // corruption must be detected
-      expectThrowsAnyOf(
-          Arrays.asList(
-              CorruptIndexException.class,
-              IndexFormatTooOldException.class,
-              IndexFormatTooNewException.class),
+      expectThrows(
+          CorruptIndexException.class,
           () -> {
             try (IndexReader reader = DirectoryReader.open(dirCopy)) {
               for (LeafReaderContext context : reader.leaves()) {