mirror of https://github.com/apache/lucene.git
LUCENE-9356: Change test to detect mismatched checksums instead of byte flips. (#876)
This makes the test more robust and gives a good sense of whether file formats are implementing `checkIntegrity` correctly.
This commit is contained in:
parent
8921b23bcd
commit
e65c0c777b
|
@ -128,26 +128,34 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
||||||
String fileName =
|
String fileName =
|
||||||
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
|
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
|
||||||
IndexInput in = state.directory.openInput(fileName, state.context);
|
IndexInput in = state.directory.openInput(fileName, state.context);
|
||||||
int versionVectorData =
|
boolean success = false;
|
||||||
CodecUtil.checkIndexHeader(
|
try {
|
||||||
in,
|
int versionVectorData =
|
||||||
codecName,
|
CodecUtil.checkIndexHeader(
|
||||||
Lucene90HnswVectorsFormat.VERSION_START,
|
in,
|
||||||
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
codecName,
|
||||||
state.segmentInfo.getId(),
|
Lucene90HnswVectorsFormat.VERSION_START,
|
||||||
state.segmentSuffix);
|
Lucene90HnswVectorsFormat.VERSION_CURRENT,
|
||||||
if (versionMeta != versionVectorData) {
|
state.segmentInfo.getId(),
|
||||||
throw new CorruptIndexException(
|
state.segmentSuffix);
|
||||||
"Format versions mismatch: meta="
|
if (versionMeta != versionVectorData) {
|
||||||
+ versionMeta
|
throw new CorruptIndexException(
|
||||||
+ ", "
|
"Format versions mismatch: meta="
|
||||||
+ codecName
|
+ versionMeta
|
||||||
+ "="
|
+ ", "
|
||||||
+ versionVectorData,
|
+ codecName
|
||||||
in);
|
+ "="
|
||||||
|
+ versionVectorData,
|
||||||
|
in);
|
||||||
|
}
|
||||||
|
checksumRef[0] = CodecUtil.retrieveChecksum(in);
|
||||||
|
success = true;
|
||||||
|
return in;
|
||||||
|
} finally {
|
||||||
|
if (success == false) {
|
||||||
|
IOUtils.closeWhileHandlingException(in);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
checksumRef[0] = CodecUtil.retrieveChecksum(in);
|
|
||||||
return in;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
||||||
|
|
|
@ -119,26 +119,34 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
|
||||||
String fileName =
|
String fileName =
|
||||||
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
|
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
|
||||||
IndexInput in = state.directory.openInput(fileName, state.context);
|
IndexInput in = state.directory.openInput(fileName, state.context);
|
||||||
int versionVectorData =
|
boolean success = false;
|
||||||
CodecUtil.checkIndexHeader(
|
try {
|
||||||
in,
|
int versionVectorData =
|
||||||
codecName,
|
CodecUtil.checkIndexHeader(
|
||||||
Lucene91HnswVectorsFormat.VERSION_START,
|
in,
|
||||||
Lucene91HnswVectorsFormat.VERSION_CURRENT,
|
codecName,
|
||||||
state.segmentInfo.getId(),
|
Lucene91HnswVectorsFormat.VERSION_START,
|
||||||
state.segmentSuffix);
|
Lucene91HnswVectorsFormat.VERSION_CURRENT,
|
||||||
if (versionMeta != versionVectorData) {
|
state.segmentInfo.getId(),
|
||||||
throw new CorruptIndexException(
|
state.segmentSuffix);
|
||||||
"Format versions mismatch: meta="
|
if (versionMeta != versionVectorData) {
|
||||||
+ versionMeta
|
throw new CorruptIndexException(
|
||||||
+ ", "
|
"Format versions mismatch: meta="
|
||||||
+ codecName
|
+ versionMeta
|
||||||
+ "="
|
+ ", "
|
||||||
+ versionVectorData,
|
+ codecName
|
||||||
in);
|
+ "="
|
||||||
|
+ versionVectorData,
|
||||||
|
in);
|
||||||
|
}
|
||||||
|
CodecUtil.retrieveChecksum(in);
|
||||||
|
success = true;
|
||||||
|
return in;
|
||||||
|
} finally {
|
||||||
|
if (success == false) {
|
||||||
|
IOUtils.closeWhileHandlingException(in);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
CodecUtil.retrieveChecksum(in);
|
|
||||||
return in;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
||||||
|
|
|
@ -115,26 +115,34 @@ public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
|
||||||
String fileName =
|
String fileName =
|
||||||
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
|
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension);
|
||||||
IndexInput in = state.directory.openInput(fileName, state.context);
|
IndexInput in = state.directory.openInput(fileName, state.context);
|
||||||
int versionVectorData =
|
boolean success = false;
|
||||||
CodecUtil.checkIndexHeader(
|
try {
|
||||||
in,
|
int versionVectorData =
|
||||||
codecName,
|
CodecUtil.checkIndexHeader(
|
||||||
Lucene92HnswVectorsFormat.VERSION_START,
|
in,
|
||||||
Lucene92HnswVectorsFormat.VERSION_CURRENT,
|
codecName,
|
||||||
state.segmentInfo.getId(),
|
Lucene92HnswVectorsFormat.VERSION_START,
|
||||||
state.segmentSuffix);
|
Lucene92HnswVectorsFormat.VERSION_CURRENT,
|
||||||
if (versionMeta != versionVectorData) {
|
state.segmentInfo.getId(),
|
||||||
throw new CorruptIndexException(
|
state.segmentSuffix);
|
||||||
"Format versions mismatch: meta="
|
if (versionMeta != versionVectorData) {
|
||||||
+ versionMeta
|
throw new CorruptIndexException(
|
||||||
+ ", "
|
"Format versions mismatch: meta="
|
||||||
+ codecName
|
+ versionMeta
|
||||||
+ "="
|
+ ", "
|
||||||
+ versionVectorData,
|
+ codecName
|
||||||
in);
|
+ "="
|
||||||
|
+ versionVectorData,
|
||||||
|
in);
|
||||||
|
}
|
||||||
|
CodecUtil.retrieveChecksum(in);
|
||||||
|
success = true;
|
||||||
|
return in;
|
||||||
|
} finally {
|
||||||
|
if (success == false) {
|
||||||
|
IOUtils.closeWhileHandlingException(in);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
CodecUtil.retrieveChecksum(in);
|
|
||||||
return in;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
|
||||||
|
|
|
@ -20,6 +20,16 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.KnnVectorField;
|
||||||
|
import org.apache.lucene.document.LongPoint;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
@ -27,62 +37,59 @@ import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
|
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
|
||||||
import org.apache.lucene.tests.util.LineFileDocs;
|
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix;
|
|
||||||
import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
|
import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
|
||||||
import org.apache.lucene.tests.util.TestUtil;
|
import org.apache.lucene.tests.util.TestUtil;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
/** Test that the default codec detects bit flips at open or checkIntegrity time. */
|
/** Test that the default codec detects mismatched checksums at open or checkIntegrity time. */
|
||||||
@SuppressFileSystems("ExtrasFS")
|
@SuppressFileSystems("ExtrasFS")
|
||||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9356")
|
public class TestAllFilesDetectMismatchedChecksum extends LuceneTestCase {
|
||||||
public class TestAllFilesDetectBitFlips extends LuceneTestCase {
|
|
||||||
|
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
doTest(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testCFS() throws Exception {
|
|
||||||
doTest(true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void doTest(boolean cfs) throws Exception {
|
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
|
|
||||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
conf.setCodec(TestUtil.getDefaultCodec());
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
|
// Disable CFS, which makes it harder to test due to its double checksumming
|
||||||
if (cfs == false) {
|
conf.setUseCompoundFile(false);
|
||||||
conf.setUseCompoundFile(false);
|
conf.getMergePolicy().setNoCFSRatio(0.0);
|
||||||
conf.getMergePolicy().setNoCFSRatio(0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
||||||
// Use LineFileDocs so we (hopefully) get most Lucene features
|
Document doc = new Document();
|
||||||
// tested, e.g. IntPoint was recently added to it:
|
FieldType textWithTermVectorsType = new FieldType(TextField.TYPE_STORED);
|
||||||
LineFileDocs docs = new LineFileDocs(random());
|
textWithTermVectorsType.setStoreTermVectors(true);
|
||||||
|
Field text = new Field("text", "", textWithTermVectorsType);
|
||||||
|
doc.add(text);
|
||||||
|
Field termString = new StringField("string", "", Store.YES);
|
||||||
|
doc.add(termString);
|
||||||
|
Field dvString = new SortedDocValuesField("string", new BytesRef());
|
||||||
|
doc.add(dvString);
|
||||||
|
Field pointNumber = new LongPoint("long", 0L);
|
||||||
|
doc.add(pointNumber);
|
||||||
|
Field dvNumber = new NumericDocValuesField("long", 0L);
|
||||||
|
doc.add(dvNumber);
|
||||||
|
KnnVectorField vector = new KnnVectorField("vector", new float[16]);
|
||||||
|
doc.add(vector);
|
||||||
|
|
||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
riw.addDocument(docs.nextDoc());
|
text.setStringValue(TestUtil.randomAnalysisString(random(), 20, true));
|
||||||
if (random().nextInt(7) == 0) {
|
String randomString = TestUtil.randomSimpleString(random(), 5);
|
||||||
riw.commit();
|
termString.setStringValue(randomString);
|
||||||
}
|
dvString.setBytesValue(new BytesRef(randomString));
|
||||||
if (random().nextInt(20) == 0) {
|
long number = random().nextInt(10);
|
||||||
riw.deleteDocuments(new Term("docid", Integer.toString(i)));
|
pointNumber.setLongValue(number);
|
||||||
}
|
dvNumber.setLongValue(number);
|
||||||
if (random().nextInt(15) == 0) {
|
Arrays.fill(vector.vectorValue(), i % 4);
|
||||||
riw.updateNumericDocValue(
|
riw.addDocument(doc);
|
||||||
new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (TEST_NIGHTLY == false) {
|
|
||||||
riw.forceMerge(1);
|
|
||||||
}
|
}
|
||||||
|
riw.deleteDocuments(LongPoint.newRangeQuery("long", 0, 2));
|
||||||
riw.close();
|
riw.close();
|
||||||
checkBitFlips(dir);
|
checkMismatchedChecksum(dir);
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkBitFlips(Directory dir) throws IOException {
|
private void checkMismatchedChecksum(Directory dir) throws IOException {
|
||||||
for (String name : dir.listAll()) {
|
for (String name : dir.listAll()) {
|
||||||
if (name.equals(IndexWriter.WRITE_LOCK_NAME) == false) {
|
if (name.equals(IndexWriter.WRITE_LOCK_NAME) == false) {
|
||||||
corruptFile(dir, name);
|
corruptFile(dir, name);
|
||||||
|
@ -95,7 +102,9 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
|
||||||
dirCopy.setCheckIndexOnClose(false);
|
dirCopy.setCheckIndexOnClose(false);
|
||||||
|
|
||||||
long victimLength = dir.fileLength(victim);
|
long victimLength = dir.fileLength(victim);
|
||||||
long flipOffset = TestUtil.nextLong(random(), 0, victimLength - 1);
|
long flipOffset =
|
||||||
|
TestUtil.nextLong(
|
||||||
|
random(), Math.max(0, victimLength - CodecUtil.footerLength()), victimLength - 1);
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println(
|
System.out.println(
|
||||||
|
@ -118,28 +127,13 @@ public class TestAllFilesDetectBitFlips extends LuceneTestCase {
|
||||||
out.writeByte((byte) (in.readByte() + TestUtil.nextInt(random(), 0x01, 0xFF)));
|
out.writeByte((byte) (in.readByte() + TestUtil.nextInt(random(), 0x01, 0xFF)));
|
||||||
out.copyBytes(in, victimLength - flipOffset - 1);
|
out.copyBytes(in, victimLength - flipOffset - 1);
|
||||||
}
|
}
|
||||||
try (IndexInput in = dirCopy.openInput(name, IOContext.DEFAULT)) {
|
|
||||||
try {
|
|
||||||
CodecUtil.checksumEntireFile(in);
|
|
||||||
System.out.println(
|
|
||||||
"TEST: changing a byte in " + victim + " did not update the checksum)");
|
|
||||||
return;
|
|
||||||
} catch (
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
CorruptIndexException e) {
|
|
||||||
// ok
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
dirCopy.sync(Collections.singleton(name));
|
dirCopy.sync(Collections.singleton(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
// corruption must be detected
|
// corruption must be detected
|
||||||
expectThrowsAnyOf(
|
expectThrows(
|
||||||
Arrays.asList(
|
CorruptIndexException.class,
|
||||||
CorruptIndexException.class,
|
|
||||||
IndexFormatTooOldException.class,
|
|
||||||
IndexFormatTooNewException.class),
|
|
||||||
() -> {
|
() -> {
|
||||||
try (IndexReader reader = DirectoryReader.open(dirCopy)) {
|
try (IndexReader reader = DirectoryReader.open(dirCopy)) {
|
||||||
for (LeafReaderContext context : reader.leaves()) {
|
for (LeafReaderContext context : reader.leaves()) {
|
Loading…
Reference in New Issue