mirror of https://github.com/apache/lucene.git
LUCENE-9247: Add tests for `checkIntegrity`. (#1284)
This adds a test to `BaseIndexFileFormatTestCase` that the combination of opening a reader and calling `checkIntegrity` on it reads all bytes of all files (including index headers and footers). This would help detect most cases when `checkIntegrity` is not implemented correctly.
This commit is contained in:
parent
05830b463b
commit
a703c6e334
|
@ -665,6 +665,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkIntegrity() throws IOException {
|
public void checkIntegrity() throws IOException {
|
||||||
|
indexReader.checkIntegrity();
|
||||||
CodecUtil.checksumEntireFile(fieldsStream);
|
CodecUtil.checksumEntireFile(fieldsStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1109,6 +1109,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkIntegrity() throws IOException {
|
public void checkIntegrity() throws IOException {
|
||||||
|
indexReader.checkIntegrity();
|
||||||
CodecUtil.checksumEntireFile(vectorsStream);
|
CodecUtil.checksumEntireFile(vectorsStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.codecs.compressing;
|
package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
|
|
||||||
|
@ -25,6 +26,9 @@ abstract class FieldsIndex implements Accountable, Cloneable, Closeable {
|
||||||
/** Get the start pointer for the block that contains the given docID. */
|
/** Get the start pointer for the block that contains the given docID. */
|
||||||
abstract long getStartPointer(int docID);
|
abstract long getStartPointer(int docID);
|
||||||
|
|
||||||
|
/** Check the integrity of the index. */
|
||||||
|
abstract void checkIntegrity() throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public abstract FieldsIndex clone();
|
public abstract FieldsIndex clone();
|
||||||
|
|
||||||
|
|
|
@ -136,4 +136,9 @@ final class FieldsIndexReader extends FieldsIndex {
|
||||||
public long getMaxPointer() {
|
public long getMaxPointer() {
|
||||||
return maxPointer;
|
return maxPointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void checkIntegrity() throws IOException {
|
||||||
|
CodecUtil.checksumEntireFile(indexInput);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -216,4 +216,9 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void checkIntegrity() throws IOException {
|
||||||
|
// nothing to do, the index is checked at open time
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,8 @@ import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.function.IntConsumer;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
@ -47,19 +49,24 @@ import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
import org.apache.lucene.codecs.TermVectorsReader;
|
import org.apache.lucene.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.codecs.TermVectorsWriter;
|
import org.apache.lucene.codecs.TermVectorsWriter;
|
||||||
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
|
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FilterDirectory;
|
||||||
import org.apache.lucene.store.FlushInfo;
|
import org.apache.lucene.store.FlushInfo;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CloseableThreadLocal;
|
import org.apache.lucene.util.CloseableThreadLocal;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.InfoStream;
|
import org.apache.lucene.util.InfoStream;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
@ -720,4 +727,189 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class ReadBytesIndexInputWrapper extends IndexInput {
|
||||||
|
|
||||||
|
private final IndexInput in;
|
||||||
|
private final IntConsumer readByte;
|
||||||
|
|
||||||
|
ReadBytesIndexInputWrapper(IndexInput in, IntConsumer readByte) {
|
||||||
|
super(in.toString());
|
||||||
|
this.in = in;
|
||||||
|
this.readByte = readByte;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexInput clone() {
|
||||||
|
return new ReadBytesIndexInputWrapper(in.clone(), readByte);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getFilePointer() {
|
||||||
|
return in.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void seek(long pos) throws IOException {
|
||||||
|
in.seek(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long length() {
|
||||||
|
return in.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
|
||||||
|
IndexInput slice = in.slice(sliceDescription, offset, length);
|
||||||
|
return new ReadBytesIndexInputWrapper(slice, o -> readByte.accept(Math.toIntExact(offset + o)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte readByte() throws IOException {
|
||||||
|
readByte.accept(Math.toIntExact(getFilePointer()));
|
||||||
|
return in.readByte();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readBytes(byte[] b, int offset, int len) throws IOException {
|
||||||
|
final int fp = Math.toIntExact(getFilePointer());
|
||||||
|
for (int i = 0; i < len; ++i) {
|
||||||
|
readByte.accept(Math.addExact(fp, i));
|
||||||
|
}
|
||||||
|
in.readBytes(b, offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ReadBytesDirectoryWrapper extends FilterDirectory {
|
||||||
|
|
||||||
|
ReadBytesDirectoryWrapper(Directory in) {
|
||||||
|
super(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final Map<String, FixedBitSet> readBytes = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
Map<String, FixedBitSet> getReadBytes() {
|
||||||
|
return Collections.unmodifiableMap(new HashMap<>(readBytes));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexInput openInput(String name, IOContext context) throws IOException {
|
||||||
|
IndexInput in = super.openInput(name, context);
|
||||||
|
final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length())));
|
||||||
|
if (set.length() != in.length()) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
return new ReadBytesIndexInputWrapper(in, set::set);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException {
|
||||||
|
ChecksumIndexInput in = super.openChecksumInput(name, context);
|
||||||
|
final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length())));
|
||||||
|
if (set.length() != in.length()) {
|
||||||
|
throw new IllegalStateException();
|
||||||
|
}
|
||||||
|
return new ChecksumIndexInput(in.toString()) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readBytes(byte[] b, int offset, int len) throws IOException {
|
||||||
|
final int fp = Math.toIntExact(getFilePointer());
|
||||||
|
set.set(fp, Math.addExact(fp, len));
|
||||||
|
in.readBytes(b, offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte readByte() throws IOException {
|
||||||
|
set.set(Math.toIntExact(getFilePointer()));
|
||||||
|
return in.readByte();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long length() {
|
||||||
|
return in.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getFilePointer() {
|
||||||
|
return in.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getChecksum() throws IOException {
|
||||||
|
return in.getChecksum();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexOutput createOutput(String name, IOContext context) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests that the
|
||||||
|
* combination of opening a reader and calling checkIntegrity on it reads all bytes of all files. */
|
||||||
|
public void testCheckIntegrityReadsAllBytes() throws Exception {
|
||||||
|
assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec);
|
||||||
|
Directory dir = applyCreatedVersionMajor(newDirectory());
|
||||||
|
|
||||||
|
IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
IndexWriter w = new IndexWriter(dir, cfg);
|
||||||
|
final int numDocs = atLeast(100);
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
Document d = new Document();
|
||||||
|
addRandomFields(d);
|
||||||
|
w.addDocument(d);
|
||||||
|
}
|
||||||
|
w.forceMerge(1);
|
||||||
|
w.commit();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir);
|
||||||
|
IndexReader reader = DirectoryReader.open(readBytesWrapperDir);
|
||||||
|
LeafReader leafReader = getOnlyLeafReader(reader);
|
||||||
|
leafReader.checkIntegrity();
|
||||||
|
|
||||||
|
Map<String, FixedBitSet> readBytesMap = readBytesWrapperDir.getReadBytes();
|
||||||
|
|
||||||
|
Set<String> unreadFiles = new HashSet<>(Arrays.asList(readBytesWrapperDir.listAll()));
|
||||||
|
unreadFiles.removeAll(readBytesMap.keySet());
|
||||||
|
assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty());
|
||||||
|
|
||||||
|
List<String> messages = new ArrayList<>();
|
||||||
|
for (Map.Entry<String, FixedBitSet> entry : readBytesMap.entrySet()) {
|
||||||
|
String name = entry.getKey();
|
||||||
|
FixedBitSet unreadBytes = entry.getValue().clone();
|
||||||
|
unreadBytes.flip(0, unreadBytes.length());
|
||||||
|
int unread = unreadBytes.nextSetBit(0);
|
||||||
|
if (unread != Integer.MAX_VALUE) {
|
||||||
|
messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertTrue(String.join("\n", messages), messages.isEmpty());
|
||||||
|
reader.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue