diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java index 3bbf0d714e7..bea89eb0791 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java @@ -662,6 +662,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { @Override public void checkIntegrity() throws IOException { + indexReader.checkIntegrity(); CodecUtil.checksumEntireFile(fieldsStream); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java index e86e7491b67..f826774fb08 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java @@ -17,6 +17,7 @@ package org.apache.lucene.codecs.compressing; import java.io.Closeable; +import java.io.IOException; import org.apache.lucene.util.Accountable; @@ -25,6 +26,9 @@ abstract class FieldsIndex implements Accountable, Cloneable, Closeable { /** Get the start pointer for the block that contains the given docID. */ abstract long getStartPointer(int docID); + /** Check the integrity of the index. */ + abstract void checkIntegrity() throws IOException; + @Override public abstract FieldsIndex clone(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java index ea888de89c8..082fe092fdf 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java @@ -136,4 +136,9 @@ final class FieldsIndexReader extends FieldsIndex { public long getMaxPointer() { return maxPointer; } + + @Override + void checkIntegrity() throws IOException { + CodecUtil.checksumEntireFile(indexInput); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java index 3e0bc484455..1b04a9a5869 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java @@ -216,4 +216,9 @@ final class LegacyFieldsIndexReader extends FieldsIndex { public void close() throws IOException { // nothing to do } + + @Override + void checkIntegrity() throws IOException { + // nothing to do, the index is checked at open time + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java index a0ad890ae2f..f59a3999f0b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java @@ -32,6 +32,8 @@ import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.IntConsumer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -47,19 +49,24 @@ import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; +import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.LuceneTestCase; @@ -720,4 +727,189 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase { } return r; } + + private static class ReadBytesIndexInputWrapper extends IndexInput { + + private final IndexInput in; + private final IntConsumer readByte; + + ReadBytesIndexInputWrapper(IndexInput in, IntConsumer readByte) { + super(in.toString()); + this.in = in; + this.readByte = readByte; + } + + @Override + public IndexInput clone() { + return new ReadBytesIndexInputWrapper(in.clone(), readByte); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long getFilePointer() { + return in.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + in.seek(pos); + } + + @Override + public long length() { + return in.length(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + IndexInput slice = in.slice(sliceDescription, offset, length); + return new ReadBytesIndexInputWrapper(slice, o -> readByte.accept(Math.toIntExact(offset + o))); + } + + @Override + public byte readByte() throws IOException { + readByte.accept(Math.toIntExact(getFilePointer())); + return in.readByte(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + final int fp = Math.toIntExact(getFilePointer()); + for (int i = 0; i < len; ++i) { + readByte.accept(Math.addExact(fp, i)); + } + in.readBytes(b, offset, len); + } + + } + + public static class ReadBytesDirectoryWrapper extends FilterDirectory { + + ReadBytesDirectoryWrapper(Directory in) { + super(in); + } + + private final Map readBytes = new ConcurrentHashMap<>(); + + Map getReadBytes() { + return Map.copyOf(readBytes); + } + + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + IndexInput in = super.openInput(name, context); + final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length()))); + if (set.length() != in.length()) { + throw new IllegalStateException(); + } + return new ReadBytesIndexInputWrapper(in, set::set); + } + + @Override + public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException { + ChecksumIndexInput in = super.openChecksumInput(name, context); + final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length()))); + if (set.length() != in.length()) { + throw new IllegalStateException(); + } + return new ChecksumIndexInput(in.toString()) { + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + final int fp = Math.toIntExact(getFilePointer()); + set.set(fp, Math.addExact(fp, len)); + in.readBytes(b, offset, len); + } + + @Override + public byte readByte() throws IOException { + set.set(Math.toIntExact(getFilePointer())); + return in.readByte(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long length() { + return in.length(); + } + + @Override + public long getFilePointer() { + return in.getFilePointer(); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long getChecksum() throws IOException { + return in.getChecksum(); + } + }; + } + + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException { + throw new UnsupportedOperationException(); + } + } + + /** This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests that the + * combination of opening a reader and calling checkIntegrity on it reads all bytes of all files. */ + public void testCheckIntegrityReadsAllBytes() throws Exception { + assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec); + Directory dir = applyCreatedVersionMajor(newDirectory()); + + IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter w = new IndexWriter(dir, cfg); + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document d = new Document(); + addRandomFields(d); + w.addDocument(d); + } + w.forceMerge(1); + w.commit(); + w.close(); + + ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir); + IndexReader reader = DirectoryReader.open(readBytesWrapperDir); + LeafReader leafReader = getOnlyLeafReader(reader); + leafReader.checkIntegrity(); + + Map readBytesMap = readBytesWrapperDir.getReadBytes(); + + Set unreadFiles = new HashSet<>(Arrays.asList(readBytesWrapperDir.listAll())); + unreadFiles.removeAll(readBytesMap.keySet()); + assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty()); + + List messages = new ArrayList<>(); + for (Map.Entry entry : readBytesMap.entrySet()) { + String name = entry.getKey(); + FixedBitSet unreadBytes = entry.getValue().clone(); + unreadBytes.flip(0, unreadBytes.length()); + int unread = unreadBytes.nextSetBit(0); + if (unread != Integer.MAX_VALUE) { + messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read."); + } + } + assertTrue(String.join("\n", messages), messages.isEmpty()); + reader.close(); + dir.close(); + } }