From a703c6e334ee736bd6c7d255ee5cbcc26154f9a0 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 28 Feb 2020 14:19:56 +0100 Subject: [PATCH] LUCENE-9247: Add tests for `checkIntegrity`. (#1284) This adds a test to `BaseIndexFileFormatTestCase` that the combination of opening a reader and calling `checkIntegrity` on it reads all bytes of all files (including index headers and footers). This would help detect most cases when `checkIntegrity` is not implemented correctly. --- .../CompressingStoredFieldsReader.java | 1 + .../CompressingTermVectorsReader.java | 1 + .../codecs/compressing/FieldsIndex.java | 4 + .../codecs/compressing/FieldsIndexReader.java | 5 + .../compressing/LegacyFieldsIndexReader.java | 5 + .../index/BaseIndexFileFormatTestCase.java | 192 ++++++++++++++++++ 6 files changed, 208 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java index 1af41337582..c6476a509a6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java @@ -665,6 +665,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader { @Override public void checkIntegrity() throws IOException { + indexReader.checkIntegrity(); CodecUtil.checksumEntireFile(fieldsStream); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java index 4b045f9d113..6b66e24c07f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java @@ -1109,6 +1109,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem @Override public void checkIntegrity() throws IOException { + indexReader.checkIntegrity(); CodecUtil.checksumEntireFile(vectorsStream); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java index e86e7491b67..f826774fb08 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java @@ -17,6 +17,7 @@ package org.apache.lucene.codecs.compressing; import java.io.Closeable; +import java.io.IOException; import org.apache.lucene.util.Accountable; @@ -25,6 +26,9 @@ abstract class FieldsIndex implements Accountable, Cloneable, Closeable { /** Get the start pointer for the block that contains the given docID. */ abstract long getStartPointer(int docID); + /** Check the integrity of the index. */ + abstract void checkIntegrity() throws IOException; + @Override public abstract FieldsIndex clone(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java index 566c49c19a3..d0e180c17e1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java @@ -136,4 +136,9 @@ final class FieldsIndexReader extends FieldsIndex { public long getMaxPointer() { return maxPointer; } + + @Override + void checkIntegrity() throws IOException { + CodecUtil.checksumEntireFile(indexInput); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java index 3e0bc484455..1b04a9a5869 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java @@ -216,4 +216,9 @@ final class LegacyFieldsIndexReader extends FieldsIndex { public void close() throws IOException { // nothing to do } + + @Override + void checkIntegrity() throws IOException { + // nothing to do, the index is checked at open time + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java index a0ad890ae2f..a47b027e5a4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java @@ -32,6 +32,8 @@ import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.IntConsumer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; @@ -47,19 +49,24 @@ import org.apache.lucene.codecs.StoredFieldsWriter; import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.TermVectorsWriter; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; +import org.apache.lucene.codecs.simpletext.SimpleTextCodec; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.LuceneTestCase; @@ -720,4 +727,189 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase { } return r; } + + private static class ReadBytesIndexInputWrapper extends IndexInput { + + private final IndexInput in; + private final IntConsumer readByte; + + ReadBytesIndexInputWrapper(IndexInput in, IntConsumer readByte) { + super(in.toString()); + this.in = in; + this.readByte = readByte; + } + + @Override + public IndexInput clone() { + return new ReadBytesIndexInputWrapper(in.clone(), readByte); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long getFilePointer() { + return in.getFilePointer(); + } + + @Override + public void seek(long pos) throws IOException { + in.seek(pos); + } + + @Override + public long length() { + return in.length(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + IndexInput slice = in.slice(sliceDescription, offset, length); + return new ReadBytesIndexInputWrapper(slice, o -> readByte.accept(Math.toIntExact(offset + o))); + } + + @Override + public byte readByte() throws IOException { + readByte.accept(Math.toIntExact(getFilePointer())); + return in.readByte(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + final int fp = Math.toIntExact(getFilePointer()); + for (int i = 0; i < len; ++i) { + readByte.accept(Math.addExact(fp, i)); + } + in.readBytes(b, offset, len); + } + + } + + private static class ReadBytesDirectoryWrapper extends FilterDirectory { + + ReadBytesDirectoryWrapper(Directory in) { + super(in); + } + + private final Map readBytes = new ConcurrentHashMap<>(); + + Map getReadBytes() { + return Collections.unmodifiableMap(new HashMap<>(readBytes)); + } + + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + IndexInput in = super.openInput(name, context); + final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length()))); + if (set.length() != in.length()) { + throw new IllegalStateException(); + } + return new ReadBytesIndexInputWrapper(in, set::set); + } + + @Override + public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException { + ChecksumIndexInput in = super.openChecksumInput(name, context); + final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length()))); + if (set.length() != in.length()) { + throw new IllegalStateException(); + } + return new ChecksumIndexInput(in.toString()) { + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + final int fp = Math.toIntExact(getFilePointer()); + set.set(fp, Math.addExact(fp, len)); + in.readBytes(b, offset, len); + } + + @Override + public byte readByte() throws IOException { + set.set(Math.toIntExact(getFilePointer())); + return in.readByte(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long length() { + return in.length(); + } + + @Override + public long getFilePointer() { + return in.getFilePointer(); + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public long getChecksum() throws IOException { + return in.getChecksum(); + } + }; + } + + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException { + throw new UnsupportedOperationException(); + } + } + + /** This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests that the + * combination of opening a reader and calling checkIntegrity on it reads all bytes of all files. */ + public void testCheckIntegrityReadsAllBytes() throws Exception { + assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec); + Directory dir = applyCreatedVersionMajor(newDirectory()); + + IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter w = new IndexWriter(dir, cfg); + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document d = new Document(); + addRandomFields(d); + w.addDocument(d); + } + w.forceMerge(1); + w.commit(); + w.close(); + + ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir); + IndexReader reader = DirectoryReader.open(readBytesWrapperDir); + LeafReader leafReader = getOnlyLeafReader(reader); + leafReader.checkIntegrity(); + + Map readBytesMap = readBytesWrapperDir.getReadBytes(); + + Set unreadFiles = new HashSet<>(Arrays.asList(readBytesWrapperDir.listAll())); + unreadFiles.removeAll(readBytesMap.keySet()); + assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty()); + + List messages = new ArrayList<>(); + for (Map.Entry entry : readBytesMap.entrySet()) { + String name = entry.getKey(); + FixedBitSet unreadBytes = entry.getValue().clone(); + unreadBytes.flip(0, unreadBytes.length()); + int unread = unreadBytes.nextSetBit(0); + if (unread != Integer.MAX_VALUE) { + messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read."); + } + } + assertTrue(String.join("\n", messages), messages.isEmpty()); + reader.close(); + dir.close(); + } }