From cd984e2dc051d6b9415aa411fbd4332b25ddfbd8 Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Fri, 28 Feb 2020 14:19:56 +0100
Subject: [PATCH] LUCENE-9247: Add tests for `checkIntegrity`. (#1284)

This adds a test to `BaseIndexFileFormatTestCase` that the combination
of opening a reader and calling `checkIntegrity` on it reads all bytes
of all files (including index headers and footers). This would help
detect most cases when `checkIntegrity` is not implemented correctly.
---
 .../CompressingStoredFieldsReader.java        |   1 +
 .../codecs/compressing/FieldsIndex.java       |   4 +
 .../codecs/compressing/FieldsIndexReader.java |   5 +
 .../compressing/LegacyFieldsIndexReader.java  |   5 +
 .../index/BaseIndexFileFormatTestCase.java    | 192 ++++++++++++++++++
 5 files changed, 207 insertions(+)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
index 3bbf0d714e7..bea89eb0791 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
@@ -662,6 +662,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
 
   @Override
   public void checkIntegrity() throws IOException {
+    indexReader.checkIntegrity();
     CodecUtil.checksumEntireFile(fieldsStream);
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java
index e86e7491b67..f826774fb08 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndex.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.codecs.compressing;
 
 import java.io.Closeable;
+import java.io.IOException;
 
 import org.apache.lucene.util.Accountable;
 
@@ -25,6 +26,9 @@ abstract class FieldsIndex implements Accountable, Cloneable, Closeable {
   /** Get the start pointer for the block that contains the given docID. */
   abstract long getStartPointer(int docID);
 
+  /** Check the integrity of the index. */
+  abstract void checkIntegrity() throws IOException;
+
   @Override
   public abstract FieldsIndex clone();
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java
index ea888de89c8..082fe092fdf 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/FieldsIndexReader.java
@@ -136,4 +136,9 @@ final class FieldsIndexReader extends FieldsIndex {
   public long getMaxPointer() {
     return maxPointer;
   }
+
+  @Override
+  void checkIntegrity() throws IOException {
+    CodecUtil.checksumEntireFile(indexInput);
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java
index 3e0bc484455..1b04a9a5869 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/LegacyFieldsIndexReader.java
@@ -216,4 +216,9 @@ final class LegacyFieldsIndexReader extends FieldsIndex {
   public void close() throws IOException {
     // nothing to do
   }
+
+  @Override
+  void checkIntegrity() throws IOException {
+    // nothing to do, the index is checked at open time
+  }
 }
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
index a0ad890ae2f..f59a3999f0b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
@@ -32,6 +32,8 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.IntConsumer;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -47,19 +49,24 @@ import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.TermVectorsReader;
 import org.apache.lucene.codecs.TermVectorsWriter;
 import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
+import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FilterDirectory;
 import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.LuceneTestCase;
@@ -720,4 +727,189 @@ abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
     }
     return r;
   }
+
+  private static class ReadBytesIndexInputWrapper extends IndexInput {
+
+    private final IndexInput in;
+    private final IntConsumer readByte;
+
+    ReadBytesIndexInputWrapper(IndexInput in, IntConsumer readByte) {
+      super(in.toString());
+      this.in = in;
+      this.readByte = readByte;
+    }
+
+    @Override
+    public IndexInput clone() {
+      return new ReadBytesIndexInputWrapper(in.clone(), readByte);
+    }
+
+    @Override
+    public void close() throws IOException {
+      in.close();
+    }
+
+    @Override
+    public long getFilePointer() {
+      return in.getFilePointer();
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      in.seek(pos);
+    }
+
+    @Override
+    public long length() {
+      return in.length();
+    }
+
+    @Override
+    public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
+      IndexInput slice = in.slice(sliceDescription, offset, length);
+      return new ReadBytesIndexInputWrapper(slice, o -> readByte.accept(Math.toIntExact(offset + o)));
+    }
+
+    @Override
+    public byte readByte() throws IOException {
+      readByte.accept(Math.toIntExact(getFilePointer()));
+      return in.readByte();
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) throws IOException {
+      final int fp = Math.toIntExact(getFilePointer());
+      for (int i = 0; i < len; ++i) {
+        readByte.accept(Math.addExact(fp, i));
+      }
+      in.readBytes(b, offset, len);
+    }
+
+  }
+
+  public static class ReadBytesDirectoryWrapper extends FilterDirectory {
+
+    ReadBytesDirectoryWrapper(Directory in) {
+      super(in);
+    }
+
+    private final Map<String, FixedBitSet> readBytes = new ConcurrentHashMap<>();
+
+    Map<String, FixedBitSet> getReadBytes() {
+      return Map.copyOf(readBytes);
+    }
+
+    @Override
+    public IndexInput openInput(String name, IOContext context) throws IOException {
+      IndexInput in = super.openInput(name, context);
+      final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length())));
+      if (set.length() != in.length()) {
+        throw new IllegalStateException();
+      }
+      return new ReadBytesIndexInputWrapper(in, set::set);
+    }
+
+    @Override
+    public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException {
+      ChecksumIndexInput in = super.openChecksumInput(name, context);
+      final FixedBitSet set = readBytes.computeIfAbsent(name, n -> new FixedBitSet(Math.toIntExact(in.length())));
+      if (set.length() != in.length()) {
+        throw new IllegalStateException();
+      }
+      return new ChecksumIndexInput(in.toString()) {
+
+        @Override
+        public void readBytes(byte[] b, int offset, int len) throws IOException {
+          final int fp = Math.toIntExact(getFilePointer());
+          set.set(fp, Math.addExact(fp, len));
+          in.readBytes(b, offset, len);
+        }
+
+        @Override
+        public byte readByte() throws IOException {
+          set.set(Math.toIntExact(getFilePointer()));
+          return in.readByte();
+        }
+
+        @Override
+        public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
+          throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public long length() {
+          return in.length();
+        }
+
+        @Override
+        public long getFilePointer() {
+          return in.getFilePointer();
+        }
+
+        @Override
+        public void close() throws IOException {
+          in.close();
+        }
+
+        @Override
+        public long getChecksum() throws IOException {
+          return in.getChecksum();
+        }
+      };
+    }
+
+    @Override
+    public IndexOutput createOutput(String name, IOContext context) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /** This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests that the
+   *  combination of opening a reader and calling checkIntegrity on it reads all bytes of all files. */
+  public void testCheckIntegrityReadsAllBytes() throws Exception {
+    assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec);
+    Directory dir = applyCreatedVersionMajor(newDirectory());
+
+    IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random()));
+    IndexWriter w = new IndexWriter(dir, cfg);
+    final int numDocs = atLeast(100);
+    for (int i = 0; i < numDocs; ++i) {
+      Document d = new Document();
+      addRandomFields(d);
+      w.addDocument(d);
+    }
+    w.forceMerge(1);
+    w.commit();
+    w.close();
+
+    ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir);
+    IndexReader reader = DirectoryReader.open(readBytesWrapperDir);
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    leafReader.checkIntegrity();
+
+    Map<String, FixedBitSet> readBytesMap = readBytesWrapperDir.getReadBytes();
+
+    Set<String> unreadFiles = new HashSet<>(Arrays.asList(readBytesWrapperDir.listAll()));
+    unreadFiles.removeAll(readBytesMap.keySet());
+    assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty());
+
+    List<String> messages = new ArrayList<>();
+    for (Map.Entry<String, FixedBitSet> entry : readBytesMap.entrySet()) {
+      String name = entry.getKey();
+      FixedBitSet unreadBytes = entry.getValue().clone();
+      unreadBytes.flip(0, unreadBytes.length());
+      int unread = unreadBytes.nextSetBit(0);
+      if (unread != Integer.MAX_VALUE) {
+        messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read.");
+      }
+    }
+    assertTrue(String.join("\n", messages), messages.isEmpty());
+    reader.close();
+    dir.close();
+  }
 }