LUCENE-7323: compound file writing now verifies checksum and segment ID for the incoming sub-files, to catch hardware issues or filesystem bugs earlier

Mike McCandless 2016-06-09 14:48:58 -04:00
parent 7e86ba8c73
commit 067fb25e43
19 changed files with 231 additions and 101 deletions

View File

@@ -12,6 +12,12 @@ Bug Fixes
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
Improvements
* LUCENE-7323: Compound file writing now verifies the incoming
sub-files' checksums and segment IDs, to catch hardware issues or
filesystem bugs earlier. (Robert Muir, Mike McCandless)
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien

View File

@@ -122,7 +122,7 @@ import org.apache.lucene.index.SegmentWriteState;
* and saving the offset/etc for each field.
* @lucene.experimental
*/
public class SimpleTextDocValuesFormat extends DocValuesFormat {
class SimpleTextDocValuesFormat extends DocValuesFormat {
public SimpleTextDocValuesFormat() {
super("SimpleText");

View File

@@ -34,7 +34,7 @@ import org.apache.lucene.index.SegmentWriteState;
* any text editor, and even edit it to alter your index.
*
* @lucene.experimental */
public final class SimpleTextPostingsFormat extends PostingsFormat {
final class SimpleTextPostingsFormat extends PostingsFormat {
public SimpleTextPostingsFormat() {
super("SimpleText");

View File

@@ -15,4 +15,3 @@
org.apache.lucene.codecs.memory.MemoryDocValuesFormat
org.apache.lucene.codecs.memory.DirectDocValuesFormat
org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat

View File

@@ -19,5 +19,4 @@ org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat

View File

@@ -27,4 +27,14 @@ public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase {
protected Codec getCodec() {
return codec;
}
@Override
public void testCorruptFilesAreCaught() {
// SimpleText does not catch broken sub-files in CFS!
}
@Override
public void testMissingCodecHeadersAreCaught() {
// SimpleText does not catch broken sub-files in CFS!
}
}

View File

@@ -258,6 +258,57 @@ public final class CodecUtil {
return version;
}
/**
* Expert: verifies the incoming {@link IndexInput} has an index header
* and that its segment ID matches the expected one, and then copies
* that index header into the provided {@link DataOutput}. This is
* useful when building compound files.
*
* @param in Input stream, positioned at the point where the
* index header was previously written. Typically this is located
* at the beginning of the file.
* @param out Output stream, where the header will be copied to.
* @param expectedID Expected segment ID
* @throws CorruptIndexException If the first four bytes are not
* {@link #CODEC_MAGIC}, or if the <code>expectedID</code>
* does not match.
* @throws IOException If there is an I/O error reading from the underlying medium.
*
* @lucene.internal
*/
public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
// make sure it's large enough to have a header and footer
if (in.length() < footerLength() + headerLength("")) {
throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
}
int actualHeader = in.readInt();
if (actualHeader != CODEC_MAGIC) {
throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
}
// we can't verify these, so we pass-through:
String codec = in.readString();
int version = in.readInt();
// verify id:
checkIndexHeaderID(in, expectedID);
// we can't verify extension either, so we pass-through:
int suffixLength = in.readByte() & 0xFF;
byte[] suffixBytes = new byte[suffixLength];
in.readBytes(suffixBytes, 0, suffixLength);
// now write the header we just verified
out.writeInt(CodecUtil.CODEC_MAGIC);
out.writeString(codec);
out.writeInt(version);
out.writeBytes(expectedID, 0, expectedID.length);
out.writeByte((byte) suffixLength);
out.writeBytes(suffixBytes, 0, suffixLength);
}
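For illustration, a minimal usage sketch (not part of this commit's diff; dir, data, si, and subFileName stand for the source Directory, the compound data output, the segment's SegmentInfo, and one sub-file name) pairing this helper with checkFooter, as the Lucene50CompoundFormat change below does:
try (ChecksumIndexInput in = dir.openChecksumInput(subFileName, IOContext.READONCE)) {
  // verify codec magic + segment ID, copying the index header through to the output
  CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());
  // copy the body, stopping short of the 16-byte footer
  data.copyBytes(in, in.length() - CodecUtil.footerLength() - in.getFilePointer());
  // verify the incoming file's CRC-32 footer, then re-write it verbatim
  long checksum = CodecUtil.checkFooter(in);
  data.writeInt(CodecUtil.FOOTER_MAGIC);
  data.writeInt(0);           // algorithm ID: 0 = CRC-32
  data.writeLong(checksum);
}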
/** Retrieves the full index header from the provided {@link IndexInput}.
* This throws {@link CorruptIndexException} if this file does
* not appear to be an index file. */
@@ -474,7 +525,7 @@ public final class CodecUtil {
* @throws CorruptIndexException if CRC is formatted incorrectly (wrong bits set)
* @throws IOException if an i/o error occurs
*/
public static long readCRC(IndexInput input) throws IOException {
static long readCRC(IndexInput input) throws IOException {
long value = input.readLong();
if ((value & 0xFFFFFFFF00000000L) != 0) {
throw new CorruptIndexException("Illegal CRC-32 checksum: " + value, input);
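(In other words, a valid CRC-32 occupies only the low 32 bits of the stored long; for example 0x00000000DEADBEEFL passes, while any value with a bit set in the upper 32 bits, such as 0x100000000L, means the stored checksum itself is corrupt.)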
@@ -487,7 +538,7 @@ public final class CodecUtil {
* @throws IllegalStateException if CRC is formatted incorrectly (wrong bits set)
* @throws IOException if an i/o error occurs
*/
public static void writeCRC(IndexOutput output) throws IOException {
static void writeCRC(IndexOutput output) throws IOException {
long value = output.getChecksum();
if ((value & 0xFFFFFFFF00000000L) != 0) {
throw new IllegalStateException("Illegal CRC-32 checksum: " + value + " (resource=" + output + ")");

View File

@@ -43,7 +43,9 @@ public abstract class CompoundFormat {
public abstract Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException;
/**
* Packs the provided segment's files into a compound format.
* Packs the provided segment's files into a compound format. All files referenced
* by the provided {@link SegmentInfo} must have been written with {@link CodecUtil#writeIndexHeader}
* and {@link CodecUtil#writeFooter}.
*/
public abstract void write(Directory dir, SegmentInfo si, IOContext context) throws IOException;
}
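Concretely, a sketch (mirroring the updated BaseCompoundFormatTestCase later in this commit; the codec name "Foo", version 0, and suffix "suffix" are placeholders) of a sub-file that satisfies this contract:
try (IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT)) {
  CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
  out.writeInt(3);            // the sub-file's actual contents
  CodecUtil.writeFooter(out); // CRC-32 footer covering everything above
}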

View File

@@ -18,17 +18,17 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.StringHelper;
/**
* Lucene 5.0 compound file format
@@ -76,6 +76,9 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
String entriesFile = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION);
byte[] expectedID = si.getId();
byte[] id = new byte[StringHelper.ID_LENGTH];
try (IndexOutput data = dir.createOutput(dataFile, context);
IndexOutput entries = dir.createOutput(entriesFile, context)) {
CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
@@ -87,8 +90,23 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
// write bytes for file
long startOffset = data.getFilePointer();
try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
data.copyBytes(in, in.length());
try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
// just copies the index header, verifying that its id matches what we expect
CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());
// copy all bytes except the footer
long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer();
data.copyBytes(in, numBytesToCopy);
// verify footer (checksum) matches for the incoming file we are copying
long checksum = CodecUtil.checkFooter(in);
// this is poached from CodecUtil.writeFooter, but we need to write the sub-file's own verified checksum,
// not data.getChecksum(); adding a public CodecUtil method to do that seems too dangerous:
data.writeInt(CodecUtil.FOOTER_MAGIC);
data.writeInt(0);
data.writeLong(checksum);
}
long endOffset = data.getFilePointer();

View File

@@ -100,7 +100,7 @@ final class Lucene50CompoundReader extends Directory {
}
/** Helper method that reads CFS entries from an input stream */
private final Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
Map<String,FileEntry> mapping = null;
try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
Throwable priorE = null;
@@ -140,7 +140,8 @@ final class Lucene50CompoundReader extends Directory {
final String id = IndexFileNames.stripSegmentName(name);
final FileEntry entry = entries.get(id);
if (entry == null) {
throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
String datFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.DATA_EXTENSION);
throw new FileNotFoundException("No sub-file with id " + id + " found in compound file \"" + datFileName + "\" (fileName=" + name + " files: " + entries.keySet() + ")");
}
return handle.slice(name, entry.offset, entry.length);
}

View File

@@ -14,13 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;

View File

@@ -79,7 +79,7 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
final DocValuesFormat slow = DocValuesFormat.forName("Memory");
iwc.setCodec(new AssertingCodec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {

View File

@@ -24,8 +24,8 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -33,8 +33,8 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -202,13 +202,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
public static class MockCodec extends AssertingCodec {
final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
final PostingsFormat direct = new DirectPostingsFormat();
final PostingsFormat memory = new MemoryPostingsFormat();
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
return simpleText;
return direct;
} else if (field.equals("content")) {
return memory;
} else {
@@ -219,12 +219,12 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
public static class MockCodec2 extends AssertingCodec {
final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
final PostingsFormat direct = new DirectPostingsFormat();
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
return simpleText;
return direct;
} else {
return luceneDefault;
}

View File

@@ -1086,14 +1086,14 @@ public class TestAddIndexes extends LuceneTestCase {
}
private static final class CustomPerFieldCodec extends AssertingCodec {
private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
private final PostingsFormat directFormat = PostingsFormat.forName("Direct");
private final PostingsFormat defaultFormat = TestUtil.getDefaultPostingsFormat();
private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory");
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
return simpleTextFormat;
return directFormat;
} else if (field.equals("content")) {
return memoryFormat;
} else {

View File

@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.mockrandom;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
@@ -47,6 +48,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
@@ -107,11 +109,10 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
}
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
try {
try (IndexOutput out = state.directory.createOutput(seedFileName, state.context)) {
CodecUtil.writeIndexHeader(out, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
out.writeLong(seed);
} finally {
out.close();
CodecUtil.writeFooter(out);
}
final Random random = new Random(seed);
@@ -267,8 +268,10 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
final IndexInput in = state.directory.openInput(seedFileName, state.context);
final ChecksumIndexInput in = state.directory.openChecksumInput(seedFileName, state.context);
CodecUtil.checkIndexHeader(in, "MockRandomSeed", 0, 0, state.segmentInfo.getId(), state.segmentSuffix);
final long seed = in.readLong();
CodecUtil.checkFooter(in);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
}

View File

@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
@@ -72,9 +73,9 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
for (int i=0; i<data.length; i++) {
String testfile = "_" + i + ".test";
Directory dir = newDirectory();
createSequenceFile(dir, testfile, (byte) 0, data[i]);
SegmentInfo si = newSegmentInfo(dir, "_" + i);
createSequenceFile(dir, testfile, (byte) 0, data[i], si.getId(), "suffix");
si.setFiles(Collections.singleton(testfile));
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -96,10 +97,10 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
public void testTwoFiles() throws IOException {
String files[] = { "_123.d1", "_123.d2" };
Directory dir = newDirectory();
createSequenceFile(dir, files[0], (byte) 0, 15);
createSequenceFile(dir, files[1], (byte) 0, 114);
SegmentInfo si = newSegmentInfo(dir, "_123");
createSequenceFile(dir, files[0], (byte) 0, 15, si.getId(), "suffix");
createSequenceFile(dir, files[1], (byte) 0, 114, si.getId(), "suffix");
si.setFiles(Arrays.asList(files));
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -122,11 +123,13 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
final String testfile = "_123.test";
Directory dir = newDirectory();
IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
out.writeInt(3);
out.close();
SegmentInfo si = newSegmentInfo(dir, "_123");
try (IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT)) {
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
out.writeInt(3);
CodecUtil.writeFooter(out);
}
si.setFiles(Collections.singleton(testfile));
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -148,11 +151,13 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
return super.createOutput(name, context);
}
};
IndexOutput out = dir.createOutput(testfile, myContext);
out.writeInt(3);
out.close();
SegmentInfo si = newSegmentInfo(dir, "_123");
try (IndexOutput out = dir.createOutput(testfile, myContext)) {
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
out.writeInt(3);
CodecUtil.writeFooter(out);
}
si.setFiles(Collections.singleton(testfile));
si.getCodec().compoundFormat().write(dir, si, myContext);
dir.close();
@@ -165,14 +170,16 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
Directory dir = new NRTCachingDirectory(newFSDirectory(createTempDir()), 2.0, 25.0);
IndexOutput out = dir.createOutput(testfile, context);
byte[] bytes = new byte[512];
for(int i=0;i<1024*1024;i++) {
out.writeBytes(bytes, 0, bytes.length);
}
out.close();
SegmentInfo si = newSegmentInfo(dir, "_123");
try (IndexOutput out = dir.createOutput(testfile, context)) {
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
byte[] bytes = new byte[512];
for(int i=0;i<1024*1024;i++) {
out.writeBytes(bytes, 0, bytes.length);
}
CodecUtil.writeFooter(out);
}
si.setFiles(Collections.singleton(testfile));
si.getCodec().compoundFormat().write(dir, si, context);
@@ -326,17 +333,19 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
// Setup the test segment
String segment = "_123";
int chunk = 1024; // internal buffer size used by the stream
createRandomFile(dir, segment + ".zero", 0);
createRandomFile(dir, segment + ".one", 1);
createRandomFile(dir, segment + ".ten", 10);
createRandomFile(dir, segment + ".hundred", 100);
createRandomFile(dir, segment + ".big1", chunk);
createRandomFile(dir, segment + ".big2", chunk - 1);
createRandomFile(dir, segment + ".big3", chunk + 1);
createRandomFile(dir, segment + ".big4", 3 * chunk);
createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
createRandomFile(dir, segment + ".big7", 1000 * chunk);
SegmentInfo si = newSegmentInfo(dir, "_123");
byte[] segId = si.getId();
createRandomFile(dir, segment + ".zero", 0, segId);
createRandomFile(dir, segment + ".one", 1, segId);
createRandomFile(dir, segment + ".ten", 10, segId);
createRandomFile(dir, segment + ".hundred", 100, segId);
createRandomFile(dir, segment + ".big1", chunk, segId);
createRandomFile(dir, segment + ".big2", chunk - 1, segId);
createRandomFile(dir, segment + ".big3", chunk + 1, segId);
createRandomFile(dir, segment + ".big4", 3 * chunk, segId);
createRandomFile(dir, segment + ".big5", 3 * chunk - 1, segId);
createRandomFile(dir, segment + ".big6", 3 * chunk + 1, segId);
createRandomFile(dir, segment + ".big7", 1000 * chunk, segId);
List<String> files = new ArrayList<>();
for (String file : dir.listAll()) {
@@ -345,7 +354,6 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
}
}
SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(files);
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -370,17 +378,19 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
final int FILE_COUNT = atLeast(500);
List<String> files = new ArrayList<>();
SegmentInfo si = newSegmentInfo(dir, "_123");
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
String file = "_123." + fileIdx;
files.add(file);
IndexOutput out = dir.createOutput(file, newIOContext(random()));
out.writeByte((byte) fileIdx);
out.close();
try (IndexOutput out = dir.createOutput(file, newIOContext(random()))) {
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
out.writeByte((byte) fileIdx);
CodecUtil.writeFooter(out);
}
}
assertEquals(0, dir.getFileHandleCount());
SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(files);
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -388,6 +398,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
final IndexInput[] ins = new IndexInput[FILE_COUNT];
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
CodecUtil.checkIndexHeader(ins[fileIdx], "Foo", 0, 0, si.getId(), "suffix");
}
assertEquals(1, dir.getFileHandleCount());
@@ -631,27 +642,31 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
}
/** Creates a file of the specified size with random data. */
protected static void createRandomFile(Directory dir, String name, int size) throws IOException {
IndexOutput os = dir.createOutput(name, newIOContext(random()));
protected static void createRandomFile(Directory dir, String name, int size, byte[] segId) throws IOException {
Random rnd = random();
for (int i=0; i<size; i++) {
byte b = (byte) rnd.nextInt(256);
os.writeByte(b);
try (IndexOutput os = dir.createOutput(name, newIOContext(random()))) {
CodecUtil.writeIndexHeader(os, "Foo", 0, segId, "suffix");
for (int i=0; i<size; i++) {
byte b = (byte) rnd.nextInt(256);
os.writeByte(b);
}
CodecUtil.writeFooter(os);
}
os.close();
}
/** Creates a file of the specified size with sequential data. The first
* byte is written as the start byte provided. All subsequent bytes are
* computed as start + offset where offset is the number of the byte.
*/
protected static void createSequenceFile(Directory dir, String name, byte start, int size) throws IOException {
IndexOutput os = dir.createOutput(name, newIOContext(random()));
for (int i=0; i < size; i++) {
os.writeByte(start);
start ++;
protected static void createSequenceFile(Directory dir, String name, byte start, int size, byte[] segID, String segSuffix) throws IOException {
try (IndexOutput os = dir.createOutput(name, newIOContext(random()))) {
CodecUtil.writeIndexHeader(os, "Foo", 0, segID, segSuffix);
for (int i=0; i < size; i++) {
os.writeByte(start);
start ++;
}
CodecUtil.writeFooter(os);
}
os.close();
}
protected static void assertSameStreams(String msg, IndexInput expected, IndexInput test) throws IOException {
@@ -724,12 +739,12 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
*/
protected static Directory createLargeCFS(Directory dir) throws IOException {
List<String> files = new ArrayList<>();
SegmentInfo si = newSegmentInfo(dir, "_123");
for (int i = 0; i < 20; i++) {
createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000);
createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000, si.getId(), "suffix");
files.add("_123.f" + i);
}
SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(files);
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -750,9 +765,9 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
public void testResourceNameInsideCompoundFile() throws Exception {
Directory dir = newDirectory();
String subFile = "_123.xyz";
createSequenceFile(dir, subFile, (byte) 0, 10);
SegmentInfo si = newSegmentInfo(dir, "_123");
createSequenceFile(dir, subFile, (byte) 0, 10, si.getId(), "suffix");
si.setFiles(Collections.singletonList(subFile));
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -762,4 +777,48 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
cfs.close();
dir.close();
}
public void testMissingCodecHeadersAreCaught() throws Exception {
Directory dir = newDirectory();
String subFile = "_123.xyz";
// missing codec header
try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
for (int i=0; i < 1024; i++) {
os.writeByte((byte) i);
}
}
SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(Collections.singletonList(subFile));
Exception e = expectThrows(CorruptIndexException.class, () -> si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT));
assertTrue(e.getMessage().contains("codec header mismatch"));
dir.close();
}
public void testCorruptFilesAreCaught() throws Exception {
Directory dir = newDirectory();
String subFile = "_123.xyz";
// wrong checksum
SegmentInfo si = newSegmentInfo(dir, "_123");
try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
CodecUtil.writeIndexHeader(os, "Foo", 0, si.getId(), "suffix");
for (int i=0; i < 1024; i++) {
os.writeByte((byte) i);
}
// write footer w/ wrong checksum
os.writeInt(CodecUtil.FOOTER_MAGIC);
os.writeInt(0);
long checksum = os.getChecksum();
os.writeLong(checksum+1);
}
si.setFiles(Collections.singletonList(subFile));
Exception e = expectThrows(CorruptIndexException.class, () -> si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT));
assertTrue(e.getMessage().contains("checksum failed (hardware problem?)"));
dir.close();
}
}

View File

@@ -22,7 +22,6 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -50,8 +49,6 @@ import org.apache.lucene.codecs.memory.FSTPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryDocValuesFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -159,10 +156,6 @@ public class RandomCodec extends AssertingCodec {
PostingsFormat codec = previousMappings.get(name);
if (codec == null) {
codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size());
if (codec instanceof SimpleTextPostingsFormat && perFieldSeed % 5 != 0) {
// make simpletext rarer, choose again
codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % formats.size());
}
previousMappings.put(name, codec);
// Safety:
assert previousMappings.size() < 10000: "test went insane";
@@ -175,10 +168,6 @@ public class RandomCodec extends AssertingCodec {
DocValuesFormat codec = previousDVMappings.get(name);
if (codec == null) {
codec = dvFormats.get(Math.abs(perFieldSeed ^ name.hashCode()) % dvFormats.size());
if (codec instanceof SimpleTextDocValuesFormat && perFieldSeed % 5 != 0) {
// make simpletext rarer, choose again
codec = dvFormats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % dvFormats.size());
}
previousDVMappings.put(name, codec);
// Safety:
assert previousDVMappings.size() < 10000: "test went insane";
@@ -214,7 +203,7 @@ public class RandomCodec extends AssertingCodec {
new LuceneFixedGap(TestUtil.nextInt(random, 1, 1000)),
new LuceneVarGapFixedInterval(TestUtil.nextInt(random, 1, 1000)),
new LuceneVarGapDocFreqInterval(TestUtil.nextInt(random, 1, 100), TestUtil.nextInt(random, 1, 1000)),
random.nextInt(10) == 0 ? new SimpleTextPostingsFormat() : TestUtil.getDefaultPostingsFormat(),
TestUtil.getDefaultPostingsFormat(),
new AssertingPostingsFormat(),
new MemoryPostingsFormat(true, random.nextFloat()),
new MemoryPostingsFormat(false, random.nextFloat()));
@@ -223,7 +212,7 @@ public class RandomCodec extends AssertingCodec {
TestUtil.getDefaultDocValuesFormat(),
new DirectDocValuesFormat(), // maybe not a great idea...
new MemoryDocValuesFormat(),
random.nextInt(10) == 0 ? new SimpleTextDocValuesFormat() : TestUtil.getDefaultDocValuesFormat(),
TestUtil.getDefaultDocValuesFormat(),
new AssertingDocValuesFormat());
Collections.shuffle(formats, random);

View File

@@ -17,7 +17,6 @@
-->
<schema name="codec" version="1.2">
<fieldType name="string_direct" class="solr.StrField" postingsFormat="Direct"/>
<fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText"/>
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene50"/>
<fieldType name="string_disk" class="solr.StrField" docValuesFormat="Lucene54"/>
@@ -37,7 +36,6 @@
</fieldType>
<field name="string_direct_f" type="string_direct" indexed="true" stored="true"/>
<field name="string_simpletext_f" type="string_simpletext" indexed="true" stored="true"/>
<field name="string_standard_f" type="string_standard" indexed="true" stored="true"/>
<field name="string_disk_f" type="string_disk" indexed="false" stored="false" docValues="true" default=""/>
@@ -46,7 +44,6 @@
<field name="string_f" type="string" indexed="true" stored="true" docValues="true" required="true"/>
<field name="text" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_simple" type="string_simpletext" indexed="true" stored="true"/>
<dynamicField name="*_direct" type="string_direct" indexed="true" stored="true"/>
<dynamicField name="*_standard" type="string_standard" indexed="true" stored="true"/>

View File

@@ -51,9 +51,6 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
SchemaField schemaField = fields.get("string_direct_f");
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
assertEquals("Direct", format.getPostingsFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_simpletext_f");
assertEquals("SimpleText",
format.getPostingsFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_standard_f");
assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_f");
@@ -78,8 +75,6 @@ Codec codec = h.getCore().getCodec();
Codec codec = h.getCore().getCodec();
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
assertEquals("SimpleText", format.getPostingsFormatForField("foo_simple").getName());
assertEquals("SimpleText", format.getPostingsFormatForField("bar_simple").getName());
assertEquals("Direct", format.getPostingsFormatForField("foo_direct").getName());
assertEquals("Direct", format.getPostingsFormatForField("bar_direct").getName());
assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField("foo_standard").getName());