Mirror of https://github.com/apache/lucene.git (synced 2025-02-09 03:25:15 +00:00)

Commit 88b483cbbd — LUCENE-4051: Use Codec File Headers for Lucene40 StoredFields, DocValues, Norms & TermVectors

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1341768 13f79535-47bb-0310-9956-ffa450edef68

Parent: 8c037c2115
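A note on the mechanism this commit adopts: CodecUtil.writeHeader stamps a file with a magic int, the codec name, and a version int, and CodecUtil.checkHeader validates all three on open, so a truncated, swapped, or too-old/too-new file fails fast instead of being parsed as garbage. A minimal round-trip sketch; the directory path, file name, and codec name "DemoCodec" are hypothetical, not part of this commit:

    import java.io.File;
    import org.apache.lucene.store.*;
    import org.apache.lucene.util.CodecUtil;

    public class CodecHeaderDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/tmp/demo")); // hypothetical location
        IndexOutput out = dir.createOutput("example.dat", IOContext.DEFAULT);
        CodecUtil.writeHeader(out, "DemoCodec", 0);              // magic + codec name + version
        // the header length depends only on the codec name, so offsets are precomputable:
        assert out.getFilePointer() == CodecUtil.headerLength("DemoCodec");
        out.close();

        IndexInput in = dir.openInput("example.dat", IOContext.DEFAULT);
        // throws CorruptIndexException (or IndexFormatTooOld/NewException) on mismatch:
        int version = CodecUtil.checkHeader(in, "DemoCodec", 0, 0);
        System.out.println("header version = " + version);
        in.close();
      }
    }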
Lucene40StoredFieldsFormat.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.DataOutput; // javadocs
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CodecUtil;
 
 /**
  * Lucene 4.0 Stored Fields Format.
@@ -42,7 +43,8 @@ import org.apache.lucene.store.IOContext;
  * <p>This contains, for each document, a pointer to its field data, as
  * follows:</p>
  * <ul>
- * <li>FieldIndex (.fdx) --> <FieldValuesPosition> <sup>SegSize</sup></li>
+ * <li>FieldIndex (.fdx) --> <Header>, <FieldValuesPosition> <sup>SegSize</sup></li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>FieldValuesPosition --> {@link DataOutput#writeLong Uint64}</li>
  * </ul>
  * </li>
@@ -50,7 +52,8 @@ import org.apache.lucene.store.IOContext;
  * <p><a name="field_data" id="field_data"></a>The field data, or <tt>.fdt</tt> file.</p>
  * <p>This contains the stored fields of each document, as follows:</p>
  * <ul>
- * <li>FieldData (.fdt) --> <DocFieldData> <sup>SegSize</sup></li>
+ * <li>FieldData (.fdt) --> <Header>, <DocFieldData> <sup>SegSize</sup></li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>DocFieldData --> FieldCount, <FieldNum, Bits, Value>
  * <sup>FieldCount</sup></li>
  * <li>FieldCount --> {@link DataOutput#writeVInt VInt}</li>
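Read together, the grammar above means a reader reaches document n's stored fields in two seeks: one into the fixed-width .fdx index, one into .fdt. A hedged sketch of that lookup (indexIn and fieldsIn are hypothetical IndexInputs over freshly opened .fdx and .fdt files):

    // .fdx: Header, then one Uint64 FieldValuesPosition per document.
    indexIn.seek(CodecUtil.headerLength("Lucene40StoredFieldsIndex") + n * 8L);
    long fieldDataPointer = indexIn.readLong(); // FieldValuesPosition for doc n
    // .fdt: Header, then one DocFieldData record per document.
    fieldsIn.seek(fieldDataPointer);
    int fieldCount = fieldsIn.readVInt();       // FieldCount of DocFieldData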
Lucene40StoredFieldsReader.java

@@ -30,11 +30,14 @@ import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 
 import java.io.Closeable;
 import java.util.Set;
 
+import static org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsWriter.*;
+
 /**
  * Class responsible for access to stored document fields.
  * <p/>
@@ -44,8 +47,6 @@ import java.util.Set;
  * @lucene.internal
  */
 public final class Lucene40StoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
-  private final static int FORMAT_SIZE = 4;
-
   private final FieldInfos fieldInfos;
   private final IndexInput fieldsStream;
   private final IndexInput indexStream;
@@ -78,17 +79,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     boolean success = false;
     fieldInfos = fn;
     try {
-      fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
-      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
+      fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
+      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
       indexStream = d.openInput(indexStreamFN, context);
 
-      // its a 4.0 codec: so its not too-old, its corrupt.
-      // TODO: change this to CodecUtil.checkHeader
-      if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
-        throw new CorruptIndexException("unexpected fdx header: " + indexStream);
-      }
-
-      final long indexSize = indexStream.length() - FORMAT_SIZE;
+      CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
+      CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
+      assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
+      assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
+      final long indexSize = indexStream.length() - HEADER_LENGTH_IDX;
       this.size = (int) (indexSize >> 3);
       // Verify two sources of "maxDoc" agree:
       if (this.size != si.docCount) {
@@ -135,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void seekIndex(int docID) throws IOException {
-    indexStream.seek(FORMAT_SIZE + docID * 8L);
+    indexStream.seek(HEADER_LENGTH_IDX + docID * 8L);
   }
 
   public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
@@ -148,7 +147,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
 
     int bits = fieldsStream.readByte() & 0xFF;
-    assert bits <= (Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK | Lucene40StoredFieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
+    assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
 
     switch(visitor.needsField(fieldInfo)) {
       case YES:
@@ -164,19 +163,19 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
-    final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
+    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
     if (numeric != 0) {
       switch(numeric) {
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
+        case FIELD_IS_NUMERIC_INT:
           visitor.intField(info, fieldsStream.readInt());
           return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
+        case FIELD_IS_NUMERIC_LONG:
           visitor.longField(info, fieldsStream.readLong());
           return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+        case FIELD_IS_NUMERIC_FLOAT:
          visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
          return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+        case FIELD_IS_NUMERIC_DOUBLE:
          visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
          return;
        default:
@@ -186,7 +185,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     final int length = fieldsStream.readVInt();
     byte bytes[] = new byte[length];
     fieldsStream.readBytes(bytes, 0, length);
-    if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
+    if ((bits & FIELD_IS_BINARY) != 0) {
       visitor.binaryField(info, bytes, 0, bytes.length);
     } else {
       visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
@@ -195,15 +194,15 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void skipField(int bits) throws IOException {
-    final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
+    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
     if (numeric != 0) {
       switch(numeric) {
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_INT:
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+        case FIELD_IS_NUMERIC_INT:
+        case FIELD_IS_NUMERIC_FLOAT:
          fieldsStream.readInt();
          return;
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_LONG:
-        case Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+        case FIELD_IS_NUMERIC_LONG:
+        case FIELD_IS_NUMERIC_DOUBLE:
          fieldsStream.readLong();
          return;
        default:
@@ -242,7 +241,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
-    files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
-    files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
   }
 }
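The size bookkeeping in the constructor above is worth spelling out: once the header has a known, name-dependent length, the document count can be recovered from the index file size alone and cross-checked against the segment's docCount. A sketch of that arithmetic (the helper name is illustrative, not from the patch):

    // .fdx = [HEADER_LENGTH_IDX bytes of header][8 bytes per document],
    // so the reader can derive and verify maxDoc from the file length alone:
    static int fdxDocCount(long fdxFileLength, long headerLengthIdx) {
      return (int) ((fdxFileLength - headerLengthIdx) >> 3); // i.e. divide by 8
    }

For reference, CodecUtil.headerLength(codec) in Lucene 4.0 works out to 9 + codec.length() for ASCII names shorter than 128 characters: 4 magic bytes, a 1-byte length-prefixed name, and a 4-byte version.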
Lucene40StoredFieldsWriter.java

@@ -34,6 +34,7 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 
 /**
@@ -62,16 +63,14 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
   // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
   // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
 
-  // (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format
-  static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
-
-  // NOTE: if you introduce a new format, make it 1 higher
-  // than the current one, and always change this if you
-  // switch to a new format!
-  static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
-
-  // when removing support for old versions, leave the last supported version here
-  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
+  static final String CODEC_NAME_IDX = "Lucene40StoredFieldsIndex";
+  static final String CODEC_NAME_DAT = "Lucene40StoredFieldsData";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+  static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
+  static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
 
   /** Extension of stored fields file */
   public static final String FIELDS_EXTENSION = "fdt";
@@ -94,9 +93,10 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
       fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
       indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION), context);
 
-      fieldsStream.writeInt(FORMAT_CURRENT);
-      indexStream.writeInt(FORMAT_CURRENT);
+      CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
+      CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
+      assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
+      assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
       success = true;
     } finally {
       if (!success) {
@@ -209,7 +209,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
 
   @Override
   public void finish(int numDocs) throws IOException {
-    if (4+((long) numDocs)*8 != indexStream.getFilePointer())
+    if (HEADER_LENGTH_IDX+((long) numDocs)*8 != indexStream.getFilePointer())
       // This is most likely a bug in Sun JRE 1.6.0_04/_05;
       // we detect that the bug has struck, here, and
       // throw an exception to prevent the corruption from
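With the constants above, the writer's finish() guard becomes pure arithmetic: after numDocs documents the .fdx pointer must sit exactly at HEADER_LENGTH_IDX + numDocs * 8, and any drift signals the index-time corruption the comment describes. Assuming the 9 + codec.length() header arithmetic noted earlier, CODEC_NAME_IDX ("Lucene40StoredFieldsIndex", 25 chars) gives a 34-byte header, so for example:

    // Illustrative only: the finish() guard specialized to 1,000 documents.
    long expectedFP = CodecUtil.headerLength("Lucene40StoredFieldsIndex") + 1000L * 8;
    assert expectedFP == 34 + 8000; // 8,034 bytes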
Lucene40TermVectorsFormat.java

@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.DataOutput; // javadocs
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CodecUtil;
 
 /**
  * Lucene 4.0 Term Vectors format.
@@ -38,10 +39,10 @@ import org.apache.lucene.store.IOContext;
  * <p>The Document Index or .tvx file.</p>
  * <p>For each document, this stores the offset into the document data (.tvd) and
  * field data (.tvf) files.</p>
- * <p>DocumentIndex (.tvx) --> TVXVersion<DocumentPosition,FieldPosition>
+ * <p>DocumentIndex (.tvx) --> Header,<DocumentPosition,FieldPosition>
  * <sup>NumDocs</sup></p>
  * <ul>
- * <li>TVXVersion --> {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>DocumentPosition --> {@link DataOutput#writeLong UInt64} (offset in the .tvd file)</li>
  * <li>FieldPosition --> {@link DataOutput#writeLong UInt64} (offset in the .tvf file)</li>
  * </ul>
@@ -53,10 +54,10 @@ import org.apache.lucene.store.IOContext;
  * in the .tvf (Term Vector Fields) file.</p>
  * <p>The .tvd file is used to map out the fields that have term vectors stored
  * and where the field information is in the .tvf file.</p>
- * <p>Document (.tvd) --> TVDVersion<NumFields, FieldNums,
+ * <p>Document (.tvd) --> Header,<NumFields, FieldNums,
  * FieldPositions> <sup>NumDocs</sup></p>
  * <ul>
- * <li>TVDVersion --> {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
  * <li>NumFields --> {@link DataOutput#writeVInt VInt}</li>
  * <li>FieldNums --> <FieldNumDelta> <sup>NumFields</sup></li>
  * <li>FieldNumDelta --> {@link DataOutput#writeVInt VInt}</li>
@@ -69,10 +70,10 @@ import org.apache.lucene.store.IOContext;
  * <p>This file contains, for each field that has a term vector stored, a list of
  * the terms, their frequencies and, optionally, position and offset
  * information.</p>
- * <p>Field (.tvf) --> TVFVersion<NumTerms, Position/Offset, TermFreqs>
+ * <p>Field (.tvf) --> Header,<NumTerms, Position/Offset, TermFreqs>
  * <sup>NumFields</sup></p>
  * <ul>
- * <li>TVFVersion --> {@link DataOutput#writeInt Int32} (<code>Lucene40TermVectorsReader.FORMAT_CURRENT</code>)</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
 * <li>NumTerms --> {@link DataOutput#writeVInt VInt}</li>
 * <li>Position/Offset --> {@link DataOutput#writeByte Byte}</li>
 * <li>TermFreqs --> <TermText, TermFreq, Positions?, Offsets?>
Lucene40TermVectorsReader.java

@@ -33,8 +33,6 @@ import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFormatTooNewException;
-import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -43,8 +41,10 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
+
 
 /**
  * Lucene 4.0 Term Vectors reader.
  * <p>
@@ -54,22 +54,6 @@ import org.apache.lucene.util.IOUtils;
  */
 public class Lucene40TermVectorsReader extends TermVectorsReader {
 
-  // NOTE: if you make a new format, it must be larger than
-  // the current format
-
-  // Changed strings to UTF8 with length-in-bytes not length-in-chars
-  static final int FORMAT_UTF8_LENGTH_IN_BYTES = 4;
-
-  // NOTE: always change this if you switch to a new format!
-  // whenever you add a new format, make it 1 larger (positive version logic)!
-  static final int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES;
-
-  // when removing support for old versions, leave the last supported version here
-  static final int FORMAT_MINIMUM = FORMAT_UTF8_LENGTH_IN_BYTES;
-
-  //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
-  static final int FORMAT_SIZE = 4;
-
   static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
 
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
@@ -83,6 +67,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   /** Extension of vectors index file */
   static final String VECTORS_INDEX_EXTENSION = "tvx";
 
+  static final String CODEC_NAME_FIELDS = "Lucene40TermVectorsFields";
+  static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
+  static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
+  static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
+  static final long HEADER_LENGTH_INDEX = CodecUtil.headerLength(CODEC_NAME_INDEX);
+
   private FieldInfos fieldInfos;
 
   private IndexInput tvx;
@@ -91,17 +86,15 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
   private int size;
   private int numTotalDocs;
 
-  private final int format;
-
   // used by clone
-  Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int format) {
+  Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs) {
     this.fieldInfos = fieldInfos;
     this.tvx = tvx;
     this.tvd = tvd;
     this.tvf = tvf;
     this.size = size;
     this.numTotalDocs = numTotalDocs;
-    this.format = format;
   }
 
   public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
@@ -114,18 +107,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     try {
       String idxName = IndexFileNames.segmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
       tvx = d.openInput(idxName, context);
-      format = checkValidFormat(tvx);
+      final int tvxVersion = CodecUtil.checkHeader(tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);
       String fn = IndexFileNames.segmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
       tvd = d.openInput(fn, context);
-      final int tvdFormat = checkValidFormat(tvd);
+      final int tvdVersion = CodecUtil.checkHeader(tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);
       fn = IndexFileNames.segmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
       tvf = d.openInput(fn, context);
-      final int tvfFormat = checkValidFormat(tvf);
-
-      assert format == tvdFormat;
-      assert format == tvfFormat;
-
-      numTotalDocs = (int) (tvx.length() >> 4);
+      final int tvfVersion = CodecUtil.checkHeader(tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);
+      assert HEADER_LENGTH_INDEX == tvx.getFilePointer();
+      assert HEADER_LENGTH_DOCS == tvd.getFilePointer();
+      assert HEADER_LENGTH_FIELDS == tvf.getFilePointer();
+      assert tvxVersion == tvdVersion;
+      assert tvxVersion == tvfVersion;
+
+      numTotalDocs = (int) (tvx.length()-HEADER_LENGTH_INDEX >> 4);
 
       this.size = numTotalDocs;
       assert size == 0 || numTotalDocs == size;
@@ -156,13 +152,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
 
   // Not private to avoid synthetic access$NNN methods
   void seekTvx(final int docNum) throws IOException {
-    tvx.seek(docNum * 16L + FORMAT_SIZE);
-  }
-
-  boolean canReadRawDocs() {
-    // we can always read raw docs, unless the term vectors
-    // didn't exist
-    return format != 0;
+    tvx.seek(docNum * 16L + HEADER_LENGTH_INDEX);
   }
 
   /** Retrieve the length (in bytes) of the tvd and tvf
@@ -210,16 +200,6 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
     }
   }
 
-  private int checkValidFormat(IndexInput in) throws CorruptIndexException, IOException
-  {
-    int format = in.readInt();
-    if (format < FORMAT_MINIMUM)
-      throw new IndexFormatTooOldException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
-    if (format > FORMAT_CURRENT)
-      throw new IndexFormatTooNewException(in, format, FORMAT_MINIMUM, FORMAT_CURRENT);
-    return format;
-  }
-
   public void close() throws IOException {
     IOUtils.close(tvx, tvd, tvf);
   }
@@ -708,7 +688,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
       cloneTvf = (IndexInput) tvf.clone();
     }
 
-    return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs, format);
+    return new Lucene40TermVectorsReader(fieldInfos, cloneTvx, cloneTvd, cloneTvf, size, numTotalDocs);
   }
 
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
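The term-vectors index uses the same fixed-stride trick with a 16-byte record: per document, one UInt64 pointer into .tvd and one into .tvf. That is where both the >> 4 in numTotalDocs and the docNum * 16L in seekTvx come from. A sketch (helper names are illustrative):

    // .tvx layout after this commit: [CodecHeader][per doc: tvdPointer(8) + tvfPointer(8)]
    static long tvxOffset(long headerLengthIndex, int docNum) {
      return headerLengthIndex + docNum * 16L;
    }
    static int tvxDocCount(long tvxFileLength, long headerLengthIndex) {
      return (int) ((tvxFileLength - headerLengthIndex) >> 4); // i.e. divide by 16
    }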
Lucene40TermVectorsWriter.java

@@ -35,9 +35,13 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 
+import static org.apache.lucene.codecs.lucene40.Lucene40TermVectorsReader.*;
+
 
 // TODO: make a new 4.0 TV format that encodes better
 // - use startOffset (not endOffset) as base for delta on
 // next startOffset because today for syns or ngrams or
@@ -59,6 +63,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
   private final String segment;
   private IndexOutput tvx = null, tvd = null, tvf = null;
 
   public Lucene40TermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
     this.directory = directory;
     this.segment = segment;
@@ -66,11 +72,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
     try {
       // Open files for TermVector storage
       tvx = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION), context);
-      tvx.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+      CodecUtil.writeHeader(tvx, CODEC_NAME_INDEX, VERSION_CURRENT);
       tvd = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
-      tvd.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+      CodecUtil.writeHeader(tvd, CODEC_NAME_DOCS, VERSION_CURRENT);
       tvf = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
-      tvf.writeInt(Lucene40TermVectorsReader.FORMAT_CURRENT);
+      CodecUtil.writeHeader(tvf, CODEC_NAME_FIELDS, VERSION_CURRENT);
+      assert HEADER_LENGTH_INDEX == tvx.getFilePointer();
+      assert HEADER_LENGTH_DOCS == tvd.getFilePointer();
+      assert HEADER_LENGTH_FIELDS == tvf.getFilePointer();
       success = true;
     } finally {
       if (!success) {
@@ -252,12 +261,9 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
         TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
 
         if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
-          // If the TV* files are an older format then they cannot read raw docs:
-          if (((Lucene40TermVectorsReader)vectorsReader).canReadRawDocs()) {
            matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
          }
        }
-      }
       if (reader.liveDocs != null) {
         numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
       } else {
@@ -356,7 +362,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
 
   @Override
   public void finish(int numDocs) throws IOException {
-    if (4+((long) numDocs)*16 != tvx.getFilePointer())
+    if (HEADER_LENGTH_INDEX+((long) numDocs)*16 != tvx.getFilePointer())
       // This is most likely a bug in Sun JRE 1.6.0_04/_05;
       // we detect that the bug has struck, here, and
       // throw an exception to prevent the corruption from
Bytes.java

@@ -236,27 +236,34 @@ public final class Bytes {
     private IndexOutput datOut;
     protected BytesRef bytesRef = new BytesRef();
     private final Directory dir;
-    private final String codecName;
+    private final String codecNameIdx;
+    private final String codecNameDat;
     private final int version;
     private final IOContext context;
 
-    protected BytesWriterBase(Directory dir, String id, String codecName,
+    protected BytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
       super(bytesUsed, type);
       this.id = id;
       this.dir = dir;
-      this.codecName = codecName;
+      this.codecNameIdx = codecNameIdx;
+      this.codecNameDat = codecNameDat;
       this.version = version;
       this.context = context;
+      assert codecNameDat != null || codecNameIdx != null: "both codec names are null";
+      assert (codecNameDat != null && !codecNameDat.equals(codecNameIdx))
+          || (codecNameIdx != null && !codecNameIdx.equals(codecNameDat)):
+          "index and data codec names must not be equal";
     }
 
     protected IndexOutput getOrCreateDataOut() throws IOException {
       if (datOut == null) {
         boolean success = false;
+        assert codecNameDat != null;
         try {
           datOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
               DocValuesWriterBase.DATA_EXTENSION), context);
-          CodecUtil.writeHeader(datOut, codecName, version);
+          CodecUtil.writeHeader(datOut, codecNameDat, version);
           success = true;
         } finally {
           if (!success) {
@@ -279,9 +286,10 @@ public final class Bytes {
       boolean success = false;
       try {
         if (idxOut == null) {
+          assert codecNameIdx != null;
           idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
               DocValuesWriterBase.INDEX_EXTENSION), context);
-          CodecUtil.writeHeader(idxOut, codecName, version);
+          CodecUtil.writeHeader(idxOut, codecNameIdx, version);
         }
         success = true;
       } finally {
@@ -309,7 +317,7 @@ public final class Bytes {
     protected final String id;
     protected final Type type;
 
-    protected BytesReaderBase(Directory dir, String id, String codecName,
+    protected BytesReaderBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int maxVersion, boolean doIndex, IOContext context, Type type) throws IOException {
       IndexInput dataIn = null;
       IndexInput indexIn = null;
@@ -317,11 +325,11 @@ public final class Bytes {
       try {
         dataIn = dir.openInput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
             DocValuesWriterBase.DATA_EXTENSION), context);
-        version = CodecUtil.checkHeader(dataIn, codecName, maxVersion, maxVersion);
+        version = CodecUtil.checkHeader(dataIn, codecNameDat, maxVersion, maxVersion);
         if (doIndex) {
           indexIn = dir.openInput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
               DocValuesWriterBase.INDEX_EXTENSION), context);
-          final int version2 = CodecUtil.checkHeader(indexIn, codecName,
+          final int version2 = CodecUtil.checkHeader(indexIn, codecNameIdx,
               maxVersion, maxVersion);
           assert version == version2;
         }
@@ -377,23 +385,23 @@ public final class Bytes {
     protected final boolean fasterButMoreRam;
     protected long maxBytes = 0;
 
-    protected DerefBytesWriterBase(Directory dir, String id, String codecName,
+    protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int codecVersion, Counter bytesUsed, IOContext context, Type type)
         throws IOException {
-      this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
+      this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
          ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, false, type);
     }
 
-    protected DerefBytesWriterBase(Directory dir, String id, String codecName,
+    protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat,
         int codecVersion, Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type)
         throws IOException {
-      this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
+      this(dir, id, codecNameIdx, codecNameDat, codecVersion, new DirectTrackingAllocator(
          ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context, fasterButMoreRam,type);
     }
 
-    protected DerefBytesWriterBase(Directory dir, String id, String codecName, int codecVersion, Allocator allocator,
+    protected DerefBytesWriterBase(Directory dir, String id, String codecNameIdx, String codecNameDat, int codecVersion, Allocator allocator,
         Counter bytesUsed, IOContext context, boolean fasterButMoreRam, Type type) throws IOException {
-      super(dir, id, codecName, codecVersion, bytesUsed, context, type);
+      super(dir, id, codecNameIdx, codecNameDat, codecVersion, bytesUsed, context, type);
       hash = new BytesRefHash(new ByteBlockPool(allocator),
          BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
          BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
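Splitting the single codecName into codecNameIdx/codecNameDat gives each DocValues file its own stamp, so a mixed-up .idx/.dat pair is now rejected at open time instead of being misread. A hedged sketch of the failure mode (directory and file name hypothetical):

    // Suppose the data file was stamped with the *Dat codec name at write time...
    IndexInput in = dir.openInput("demo_0.dat", IOContext.DEFAULT);
    // ...then checking it against the *Idx name fails immediately:
    CodecUtil.checkHeader(in, "FixedDerefBytesIdx", 0, 0); // throws CorruptIndexException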
FixedDerefBytesImpl.java

@@ -39,14 +39,16 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class FixedDerefBytesImpl {
 
-  static final String CODEC_NAME = "FixedDerefBytes";
+  static final String CODEC_NAME_IDX = "FixedDerefBytesIdx";
+  static final String CODEC_NAME_DAT = "FixedDerefBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
   public static class Writer extends DerefBytesWriterBase {
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_DEREF);
     }
 
     @Override
@@ -71,7 +73,7 @@ class FixedDerefBytesImpl {
     private final int size;
     private final int numValuesStored;
     FixedDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_FIXED_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_FIXED_DEREF);
       size = datIn.readInt();
       numValuesStored = idxIn.readInt();
     }
FixedSortedBytesImpl.java

@@ -49,7 +49,8 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class FixedSortedBytesImpl {
 
-  static final String CODEC_NAME = "FixedSortedBytes";
+  static final String CODEC_NAME_IDX = "FixedSortedBytesIdx";
+  static final String CODEC_NAME_DAT = "FixedSortedBytesDat";
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -58,7 +59,7 @@ class FixedSortedBytesImpl {
 
     public Writer(Directory dir, String id, Comparator<BytesRef> comp,
         Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_FIXED_SORTED);
       this.comp = comp;
     }
 
@@ -127,7 +128,7 @@ class FixedSortedBytesImpl {
 
     public Reader(Directory dir, String id, int maxDoc, IOContext context,
         Type type, Comparator<BytesRef> comparator) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
       size = datIn.readInt();
       valueCount = idxIn.readInt();
       this.comparator = comparator;
FixedStraightBytesImpl.java

@@ -61,14 +61,14 @@ class FixedStraightBytesImpl {
     private final int byteBlockSize = BYTE_BLOCK_SIZE;
     private final ByteBlockPool pool;
 
-    protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+    protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
         int version, Counter bytesUsed, IOContext context) throws IOException {
-      this(dir, id, codecName, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
+      this(dir, id, codecNameDat, version, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
     }
 
-    protected FixedBytesWriterBase(Directory dir, String id, String codecName,
+    protected FixedBytesWriterBase(Directory dir, String id, String codecNameDat,
         int version, Counter bytesUsed, IOContext context, Type type) throws IOException {
-      super(dir, id, codecName, version, bytesUsed, context, type);
+      super(dir, id, null, codecNameDat, version, bytesUsed, context, type);
       pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
       pool.nextBuffer();
     }
@@ -139,8 +139,8 @@ class FixedStraightBytesImpl {
       super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
     }
 
-    public Writer(Directory dir, String id, String codecName, int version, Counter bytesUsed, IOContext context) throws IOException {
-      super(dir, id, codecName, version, bytesUsed, context);
+    public Writer(Directory dir, String id, String codecNameDat, int version, Counter bytesUsed, IOContext context) throws IOException {
+      super(dir, id, codecNameDat, version, bytesUsed, context);
     }
 
@@ -268,8 +268,8 @@ class FixedStraightBytesImpl {
       this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, Type.BYTES_FIXED_STRAIGHT);
     }
 
-    protected FixedStraightReader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context, Type type) throws IOException {
-      super(dir, id, codec, version, false, context, type);
+    protected FixedStraightReader(Directory dir, String id, String codecNameDat, int version, int maxDoc, IOContext context, Type type) throws IOException {
+      super(dir, id, null, codecNameDat, version, false, context, type);
       size = datIn.readInt();
       this.maxDoc = maxDoc;
     }
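Note the null passed for codecNameIdx in the straight variants above: straight (non-dereferenced) fixed-width values are data-only, so FixedStraightReader even passes doIndex=false and never opens an .idx stream, which is exactly the one-name-null case the new assertions in BytesWriterBase permit. A sketch of the convention for a hypothetical data-only subclass:

    // Hypothetical BytesWriterBase subclass: no index file, data codec name only.
    public DemoStraightWriter(Directory dir, String id, Counter bytesUsed, IOContext context)
        throws IOException {
      super(dir, id, /* codecNameIdx = */ null, /* codecNameDat = */ "DemoStraightDat",
          VERSION_CURRENT, bytesUsed, context, Type.BYTES_FIXED_STRAIGHT);
    }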
VarDerefBytesImpl.java

@@ -41,7 +41,9 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class VarDerefBytesImpl {
 
-  static final String CODEC_NAME = "VarDerefBytes";
+  static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
+  static final String CODEC_NAME_DAT = "VarDerefBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -57,7 +59,7 @@ class VarDerefBytesImpl {
   static class Writer extends DerefBytesWriterBase {
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_DEREF);
       size = 0;
     }
 
@@ -93,7 +95,7 @@ class VarDerefBytesImpl {
   public static class VarDerefReader extends BytesReaderBase {
     private final long totalBytes;
     VarDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_DEREF);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_VAR_DEREF);
       totalBytes = idxIn.readLong();
     }
 
VarSortedBytesImpl.java

@@ -50,7 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 final class VarSortedBytesImpl {
 
-  static final String CODEC_NAME = "VarDerefBytes";
+  static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
+  static final String CODEC_NAME_DAT = "VarDerefBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -59,7 +61,7 @@ final class VarSortedBytesImpl {
 
     public Writer(Directory dir, String id, Comparator<BytesRef> comp,
         Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, fasterButMoreRam, Type.BYTES_VAR_SORTED);
       this.comp = comp;
       size = 0;
     }
@@ -154,7 +156,7 @@ final class VarSortedBytesImpl {
     Reader(Directory dir, String id, int maxDoc,
         IOContext context, Type type, Comparator<BytesRef> comparator)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
       this.comparator = comparator;
     }
 
VarStraightBytesImpl.java

@@ -50,7 +50,9 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 class VarStraightBytesImpl {
 
-  static final String CODEC_NAME = "VarStraightBytes";
+  static final String CODEC_NAME_IDX = "VarStraightBytesIdx";
+  static final String CODEC_NAME_DAT = "VarStraightBytesDat";
+
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
 
@@ -64,7 +66,7 @@ class VarStraightBytesImpl {
     private boolean merge = false;
     public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
         throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, Type.BYTES_VAR_STRAIGHT);
       pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
       docToAddress = new long[1];
       pool.nextBuffer(); // init
@@ -236,7 +238,7 @@ class VarStraightBytesImpl {
     final int maxDoc;
 
     VarStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT);
+      super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT);
       this.maxDoc = maxDoc;
     }
 