From c99756201d332853232a8872c4c0bc5c196ce8d0 Mon Sep 17 00:00:00 2001
From: Robert Muir <rmuir@apache.org>
Date: Mon, 30 Jan 2012 15:19:32 +0000
Subject: [PATCH] LUCENE-3728: split stored fields into 3x/4x so more docstore
 stuff can go in 3x

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237713 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/codecs/lucene3x/Lucene3xCodec.java |  10 +-
 .../lucene3x/Lucene3xSegmentInfosReader.java  |   2 +-
 .../lucene3x/Lucene3xStoredFieldsFormat.java  |  51 +++
 .../lucene3x/Lucene3xStoredFieldsReader.java  | 333 ++++++++++++++++++
 .../lucene40/Lucene40StoredFieldsReader.java  |  90 +----
 .../lucene40/Lucene40StoredFieldsWriter.java  |   7 +-
 .../codecs/preflexrw/PreFlexRWCodec.java      |   3 +-
 .../PreFlexRWStoredFieldsFormat.java          |  17 +
 .../PreFlexRWStoredFieldsWriter.java          | 156 ++++++++
 9 files changed, 580 insertions(+), 89 deletions(-)
 create mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java
 create mode 100644 lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java
 create mode 100644 lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java
 create mode 100644 lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java

diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
index e144b29ac5e..f636b2956ff 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
@@ -29,10 +29,8 @@ import org.apache.lucene.codecs.PerDocProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfosFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
-import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
@@ -53,13 +51,7 @@ public class Lucene3xCodec extends Codec {
 
   private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat();
   
-  // TODO: this should really be a different impl
-  private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat() {
-    @Override
-    public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
-      throw new UnsupportedOperationException("this codec can only be used for reading");
-    }
-  };
+  private final StoredFieldsFormat fieldsFormat = new Lucene3xStoredFieldsFormat();
   
   private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat();
   
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java
index 7737456d831..53e2f3efb44 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSegmentInfosReader.java
@@ -66,7 +66,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
         }
 
         try {
-          Lucene40StoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
+          Lucene3xStoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
         } finally {
           // If we opened the directory, close it
           if (dir != directory) dir.close();
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java
new file mode 100644
index 00000000000..dae458b95f3
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsFormat.java
@@ -0,0 +1,51 @@
+package org.apache.lucene.codecs.lucene3x;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/** @deprecated */
+@Deprecated
+public class Lucene3xStoredFieldsFormat extends StoredFieldsFormat {
+
+  @Override
+  public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
+      FieldInfos fn, IOContext context) throws IOException {
+    return new Lucene3xStoredFieldsReader(directory, si, fn, context);
+  }
+
+  @Override
+  public StoredFieldsWriter fieldsWriter(Directory directory, String segment,
+      IOContext context) throws IOException {
+    throw new UnsupportedOperationException("this codec can only be used for reading");
+  }
+
+  @Override
+  public void files(SegmentInfo info, Set<String> files) throws IOException {
+    Lucene3xStoredFieldsReader.files(info, files);
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java
new file mode 100644
index 00000000000..b3bf47e3f67
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xStoredFieldsReader.java
@@ -0,0 +1,333 @@
+package org.apache.lucene.codecs.lucene3x;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexFormatTooNewException;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.Closeable;
+import java.nio.charset.Charset;
+import java.util.Set;
+
+/**
+ * Class responsible for access to stored document fields.
+ * <p/>
+ * It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
+ * 
+ * @deprecated
+ */
+@Deprecated
+public final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
+  private final static int FORMAT_SIZE = 4;
+
+  /** Extension of stored fields file */
+  public static final String FIELDS_EXTENSION = "fdt";
+  
+  /** Extension of stored fields index file */
+  public static final String FIELDS_INDEX_EXTENSION = "fdx";
+  
+  // Lucene 3.0: Removal of compressed fields
+  static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
+
+  // Lucene 3.2: NumericFields are stored in binary format
+  static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+
+  // NOTE: if you introduce a new format, make it 1 higher
+  // than the current one, and always change this if you
+  // switch to a new format!
+  public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
+
+  // when removing support for old versions, leave the last supported version here
+  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+  
+  // NOTE: bit 0 is free here!  You can steal it!
+  public static final int FIELD_IS_BINARY = 1 << 1;
+
+  // the old bit 1 << 2 was compressed, is now left out
+
+  private static final int _NUMERIC_BIT_SHIFT = 3;
+  static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+  public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+  public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+  public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+  public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+
+  private final FieldInfos fieldInfos;
+  private final IndexInput fieldsStream;
+  private final IndexInput indexStream;
+  private int numTotalDocs;
+  private int size;
+  private boolean closed;
+  private final int format;
+
+  // The docID offset where our docs begin in the index
+  // file.  This will be 0 if we have our own private file.
+  private int docStoreOffset;
+
+  /** Returns a cloned FieldsReader that shares open
+   *  IndexInputs with the original one.  It is the caller's
+   *  job not to close the original FieldsReader until all
+   *  clones are called (eg, currently SegmentReader manages
+   *  this logic). */
+  @Override
+  public Lucene3xStoredFieldsReader clone() {
+    ensureOpen();
+    return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
+  }
+
+  /** Verifies that the code version which wrote the segment is supported. */
+  public static void checkCodeVersion(Directory dir, String segment) throws IOException {
+    final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
+    IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
+    
+    try {
+      int format = idxStream.readInt();
+      if (format < FORMAT_MINIMUM)
+        throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+      if (format > FORMAT_CURRENT)
+        throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+    } finally {
+      idxStream.close();
+    }
+  }
+  
+  // Used only by clone
+  private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
+                       IndexInput fieldsStream, IndexInput indexStream) {
+    this.fieldInfos = fieldInfos;
+    this.numTotalDocs = numTotalDocs;
+    this.size = size;
+    this.format = format;
+    this.docStoreOffset = docStoreOffset;
+    this.fieldsStream = fieldsStream;
+    this.indexStream = indexStream;
+  }
+
+  public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+    final String segment = si.getDocStoreSegment();
+    final int docStoreOffset = si.getDocStoreOffset();
+    final int size = si.docCount;
+    boolean success = false;
+    fieldInfos = fn;
+    try {
+      fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
+      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
+      indexStream = d.openInput(indexStreamFN, context);
+      
+      format = indexStream.readInt();
+
+      if (format < FORMAT_MINIMUM)
+        throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+      if (format > FORMAT_CURRENT)
+        throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
+
+      final long indexSize = indexStream.length() - FORMAT_SIZE;
+      
+      if (docStoreOffset != -1) {
+        // We read only a slice out of this shared fields file
+        this.docStoreOffset = docStoreOffset;
+        this.size = size;
+
+        // Verify the file is long enough to hold all of our
+        // docs
+        assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
+      } else {
+        this.docStoreOffset = 0;
+        this.size = (int) (indexSize >> 3);
+        // Verify two sources of "maxDoc" agree:
+        if (this.size != si.docCount) {
+          throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
+        }
+      }
+      numTotalDocs = (int) (indexSize >> 3);
+      success = true;
+    } finally {
+      // With lock-less commits, it's entirely possible (and
+      // fine) to hit a FileNotFound exception above. In
+      // this case, we want to explicitly close any subset
+      // of things that were opened so that we don't have to
+      // wait for a GC to do so.
+      if (!success) {
+        close();
+      }
+    }
+  }
+
+  /**
+   * @throws AlreadyClosedException if this FieldsReader is closed
+   */
+  private void ensureOpen() throws AlreadyClosedException {
+    if (closed) {
+      throw new AlreadyClosedException("this FieldsReader is closed");
+    }
+  }
+
+  /**
+   * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams.
+   * This means that the Fields values will not be accessible.
+   *
+   * @throws IOException
+   */
+  public final void close() throws IOException {
+    if (!closed) {
+      IOUtils.close(fieldsStream, indexStream);
+      closed = true;
+    }
+  }
+
+  public final int size() {
+    return size;
+  }
+
+  private void seekIndex(int docID) throws IOException {
+    indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
+  }
+
+  public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
+    seekIndex(n);
+    fieldsStream.seek(indexStream.readLong());
+
+    final int numFields = fieldsStream.readVInt();
+    for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
+      int fieldNumber = fieldsStream.readVInt();
+      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
+      
+      int bits = fieldsStream.readByte() & 0xFF;
+      assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
+
+      switch(visitor.needsField(fieldInfo)) {
+        case YES:
+          readField(visitor, fieldInfo, bits);
+          break;
+        case NO: 
+          skipField(bits);
+          break;
+        case STOP: 
+          return;
+      }
+    }
+  }
+
+  private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
+    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
+    if (numeric != 0) {
+      switch(numeric) {
+        case FIELD_IS_NUMERIC_INT:
+          visitor.intField(info, fieldsStream.readInt());
+          return;
+        case FIELD_IS_NUMERIC_LONG:
+          visitor.longField(info, fieldsStream.readLong());
+          return;
+        case FIELD_IS_NUMERIC_FLOAT:
+          visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
+          return;
+        case FIELD_IS_NUMERIC_DOUBLE:
+          visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
+          return;
+        default:
+          throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
+      }
+    } else { 
+      final int length = fieldsStream.readVInt();
+      byte bytes[] = new byte[length];
+      fieldsStream.readBytes(bytes, 0, length);
+      if ((bits & FIELD_IS_BINARY) != 0) {
+        visitor.binaryField(info, bytes, 0, bytes.length);
+      } else {
+        visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
+      }
+    }
+  }
+  
+  private void skipField(int bits) throws IOException {
+    final int numeric = bits & FIELD_IS_NUMERIC_MASK;
+    if (numeric != 0) {
+      switch(numeric) {
+        case FIELD_IS_NUMERIC_INT:
+        case FIELD_IS_NUMERIC_FLOAT:
+          fieldsStream.readInt();
+          return;
+        case FIELD_IS_NUMERIC_LONG:
+        case FIELD_IS_NUMERIC_DOUBLE:
+          fieldsStream.readLong();
+          return;
+        default: 
+          throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
+      }
+    } else {
+      final int length = fieldsStream.readVInt();
+      fieldsStream.seek(fieldsStream.getFilePointer() + length);
+    }
+  }
+
+  /** Returns the length in bytes of each raw document in a
+   *  contiguous range of length numDocs starting with
+   *  startDocID.  Returns the IndexInput (the fieldStream),
+   *  already seeked to the starting point for startDocID.*/
+  public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
+    seekIndex(startDocID);
+    long startOffset = indexStream.readLong();
+    long lastOffset = startOffset;
+    int count = 0;
+    while (count < numDocs) {
+      final long offset;
+      final int docID = docStoreOffset + startDocID + count + 1;
+      assert docID <= numTotalDocs;
+      if (docID < numTotalDocs) 
+        offset = indexStream.readLong();
+      else
+        offset = fieldsStream.length();
+      lengths[count++] = (int) (offset-lastOffset);
+      lastOffset = offset;
+    }
+
+    fieldsStream.seek(startOffset);
+
+    return fieldsStream;
+  }
+  
+  // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
+  // only preflex segments refer to these?
+  public static void files(SegmentInfo info, Set<String> files) throws IOException {
+    if (info.getDocStoreOffset() != -1) {
+      assert info.getDocStoreSegment() != null;
+      if (!info.getDocStoreIsCompoundFile()) {
+        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
+        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
+      }
+    } else {
+      files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
+      files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
index ca9991b3afa..a7457404333 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
@@ -24,8 +24,6 @@ import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFormatTooNewException;
-import org.apache.lucene.index.IndexFormatTooOldException;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -35,7 +33,6 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.IOUtils;
 
 import java.io.Closeable;
-import java.nio.charset.Charset;
 import java.util.Set;
 
 /**
@@ -54,11 +51,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   private int numTotalDocs;
   private int size;
   private boolean closed;
-  private final int format;
-
-  // The docID offset where our docs begin in the index
-  // file.  This will be 0 if we have our own private file.
-  private int docStoreOffset;
 
   /** Returns a cloned FieldsReader that shares open
    *  IndexInputs with the original one.  It is the caller's
@@ -68,41 +60,20 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   @Override
   public Lucene40StoredFieldsReader clone() {
     ensureOpen();
-    return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
-  }
-
-  /** Verifies that the code version which wrote the segment is supported. */
-  public static void checkCodeVersion(Directory dir, String segment) throws IOException {
-    final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
-    IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
-    
-    try {
-      int format = idxStream.readInt();
-      if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
-        throw new IndexFormatTooOldException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
-      if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
-        throw new IndexFormatTooNewException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
-    } finally {
-      idxStream.close();
-    }
+    return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
   }
   
   // Used only by clone
-  private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
-                       IndexInput fieldsStream, IndexInput indexStream) {
+  private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) {
     this.fieldInfos = fieldInfos;
     this.numTotalDocs = numTotalDocs;
     this.size = size;
-    this.format = format;
-    this.docStoreOffset = docStoreOffset;
     this.fieldsStream = fieldsStream;
     this.indexStream = indexStream;
   }
 
   public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
-    final String segment = si.getDocStoreSegment();
-    final int docStoreOffset = si.getDocStoreOffset();
-    final int size = si.docCount;
+    final String segment = si.name;
     boolean success = false;
     fieldInfos = fn;
     try {
@@ -110,30 +81,17 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
       final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
       indexStream = d.openInput(indexStreamFN, context);
       
-      format = indexStream.readInt();
-
-      if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
-        throw new IndexFormatTooOldException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
-      if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
-        throw new IndexFormatTooNewException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
+      // its a 4.0 codec: so its not too-old, its corrupt.
+      // TODO: change this to CodecUtil.checkHeader
+      if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
+        throw new CorruptIndexException("unexpected fdx header: " + indexStream);
+      }
 
       final long indexSize = indexStream.length() - FORMAT_SIZE;
-      
-      if (docStoreOffset != -1) {
-        // We read only a slice out of this shared fields file
-        this.docStoreOffset = docStoreOffset;
-        this.size = size;
-
-        // Verify the file is long enough to hold all of our
-        // docs
-        assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
-      } else {
-        this.docStoreOffset = 0;
-        this.size = (int) (indexSize >> 3);
-        // Verify two sources of "maxDoc" agree:
-        if (this.size != si.docCount) {
-          throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
-        }
+      this.size = (int) (indexSize >> 3);
+      // Verify two sources of "maxDoc" agree:
+      if (this.size != si.docCount) {
+        throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
       }
       numTotalDocs = (int) (indexSize >> 3);
       success = true;
@@ -176,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
   }
 
   private void seekIndex(int docID) throws IOException {
-    indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
+    indexStream.seek(FORMAT_SIZE + docID * 8L);
   }
 
   public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
@@ -203,8 +161,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
       }
     }
   }
-  
-  static final Charset UTF8 = Charset.forName("UTF-8");
 
   private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
     final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
@@ -232,7 +188,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
       if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
         visitor.binaryField(info, bytes, 0, bytes.length);
       } else {
-        visitor.stringField(info, new String(bytes, 0, bytes.length, UTF8));
+        visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
       }
     }
   }
@@ -269,7 +225,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
     int count = 0;
     while (count < numDocs) {
       final long offset;
-      final int docID = docStoreOffset + startDocID + count + 1;
+      final int docID = startDocID + count + 1;
       assert docID <= numTotalDocs;
       if (docID < numTotalDocs) 
         offset = indexStream.readLong();
@@ -283,19 +239,9 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
 
     return fieldsStream;
   }
-  
-  // TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
-  // only preflex segments refer to these?
+
   public static void files(SegmentInfo info, Set<String> files) throws IOException {
-    if (info.getDocStoreOffset() != -1) {
-      assert info.getDocStoreSegment() != null;
-      if (!info.getDocStoreIsCompoundFile()) {
-        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
-        files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
-      }
-    } else {
-      files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
-      files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
-    }
+    files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
+    files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
   }
 }
diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
index 952a92c0a24..868864833cf 100644
--- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
+++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
@@ -55,10 +55,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
   // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
   // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
 
-  // Lucene 3.0: Removal of compressed fields
-  static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
-
-  // Lucene 3.2: NumericFields are stored in binary format
+  // (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format
   static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
 
   // NOTE: if you introduce a new format, make it 1 higher
@@ -67,7 +64,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
   static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
 
   // when removing support for old versions, leave the last supported version here
-  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
 
   /** Extension of stored fields file */
   public static final String FIELDS_EXTENSION = "fdt";
diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
index d946ad72c77..2013d91167c 100644
--- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
+++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
@@ -45,10 +45,9 @@ public class PreFlexRWCodec extends Lucene3xCodec {
   private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat();
   private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat();
   private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat();
+  private final StoredFieldsFormat storedFields = new PreFlexRWStoredFieldsFormat();
   // TODO: this should really be a different impl
   private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat();
-  // TODO: this should really be a different impl
-  private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
   
   @Override
   public PostingsFormat postingsFormat() {
diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java
new file mode 100644
index 00000000000..76b1a224a46
--- /dev/null
+++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsFormat.java
@@ -0,0 +1,17 @@
+package org.apache.lucene.codecs.preflexrw;
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsFormat;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+public class PreFlexRWStoredFieldsFormat extends Lucene3xStoredFieldsFormat {
+
+  @Override
+  public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    return new PreFlexRWStoredFieldsWriter(directory, segment, context);
+  }
+  
+}
diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java
new file mode 100644
index 00000000000..7888b792c9a
--- /dev/null
+++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWStoredFieldsWriter.java
@@ -0,0 +1,156 @@
+package org.apache.lucene.codecs.preflexrw;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsReader;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/** @lucene.experimental */
+public final class PreFlexRWStoredFieldsWriter extends StoredFieldsWriter {
+  private final Directory directory;
+  private final String segment;
+  private IndexOutput fieldsStream;
+  private IndexOutput indexStream;
+
+  public PreFlexRWStoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    assert directory != null;
+    this.directory = directory;
+    this.segment = segment;
+
+    boolean success = false;
+    try {
+      fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
+      indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);
+
+      fieldsStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
+      indexStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
+
+      success = true;
+    } finally {
+      if (!success) {
+        abort();
+      }
+    }
+  }
+
+  // Writes the contents of buffer into the fields stream
+  // and adds a new entry for this document into the index
+  // stream.  This assumes the buffer was already written
+  // in the correct fields format.
+  public void startDocument(int numStoredFields) throws IOException {
+    indexStream.writeLong(fieldsStream.getFilePointer());
+    fieldsStream.writeVInt(numStoredFields);
+  }
+
+  public void close() throws IOException {
+    try {
+      IOUtils.close(fieldsStream, indexStream);
+    } finally {
+      fieldsStream = indexStream = null;
+    }
+  }
+
+  public void abort() {
+    try {
+      close();
+    } catch (IOException ignored) {}
+    IOUtils.deleteFilesIgnoringExceptions(directory,
+        IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION),
+        IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
+  }
+
+  public void writeField(FieldInfo info, IndexableField field) throws IOException {
+    fieldsStream.writeVInt(info.number);
+    int bits = 0;
+    final BytesRef bytes;
+    final String string;
+    // TODO: maybe a field should serialize itself?
+    // this way we don't bake into indexer all these
+    // specific encodings for different fields?  and apps
+    // can customize...
+
+    Number number = field.numericValue();
+    if (number != null) {
+      if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
+      } else if (number instanceof Long) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
+      } else if (number instanceof Float) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
+      } else if (number instanceof Double) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
+      } else {
+        throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
+      }
+      string = null;
+      bytes = null;
+    } else {
+      bytes = field.binaryValue();
+      if (bytes != null) {
+        bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
+        string = null;
+      } else {
+        string = field.stringValue();
+        if (string == null) {
+          throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
+        }
+      }
+    }
+
+    fieldsStream.writeByte((byte) bits);
+
+    if (bytes != null) {
+      fieldsStream.writeVInt(bytes.length);
+      fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+    } else if (string != null) {
+      fieldsStream.writeString(field.stringValue());
+    } else {
+      if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
+        fieldsStream.writeInt(number.intValue());
+      } else if (number instanceof Long) {
+        fieldsStream.writeLong(number.longValue());
+      } else if (number instanceof Float) {
+        fieldsStream.writeInt(Float.floatToIntBits(number.floatValue()));
+      } else if (number instanceof Double) {
+        fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue()));
+      } else {
+        assert false;
+      }
+    }
+  }
+
+  @Override
+  public void finish(int numDocs) throws IOException {
+    if (4+((long) numDocs)*8 != indexStream.getFilePointer())
+      // This is most likely a bug in Sun JRE 1.6.0_04/_05;
+      // we detect that the bug has struck, here, and
+      // throw an exception to prevent the corruption from
+      // entering the index.  See LUCENE-1282 for
+      // details.
+      throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
+  }
+}