mirror of https://github.com/apache/lucene.git

LUCENE-843: speed up IndexWriter performance

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@553236 13f79535-47bb-0310-9956-ffa450edef68

This commit is contained in:
parent 4fe90e4086
commit cff5767e44

Changed files (partial listing): CHANGES.txt (13 lines changed)
CHANGES.txt

@@ -105,6 +105,13 @@ API Changes
    to be public because it implements the public interface TermPositionVector.
    (Michael Busch)

14. LUCENE-843: Added IndexWriter.setRAMBufferSizeMB(...) to have
    IndexWriter flush whenever the buffered documents are using more
    than the specified amount of RAM. Also added new APIs to Token
    that allow one to set a char[] plus offset and length to specify a
    token (to avoid creating a new String() for each Token). (Mike
    McCandless)

Bug fixes

 1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist. (Doron Cohen)

@@ -267,6 +274,12 @@ Optimizations
    lists. For average AND queries the speedup is about 20%, for queries that
    contain very frequent and very unique terms the speedup can be over 80%.
    (Michael Busch)

 8. LUCENE-843: Substantial optimizations to improve how IndexWriter
    uses RAM for buffering documents and to speed up indexing (2X-8X
    faster). A single shared hash table now records the in-memory
    postings per unique term and is directly flushed into a single
    segment. (Mike McCandless)

Documentation
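The entry above introduces two new APIs. A minimal usage sketch follows; the index path, analyzer, 32 MB buffer size, and token text are arbitrary examples chosen for illustration, not part of this commit.

    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.FSDirectory;

    public class Lucene843Sketch {
      public static void main(String[] args) throws Exception {
        // Flush by RAM usage rather than by buffered-document count
        // ("/tmp/index" is an arbitrary example path):
        IndexWriter writer = new IndexWriter(FSDirectory.getDirectory("/tmp/index"),
                                             new StandardAnalyzer(), true);
        writer.setRAMBufferSizeMB(32.0);
        writer.close();

        // Describe a token with a shared char[] plus offset/length,
        // avoiding a new String per Token:
        char[] buf = "lucene".toCharArray();
        Token t = new Token(buf, 0, buf.length, 0, buf.length);
      }
    }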
@@ -77,6 +77,7 @@ public class TestParser extends TestCase {
        line=d.readLine();
      }
      d.close();
      writer.close();
    }
    reader=IndexReader.open(dir);
    searcher=new IndexSearcher(reader);
@@ -380,10 +380,18 @@ document.write("Last Published: " + document.lastModified);
But note that once a commit has occurred, pre-2.1
Lucene will not be able to read the index.
</p>
<p>
In version 2.3, the file format was changed to allow
segments to share a single set of doc store (vectors &
stored fields) files. This allows for faster indexing
in certain cases. The change is fully backwards
compatible (in the same way as the lock-less commits
change in 2.1).
</p>
</div>
<a name="N10032"></a><a name="Definitions"></a>
<a name="N10035"></a><a name="Definitions"></a>
<h2 class="boxed">Definitions</h2>
<div class="section">
<p>
@@ -424,7 +432,7 @@ document.write("Last Published: " + document.lastModified);
strings, the first naming the field, and the second naming text
within the field.
</p>
<a name="N10052"></a><a name="Inverted Indexing"></a>
<a name="N10055"></a><a name="Inverted Indexing"></a>
<h3 class="boxed">Inverted Indexing</h3>
<p>
The index stores statistics about terms in order
@@ -434,7 +442,7 @@ document.write("Last Published: " + document.lastModified);
it. This is the inverse of the natural relationship, in which
documents list terms.
</p>
<a name="N1005E"></a><a name="Types of Fields"></a>
<a name="N10061"></a><a name="Types of Fields"></a>
<h3 class="boxed">Types of Fields</h3>
<p>
In Lucene, fields may be <i>stored</i>, in which
@@ -448,7 +456,7 @@ document.write("Last Published: " + document.lastModified);
to be indexed literally.
</p>
<p>See the <a href="http://lucene.apache.org/java/docs/api/org/apache/lucene/document/Field.html">Field</a> java docs for more information on Fields.</p>
<a name="N1007B"></a><a name="Segments"></a>
<a name="N1007E"></a><a name="Segments"></a>
<h3 class="boxed">Segments</h3>
<p>
Lucene indexes may be composed of multiple sub-indexes, or
@@ -474,7 +482,7 @@ document.write("Last Published: " + document.lastModified);
Searches may involve multiple segments and/or multiple indexes, each
index potentially composed of a set of segments.
</p>
<a name="N10099"></a><a name="Document Numbers"></a>
<a name="N1009C"></a><a name="Document Numbers"></a>
<h3 class="boxed">Document Numbers</h3>
<p>
Internally, Lucene refers to documents by an integer <i>document
@@ -529,7 +537,7 @@ document.write("Last Published: " + document.lastModified);
</div>
<a name="N100C0"></a><a name="Overview"></a>
<a name="N100C3"></a><a name="Overview"></a>
<h2 class="boxed">Overview</h2>
<div class="section">
<p>
@@ -626,7 +634,7 @@ document.write("Last Published: " + document.lastModified);
</div>
<a name="N10103"></a><a name="File Naming"></a>
<a name="N10106"></a><a name="File Naming"></a>
<h2 class="boxed">File Naming</h2>
<div class="section">
<p>
@@ -654,10 +662,10 @@ document.write("Last Published: " + document.lastModified);
</div>
<a name="N10112"></a><a name="Primitive Types"></a>
<a name="N10115"></a><a name="Primitive Types"></a>
<h2 class="boxed">Primitive Types</h2>
<div class="section">
<a name="N10117"></a><a name="Byte"></a>
<a name="N1011A"></a><a name="Byte"></a>
<h3 class="boxed">Byte</h3>
<p>
The most primitive type
@@ -665,7 +673,7 @@ document.write("Last Published: " + document.lastModified);
other data types are defined as sequences
of bytes, so file formats are byte-order independent.
</p>
<a name="N10120"></a><a name="UInt32"></a>
<a name="N10123"></a><a name="UInt32"></a>
<h3 class="boxed">UInt32</h3>
<p>
32-bit unsigned integers are written as four
@@ -675,7 +683,7 @@ document.write("Last Published: " + document.lastModified);
UInt32 --> <Byte><sup>4</sup>
</p>
<a name="N1012F"></a><a name="Uint64"></a>
<a name="N10132"></a><a name="Uint64"></a>
<h3 class="boxed">Uint64</h3>
<p>
64-bit unsigned integers are written as eight
@@ -684,7 +692,7 @@ document.write("Last Published: " + document.lastModified);
<p>UInt64 --> <Byte><sup>8</sup>
</p>
<a name="N1013E"></a><a name="VInt"></a>
<a name="N10141"></a><a name="VInt"></a>
<h3 class="boxed">VInt</h3>
<p>
A variable-length format for positive integers is
@@ -1234,7 +1242,7 @@ document.write("Last Published: " + document.lastModified);
This provides compression while still being
efficient to decode.
</p>
<a name="N10423"></a><a name="Chars"></a>
<a name="N10426"></a><a name="Chars"></a>
<h3 class="boxed">Chars</h3>
<p>
Lucene writes unicode
@@ -1243,7 +1251,7 @@ document.write("Last Published: " + document.lastModified);
UTF-8 encoding"</a>
.
</p>
<a name="N10430"></a><a name="String"></a>
<a name="N10433"></a><a name="String"></a>
<h3 class="boxed">String</h3>
<p>
Lucene writes strings as a VInt representing the length, followed by
@@ -1255,13 +1263,13 @@ document.write("Last Published: " + document.lastModified);
</div>
<a name="N1043D"></a><a name="Per-Index Files"></a>
<a name="N10440"></a><a name="Per-Index Files"></a>
<h2 class="boxed">Per-Index Files</h2>
<div class="section">
<p>
The files in this section exist one-per-index.
</p>
<a name="N10445"></a><a name="Segments File"></a>
<a name="N10448"></a><a name="Segments File"></a>
<h3 class="boxed">Segments File</h3>
<p>
The active segments in the index are stored in the
@@ -1316,16 +1324,24 @@ document.write("Last Published: " + document.lastModified);
</p>
<p>
Format, NameCounter, SegCount, SegSize, NumField --> Int32
<b>2.3 and above:</b>
Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
IsCompoundFile><sup>SegCount</sup>
</p>
<p>
Format, NameCounter, SegCount, SegSize, NumField, DocStoreOffset --> Int32
</p>
<p>
Version, DelGen, NormGen --> Int64
</p>
<p>
SegName --> String
SegName, DocStoreSegment --> String
</p>
<p>
IsCompoundFile, HasSingleNormFile --> Int8
IsCompoundFile, HasSingleNormFile, DocStoreIsCompoundFile --> Int8
</p>
<p>
Format is -1 as of Lucene 1.4 and -3 (SegmentInfos.FORMAT_SINGLE_NORM_FILE) as of Lucene 2.1.
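To make the 2.3 grammar above concrete, here is a simplified sketch of walking one segments_N record with an IndexInput. It ignores special NumField values and all error handling, and it is only an illustration of the field order; the real parser lives in SegmentInfos.

    static void readSegmentsHeader(org.apache.lucene.store.IndexInput in) throws java.io.IOException {
      int format = in.readInt();              // Format --> Int32
      long version = in.readLong();           // Version --> Int64
      int nameCounter = in.readInt();
      int segCount = in.readInt();
      for (int i = 0; i < segCount; i++) {
        String segName = in.readString();
        int segSize = in.readInt();
        long delGen = in.readLong();
        int docStoreOffset = in.readInt();
        String docStoreSegment = null;
        byte docStoreIsCompoundFile = 0;
        if (docStoreOffset != -1) {           // shared doc store files are in use
          docStoreSegment = in.readString();
          docStoreIsCompoundFile = in.readByte();
        }
        byte hasSingleNormFile = in.readByte();
        int numField = in.readInt();
        for (int j = 0; j < numField; j++)
          in.readLong();                      // NormGen[j]
        byte isCompoundFile = in.readByte();
      }
    }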
@@ -1380,7 +1396,28 @@ document.write("Last Published: " + document.lastModified);
are stored as separate <tt>.fN</tt> files. See
"Normalization Factors" below for details.
</p>
<a name="N104A9"></a><a name="Lock File"></a>
<p>
DocStoreOffset, DocStoreSegment,
DocStoreIsCompoundFile: If DocStoreOffset is -1,
this segment has its own doc store (stored fields
values and term vectors) files and DocStoreSegment
and DocStoreIsCompoundFile are not stored. In
this case all files for stored field values
(<tt>*.fdt</tt> and <tt>*.fdx</tt>) and term
vectors (<tt>*.tvf</tt>, <tt>*.tvd</tt> and
<tt>*.tvx</tt>) will be stored with this segment.
Otherwise, DocStoreSegment is the name of the
segment that has the shared doc store files;
DocStoreIsCompoundFile is 1 if that segment is
stored in compound file format (as a <tt>.cfx</tt>
file); and DocStoreOffset is the starting document
in the shared doc store files where this segment's
documents begin. In this case, this segment does
not store its own doc store files but instead
shares a single set of these files with other
segments.
</p>
<a name="N104CD"></a><a name="Lock File"></a>
<h3 class="boxed">Lock File</h3>
<p>
The write lock, which is stored in the index
@@ -1398,7 +1435,7 @@ document.write("Last Published: " + document.lastModified);
Note that prior to version 2.1, Lucene also used a
commit lock. This was removed in 2.1.
</p>
<a name="N104B5"></a><a name="Deletable File"></a>
<a name="N104D9"></a><a name="Deletable File"></a>
<h3 class="boxed">Deletable File</h3>
<p>
Prior to Lucene 2.1 there was a file "deletable"
@@ -1407,7 +1444,7 @@ document.write("Last Published: " + document.lastModified);
the files that are deletable, instead, so no file
is written.
</p>
<a name="N104BE"></a><a name="Compound Files"></a>
<a name="N104E2"></a><a name="Compound Files"></a>
<h3 class="boxed">Compound Files</h3>
<p>Starting with Lucene 1.4 the compound file format became default. This
is simply a container for all files described in the next section
@@ -1424,17 +1461,24 @@ document.write("Last Published: " + document.lastModified);
<p>FileName --> String</p>
<p>FileData --> raw file data</p>
<p>The raw file data is the data from the individual files named above.</p>
<p>Starting with Lucene 2.3, doc store files (stored
field values and term vectors) can be shared in a
single set of files for more than one segment. When
compound file is enabled, these shared files will be
added into a single compound file (same format as
above) but with the extension <tt>.cfx</tt>.
</p>
</div>
<a name="N104E0"></a><a name="Per-Segment Files"></a>
<a name="N1050A"></a><a name="Per-Segment Files"></a>
<h2 class="boxed">Per-Segment Files</h2>
<div class="section">
<p>
The remaining files are all per-segment, and are
thus defined by suffix.
</p>
<a name="N104E8"></a><a name="Fields"></a>
<a name="N10512"></a><a name="Fields"></a>
<h3 class="boxed">Fields</h3>
<p>
@@ -1653,7 +1697,7 @@ document.write("Last Published: " + document.lastModified);
</li>
</ol>
<a name="N105A3"></a><a name="Term Dictionary"></a>
<a name="N105CD"></a><a name="Term Dictionary"></a>
<h3 class="boxed">Term Dictionary</h3>
<p>
The term dictionary is represented as two files:
@@ -1839,7 +1883,7 @@ document.write("Last Published: " + document.lastModified);
</li>
</ol>
<a name="N10623"></a><a name="Frequencies"></a>
<a name="N1064D"></a><a name="Frequencies"></a>
<h3 class="boxed">Frequencies</h3>
<p>
The .frq file contains the lists of documents
@@ -1957,7 +2001,7 @@ document.write("Last Published: " + document.lastModified);
entry in level-1. In the example, entry 15 on level 1 has a pointer to entry 15 on level 0, and entry 31 on level 1 has a pointer
to entry 31 on level 0.
</p>
<a name="N106A5"></a><a name="Positions"></a>
<a name="N106CF"></a><a name="Positions"></a>
<h3 class="boxed">Positions</h3>
<p>
The .prx file contains the lists of positions that
@@ -2023,7 +2067,7 @@ document.write("Last Published: " + document.lastModified);
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
</p>
<a name="N106E1"></a><a name="Normalization Factors"></a>
<a name="N1070B"></a><a name="Normalization Factors"></a>
<h3 class="boxed">Normalization Factors</h3>
<p>
@@ -2127,7 +2171,7 @@ document.write("Last Published: " + document.lastModified);
<b>2.1 and above:</b>
Separate norm files are created (when adequate) for both compound and non compound segments.
</p>
<a name="N1074A"></a><a name="Term Vectors"></a>
<a name="N10774"></a><a name="Term Vectors"></a>
<h3 class="boxed">Term Vectors</h3>
<ol>
@@ -2253,7 +2297,7 @@ document.write("Last Published: " + document.lastModified);
</li>
</ol>
<a name="N107DD"></a><a name="Deleted Documents"></a>
<a name="N10807"></a><a name="Deleted Documents"></a>
<h3 class="boxed">Deleted Documents</h3>
<p>The .del file is
optional, and only exists when a segment contains deletions.
@@ -2325,7 +2369,7 @@ document.write("Last Published: " + document.lastModified);
</div>
<a name="N10820"></a><a name="Limitations"></a>
<a name="N1084A"></a><a name="Limitations"></a>
<h2 class="boxed">Limitations</h2>
<div class="section">
<p>There
[File diff suppressed because it is too large]
@@ -55,6 +55,13 @@ public class Token implements Cloneable {

  Payload payload;

  // For better indexing speed, use termBuffer (and
  // termBufferOffset/termBufferLength) instead of termText
  // to save new'ing a String per token
  char[] termBuffer;
  int termBufferOffset;
  int termBufferLength;

  private int positionIncrement = 1;

  /** Constructs a Token with the given term text, and start & end offsets.

@@ -65,6 +72,17 @@ public class Token implements Cloneable {
    endOffset = end;
  }

  /** Constructs a Token with the given term text buffer
   * starting at offset for length length, and start & end offsets.
   * The type defaults to "word." */
  public Token(char[] text, int offset, int length, int start, int end) {
    termBuffer = text;
    termBufferOffset = offset;
    termBufferLength = length;
    startOffset = start;
    endOffset = end;
  }

  /** Constructs a Token with the given text, start and end offsets, & type. */
  public Token(String text, int start, int end, String typ) {
    termText = text;

@@ -73,6 +91,19 @@ public class Token implements Cloneable {
    type = typ;
  }

  /** Constructs a Token with the given term text buffer
   * starting at offset for length length, and start & end
   * offsets, & type. */
  public Token(char[] text, int offset, int length, int start, int end, String typ) {
    termBuffer = text;
    termBufferOffset = offset;
    termBufferLength = length;
    startOffset = start;
    endOffset = end;
    type = typ;
  }

  /** Set the position increment. This determines the position of this token
   * relative to the previous Token in a {@link TokenStream}, used in phrase
   * searching.

@@ -117,6 +148,19 @@ public class Token implements Cloneable {

  /** Returns the Token's term text. */
  public final String termText() { return termText; }
  public final char[] termBuffer() { return termBuffer; }
  public final int termBufferOffset() { return termBufferOffset; }
  public final int termBufferLength() { return termBufferLength; }

  public void setStartOffset(int offset) {this.startOffset = offset;}
  public void setEndOffset(int offset) {this.endOffset = offset;}

  public final void setTermBuffer(char[] buffer, int offset, int length) {
    this.termBuffer = buffer;
    this.termBufferOffset = offset;
    this.termBufferLength = length;
  }

  /** Returns this Token's starting offset, the position of the first character
   corresponding to this token in the source text.
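The new Token accessors above allow a tokenizer to reuse one char[] across tokens instead of allocating a String each time. A small sketch of that pattern follows; the buffer contents and offsets are made up for illustration.

    static void tokenBufferReuseSketch() {
      char[] buffer = new char[64];
      "hello world".getChars(0, 11, buffer, 0);

      // Point a Token at the first word without creating a String:
      Token t = new Token(buffer, 0, 5, 0, 5);        // "hello"

      // Later, repoint the same Token at another slice of the same buffer:
      t.setTermBuffer(buffer, 6, 5);                  // "world"
      t.setStartOffset(6);
      t.setEndOffset(11);

      // Consumers read the term back via the buffer accessors:
      char[] term = t.termBuffer();
      int offset = t.termBufferOffset();
      int length = t.termBufferLength();
    }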
[File diff suppressed because it is too large]
@@ -43,4 +43,9 @@ final class FieldInfo {
    this.omitNorms = omitNorms;
    this.storePayloads = storePayloads;
  }

  public Object clone() {
    return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
                         storeOffsetWithTermVector, omitNorms, storePayloads);
  }
}
@@ -62,6 +62,20 @@ final class FieldInfos {
    }
  }

  /**
   * Returns a deep clone of this FieldInfos instance.
   */
  public Object clone() {
    FieldInfos fis = new FieldInfos();
    final int numField = byNumber.size();
    for(int i=0;i<numField;i++) {
      FieldInfo fi = (FieldInfo) ((FieldInfo) byNumber.get(i)).clone();
      fis.byNumber.add(fi);
      fis.byName.put(fi.name, fi);
    }
    return fis;
  }

  /** Adds field info for a Document. */
  public void add(Document doc) {
    List fields = doc.getFields();
@@ -51,19 +51,39 @@ final class FieldsReader {
  private int size;
  private boolean closed;

  // The docID offset where our docs begin in the index
  // file. This will be 0 if we have our own private file.
  private int docStoreOffset;

  private ThreadLocal fieldsStreamTL = new ThreadLocal();

  FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE);
    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, -1, 0);
  }

  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize) throws IOException {
    this(d, segment, fn, readBufferSize, -1, 0);
  }

  FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) throws IOException {
    fieldInfos = fn;

    cloneableFieldsStream = d.openInput(segment + ".fdt", readBufferSize);
    fieldsStream = (IndexInput)cloneableFieldsStream.clone();
    indexStream = d.openInput(segment + ".fdx", readBufferSize);
    size = (int) (indexStream.length() / 8);

    if (docStoreOffset != -1) {
      // We read only a slice out of this shared fields file
      this.docStoreOffset = docStoreOffset;
      this.size = size;

      // Verify the file is long enough to hold all of our
      // docs
      assert ((int) (indexStream.length()/8)) >= size + this.docStoreOffset;
    } else {
      this.docStoreOffset = 0;
      this.size = (int) (indexStream.length() / 8);
    }
  }

  /**

@@ -100,7 +120,7 @@ final class FieldsReader {
  }

  final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
    indexStream.seek(n * 8L);
    indexStream.seek((n + docStoreOffset) * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);
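The seek change above is the whole trick behind shared doc stores for stored fields: every document owns one 8-byte pointer in the .fdx file, so a segment that starts at docStoreOffset within a shared file finds its local document n at slot n + docStoreOffset. A one-line sketch of that address arithmetic (the method name is ours, for illustration):

    static long fdxFilePointer(int n, int docStoreOffset) {
      // slot index times 8 bytes per long pointer into the .fdt file
      return (n + docStoreOffset) * 8L;
    }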
@@ -24,6 +24,7 @@ import java.util.zip.Deflater;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.IndexOutput;

final class FieldsWriter

@@ -38,15 +39,92 @@ final class FieldsWriter

  private IndexOutput indexStream;

  private boolean doClose;

  FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException {
    fieldInfos = fn;
    fieldsStream = d.createOutput(segment + ".fdt");
    indexStream = d.createOutput(segment + ".fdx");
    doClose = true;
  }

  FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) throws IOException {
    fieldInfos = fn;
    fieldsStream = fdt;
    indexStream = fdx;
    doClose = false;
  }

  // Writes the contents of buffer into the fields stream
  // and adds a new entry for this document into the index
  // stream. This assumes the buffer was already written
  // in the correct fields format.
  void flushDocument(RAMOutputStream buffer) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());
    buffer.writeTo(fieldsStream);
  }

  void flush() throws IOException {
    indexStream.flush();
    fieldsStream.flush();
  }

  final void close() throws IOException {
    if (doClose) {
      fieldsStream.close();
      indexStream.close();
    }
  }

  final void writeField(FieldInfo fi, Fieldable field) throws IOException {
    // if the field is an instanceof FieldsReader.FieldForMerge, we're in merge mode
    // and field.binaryValue() already returns the compressed value for a field
    // with isCompressed()==true, so we disable compression in that case
    boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
    fieldsStream.writeVInt(fi.number);
    byte bits = 0;
    if (field.isTokenized())
      bits |= FieldsWriter.FIELD_IS_TOKENIZED;
    if (field.isBinary())
      bits |= FieldsWriter.FIELD_IS_BINARY;
    if (field.isCompressed())
      bits |= FieldsWriter.FIELD_IS_COMPRESSED;

    fieldsStream.writeByte(bits);

    if (field.isCompressed()) {
      // compression is enabled for the current field
      byte[] data = null;

      if (disableCompression) {
        // optimized case for merging, the data
        // is already compressed
        data = field.binaryValue();
      } else {
        // check if it is a binary field
        if (field.isBinary()) {
          data = compress(field.binaryValue());
        }
        else {
          data = compress(field.stringValue().getBytes("UTF-8"));
        }
      }
      final int len = data.length;
      fieldsStream.writeVInt(len);
      fieldsStream.writeBytes(data, len);
    }
    else {
      // compression is disabled for the current field
      if (field.isBinary()) {
        byte[] data = field.binaryValue();
        final int len = data.length;
        fieldsStream.writeVInt(len);
        fieldsStream.writeBytes(data, len);
      }
      else {
        fieldsStream.writeString(field.stringValue());
      }
    }
  }

  final void addDocument(Document doc) throws IOException {

@@ -64,57 +142,8 @@ final class FieldsWriter
    fieldIterator = doc.getFields().iterator();
    while (fieldIterator.hasNext()) {
      Fieldable field = (Fieldable) fieldIterator.next();
      // if the field is an instanceof FieldsReader.FieldForMerge, we're in merge mode
      // and field.binaryValue() already returns the compressed value for a field
      // with isCompressed()==true, so we disable compression in that case
      boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
      if (field.isStored()) {
        fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));

        byte bits = 0;
        if (field.isTokenized())
          bits |= FieldsWriter.FIELD_IS_TOKENIZED;
        if (field.isBinary())
          bits |= FieldsWriter.FIELD_IS_BINARY;
        if (field.isCompressed())
          bits |= FieldsWriter.FIELD_IS_COMPRESSED;

        fieldsStream.writeByte(bits);

        if (field.isCompressed()) {
          // compression is enabled for the current field
          byte[] data = null;

          if (disableCompression) {
            // optimized case for merging, the data
            // is already compressed
            data = field.binaryValue();
          } else {
            // check if it is a binary field
            if (field.isBinary()) {
              data = compress(field.binaryValue());
            }
            else {
              data = compress(field.stringValue().getBytes("UTF-8"));
            }
          }
          final int len = data.length;
          fieldsStream.writeVInt(len);
          fieldsStream.writeBytes(data, len);
        }
        else {
          // compression is disabled for the current field
          if (field.isBinary()) {
            byte[] data = field.binaryValue();
            final int len = data.length;
            fieldsStream.writeVInt(len);
            fieldsStream.writeBytes(data, len);
          }
          else {
            fieldsStream.writeString(field.stringValue());
          }
        }
      }
      if (field.isStored())
        writeField(fieldInfos.fieldInfo(field.name()), field);
    }
  }
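The new flushDocument path lets a caller build one document's stored fields in memory and then append them to the shared .fdx/.fdt files in a single step. The fragment below is only a hedged sketch of that wiring, not how DocumentsWriter actually assembles it; fieldInfos, titleField, fdx, and fdt are assumed inputs, the class is package-private, and the per-document stored-field count written here follows the .fdt format described in the file-formats documentation.

    static void bufferedStoredFieldsSketch(FieldInfos fieldInfos, Fieldable titleField,
                                           IndexOutput fdx, IndexOutput fdt) throws IOException {
      RAMOutputStream docBuffer = new RAMOutputStream();
      // Buffer-backed writer: fields go into docBuffer in .fdt format
      FieldsWriter perDoc = new FieldsWriter(null, docBuffer, fieldInfos);
      docBuffer.writeVInt(1);                                  // stored-field count for this doc
      perDoc.writeField(fieldInfos.fieldInfo("title"), titleField);

      // Writer over the real shared files: one 8-byte index entry plus a buffer copy
      FieldsWriter shared = new FieldsWriter(fdx, fdt, fieldInfos);
      shared.flushDocument(docBuffer);
    }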
@@ -97,6 +97,7 @@ final class IndexFileDeleter {
  private PrintStream infoStream;
  private Directory directory;
  private IndexDeletionPolicy policy;
  private DocumentsWriter docWriter;

  void setInfoStream(PrintStream infoStream) {
    this.infoStream = infoStream;

@@ -116,10 +117,12 @@ final class IndexFileDeleter {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream)
  public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, DocumentsWriter docWriter)
    throws CorruptIndexException, IOException {

    this.docWriter = docWriter;
    this.infoStream = infoStream;

    this.policy = policy;
    this.directory = directory;

@@ -294,7 +297,7 @@ final class IndexFileDeleter {
  public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {

    if (infoStream != null) {
      message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [isCommit = " + isCommit + "]");
      message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
    }

    // Try again now to delete any previously un-deletable

@@ -310,6 +313,8 @@ final class IndexFileDeleter {

    // Incref the files:
    incRef(segmentInfos, isCommit);
    if (docWriter != null)
      incRef(docWriter.files());

    if (isCommit) {
      // Append to our commits list:

@@ -325,9 +330,8 @@ final class IndexFileDeleter {
      // DecRef old files from the last checkpoint, if any:
      int size = lastFiles.size();
      if (size > 0) {
        for(int i=0;i<size;i++) {
        for(int i=0;i<size;i++)
          decRef((List) lastFiles.get(i));
        }
        lastFiles.clear();
      }

@@ -340,6 +344,8 @@ final class IndexFileDeleter {
          lastFiles.add(segmentInfo.files());
        }
      }
      if (docWriter != null)
        lastFiles.add(docWriter.files());
    }
  }
@@ -38,18 +38,54 @@ final class IndexFileNames {
  /** Extension of norms file */
  static final String NORMS_EXTENSION = "nrm";

  /** Extension of freq postings file */
  static final String FREQ_EXTENSION = "frq";

  /** Extension of prox postings file */
  static final String PROX_EXTENSION = "prx";

  /** Extension of terms file */
  static final String TERMS_EXTENSION = "tis";

  /** Extension of terms index file */
  static final String TERMS_INDEX_EXTENSION = "tii";

  /** Extension of stored fields index file */
  static final String FIELDS_INDEX_EXTENSION = "fdx";

  /** Extension of stored fields file */
  static final String FIELDS_EXTENSION = "fdt";

  /** Extension of vectors fields file */
  static final String VECTORS_FIELDS_EXTENSION = "tvf";

  /** Extension of vectors documents file */
  static final String VECTORS_DOCUMENTS_EXTENSION = "tvd";

  /** Extension of vectors index file */
  static final String VECTORS_INDEX_EXTENSION = "tvx";

  /** Extension of compound file */
  static final String COMPOUND_FILE_EXTENSION = "cfs";

  /** Extension of compound file for doc store files*/
  static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";

  /** Extension of deletes */
  static final String DELETES_EXTENSION = "del";

  /** Extension of field infos */
  static final String FIELD_INFOS_EXTENSION = "fnm";

  /** Extension of plain norms */
  static final String PLAIN_NORMS_EXTENSION = "f";

  /** Extension of separate norms */
  static final String SEPARATE_NORMS_EXTENSION = "s";

  /** Extension of gen file */
  static final String GEN_EXTENSION = "gen";

  /**
   * This array contains all filename extensions used by
   * Lucene's index files, with two exceptions, namely the

@@ -59,25 +95,72 @@ final class IndexFileNames {
   * filename extension.
   */
  static final String INDEX_EXTENSIONS[] = new String[] {
    "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
    "tvx", "tvd", "tvf", "gen", "nrm"
    COMPOUND_FILE_EXTENSION,
    FIELD_INFOS_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    TERMS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    DELETES_EXTENSION,
    VECTORS_INDEX_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    VECTORS_FIELDS_EXTENSION,
    GEN_EXTENSION,
    NORMS_EXTENSION,
    COMPOUND_FILE_STORE_EXTENSION,
  };

  /** File extensions that are added to a compound file
   * (same as above, minus "del", "gen", "cfs"). */
  static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] {
    "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx",
    "tvx", "tvd", "tvf", "nrm"
    FIELD_INFOS_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    TERMS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    VECTORS_INDEX_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    VECTORS_FIELDS_EXTENSION,
    NORMS_EXTENSION
  };

  static final String[] STORE_INDEX_EXTENSIONS = new String[] {
    VECTORS_INDEX_EXTENSION,
    VECTORS_FIELDS_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION
  };

  static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] {
    FIELD_INFOS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    TERMS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    NORMS_EXTENSION
  };

  /** File extensions of old-style index files */
  static final String COMPOUND_EXTENSIONS[] = new String[] {
    "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
    FIELD_INFOS_EXTENSION,
    FREQ_EXTENSION,
    PROX_EXTENSION,
    FIELDS_INDEX_EXTENSION,
    FIELDS_EXTENSION,
    TERMS_INDEX_EXTENSION,
    TERMS_EXTENSION
  };

  /** File extensions for term vector support */
  static final String VECTOR_EXTENSIONS[] = new String[] {
    "tvx", "tvd", "tvf"
    VECTORS_INDEX_EXTENSION,
    VECTORS_DOCUMENTS_EXTENSION,
    VECTORS_FIELDS_EXTENSION
  };

  /**
@@ -203,7 +203,8 @@ public class IndexModifier {
      indexWriter = new IndexWriter(directory, analyzer, false);
      indexWriter.setInfoStream(infoStream);
      indexWriter.setUseCompoundFile(useCompoundFile);
      indexWriter.setMaxBufferedDocs(maxBufferedDocs);
      if (maxBufferedDocs != 0)
        indexWriter.setMaxBufferedDocs(maxBufferedDocs);
      indexWriter.setMaxFieldLength(maxFieldLength);
      indexWriter.setMergeFactor(mergeFactor);
    }
@@ -783,7 +783,7 @@ public abstract class IndexReader {
      // KeepOnlyLastCommitDeleter:
      IndexFileDeleter deleter = new IndexFileDeleter(directory,
                                                      deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
                                                      segmentInfos, null);
                                                      segmentInfos, null, null);

      // Checkpoint the state we are about to change, in
      // case we have to roll back:
@@ -61,14 +61,19 @@ import java.util.Map.Entry;
  When finished adding, deleting and updating documents, <a href="#close()"><b>close</b></a> should be called.</p>

  <p>These changes are buffered in memory and periodically
  flushed to the {@link Directory} (during the above method calls). A flush is triggered when there are
  enough buffered deletes (see {@link
  #setMaxBufferedDeleteTerms}) or enough added documents
  (see {@link #setMaxBufferedDocs}) since the last flush,
  whichever is sooner. You can also force a flush by
  calling {@link #flush}. When a flush occurs, both pending
  deletes and added documents are flushed to the index. A
  flush may also trigger one or more segment merges.</p>
  flushed to the {@link Directory} (during the above method
  calls). A flush is triggered when there are enough
  buffered deletes (see {@link #setMaxBufferedDeleteTerms})
  or enough added documents since the last flush, whichever
  is sooner. For the added documents, flushing is triggered
  either by RAM usage of the documents (see {@link
  #setRAMBufferSizeMB}) or the number of added documents
  (this is the default; see {@link #setMaxBufferedDocs}).
  For best indexing speed you should flush by RAM usage with
  a large RAM buffer. You can also force a flush by calling
  {@link #flush}. When a flush occurs, both pending deletes
  and added documents are flushed to the index. A flush may
  also trigger one or more segment merges.</p>

  <a name="autoCommit"></a>
  <p>The optional <code>autoCommit</code> argument to the
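To make the two flush triggers described in the javadoc above concrete, here is a hedged configuration sketch; the 48 MB buffer and the 1000-term delete threshold are arbitrary example values, and the method name is ours.

    static IndexWriter openForBulkIndexing(Directory dir, Analyzer analyzer) throws IOException {
      IndexWriter writer = new IndexWriter(dir, analyzer, true);
      // Default mode: flush every N added documents, e.g.
      // writer.setMaxBufferedDocs(1000);
      // Faster mode described above: flush by RAM usage with a large buffer.
      writer.setRAMBufferSizeMB(48.0);
      // Buffered delete terms still flush by count:
      writer.setMaxBufferedDeleteTerms(1000);
      return writer;
    }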
@@ -181,7 +186,20 @@ public class IndexWriter {
  /**
   * Default value is 10. Change using {@link #setMaxBufferedDocs(int)}.
   */
  public final static int DEFAULT_MAX_BUFFERED_DOCS = 10;
  /* new merge policy
  public final static int DEFAULT_MAX_BUFFERED_DOCS = 0;
  */

  /**
   * Default value is 0 MB (which means flush only by doc
   * count). Change using {@link #setRAMBufferSizeMB}.
   */
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 0.0;
  /* new merge policy
  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
  */

  /**
   * Default value is 1000. Change using {@link #setMaxBufferedDeleteTerms(int)}.
@@ -224,8 +242,7 @@ public class IndexWriter {
  private boolean autoCommit = true; // false if we should commit only on close

  SegmentInfos segmentInfos = new SegmentInfos();       // the segments
  SegmentInfos ramSegmentInfos = new SegmentInfos();    // the segments in ramDirectory
  private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs
  private DocumentsWriter docWriter;
  private IndexFileDeleter deleter;

  private Lock writeLock;

@@ -621,11 +638,14 @@ public class IndexWriter {
        rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
      }

      docWriter = new DocumentsWriter(directory, this);
      docWriter.setInfoStream(infoStream);

      // Default deleter (for backwards compatibility) is
      // KeepOnlyLastCommitDeleter:
      deleter = new IndexFileDeleter(directory,
                                     deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
                                     segmentInfos, infoStream);
                                     segmentInfos, infoStream, docWriter);

    } catch (IOException e) {
      this.writeLock.release();
@@ -683,31 +703,64 @@ public class IndexWriter {
    return maxFieldLength;
  }

  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merged and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * large value gives faster indexing. At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
  /** Determines the minimal number of documents required
   * before the buffered in-memory documents are flushed as
   * a new Segment. Large values generally give faster
   * indexing.
   *
   * <p> The default value is 10.
   * <p>When this is set, the writer will flush every
   * maxBufferedDocs added documents and never flush by RAM
   * usage.</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2
   * <p> The default value is 0 (writer flushes by RAM
   * usage).</p>
   *
   * @throws IllegalArgumentException if maxBufferedDocs is
   * smaller than 2
   * @see #setRAMBufferSizeMB
   */
  public void setMaxBufferedDocs(int maxBufferedDocs) {
    ensureOpen();
    if (maxBufferedDocs < 2)
      throw new IllegalArgumentException("maxBufferedDocs must at least be 2");
    this.minMergeDocs = maxBufferedDocs;
    docWriter.setMaxBufferedDocs(maxBufferedDocs);
  }

  /**
   * Returns the number of buffered added documents that will
   * Returns 0 if this writer is flushing by RAM usage, else
   * returns the number of buffered added documents that will
   * trigger a flush.
   * @see #setMaxBufferedDocs
   */
  public int getMaxBufferedDocs() {
    ensureOpen();
    return minMergeDocs;
    return docWriter.getMaxBufferedDocs();
  }

  /** Determines the amount of RAM that may be used for
   * buffering added documents before they are flushed as a
   * new Segment. Generally for faster indexing performance
   * it's best to flush by RAM usage instead of document
   * count and use as large a RAM buffer as you can.
   *
   * <p>When this is set, the writer will flush whenever
   * buffered documents use this much RAM.</p>
   *
   * <p> The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.</p>
   */
  public void setRAMBufferSizeMB(double mb) {
    if (mb <= 0.0)
      throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB");
    docWriter.setRAMBufferSizeMB(mb);
  }

  /**
   * Returns 0.0 if this writer is flushing by document
   * count, else returns the value set by {@link
   * #setRAMBufferSizeMB}.
   */
  public double getRAMBufferSizeMB() {
    return docWriter.getRAMBufferSizeMB();
  }

  /**
@@ -788,6 +841,7 @@ public class IndexWriter {
  public void setInfoStream(PrintStream infoStream) {
    ensureOpen();
    this.infoStream = infoStream;
    docWriter.setInfoStream(infoStream);
    deleter.setInfoStream(infoStream);
  }

@@ -871,7 +925,7 @@ public class IndexWriter {
   */
  public synchronized void close() throws CorruptIndexException, IOException {
    if (!closed) {
      flushRamSegments();
      flush(true, true);

      if (commitPending) {
        segmentInfos.write(directory);         // now commit changes

@@ -880,18 +934,79 @@ public class IndexWriter {
        rollbackSegmentInfos = null;
      }

      ramDirectory.close();
      if (writeLock != null) {
        writeLock.release();                          // release write lock
        writeLock = null;
      }
      closed = true;
      docWriter = null;

      if(closeDir)
        directory.close();
    }
  }

  /** Tells the docWriter to close its currently open shared
   *  doc stores (stored fields & vectors files). */
  private void flushDocStores() throws IOException {

    List files = docWriter.files();

    if (files.size() > 0) {
      String docStoreSegment;

      boolean success = false;
      try {
        docStoreSegment = docWriter.closeDocStore();
        success = true;
      } finally {
        if (!success)
          docWriter.abort();
      }

      if (useCompoundFile && docStoreSegment != null) {
        // Now build compound doc store file
        checkpoint();

        success = false;

        final int numSegments = segmentInfos.size();

        try {
          CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
          final int size = files.size();
          for(int i=0;i<size;i++)
            cfsWriter.addFile((String) files.get(i));

          // Perform the merge
          cfsWriter.close();

          for(int i=0;i<numSegments;i++) {
            SegmentInfo si = segmentInfos.info(i);
            if (si.getDocStoreOffset() != -1 &&
                si.getDocStoreSegment().equals(docStoreSegment))
              si.setDocStoreIsCompoundFile(true);
          }
          checkpoint();
          success = true;
        } finally {
          if (!success) {
            // Rollback to no compound file
            for(int i=0;i<numSegments;i++) {
              SegmentInfo si = segmentInfos.info(i);
              if (si.getDocStoreOffset() != -1 &&
                  si.getDocStoreSegment().equals(docStoreSegment))
                si.setDocStoreIsCompoundFile(false);
            }
            deleter.refresh();
          }
        }

        deleter.checkpoint(segmentInfos, false);
      }
    }
  }

  /** Release the write lock, if needed. */
  protected void finalize() throws Throwable {
    try {
@@ -916,11 +1031,10 @@ public class IndexWriter {
    return analyzer;
  }

  /** Returns the number of documents currently in this index. */
  public synchronized int docCount() {
    ensureOpen();
    int count = ramSegmentInfos.size();
    int count = docWriter.getNumDocsInRAM();
    for (int i = 0; i < segmentInfos.size(); i++) {
      SegmentInfo si = segmentInfos.info(i);
      count += si.docCount;

@@ -998,22 +1112,8 @@ public class IndexWriter {
   */
  public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    ensureOpen();
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }

  SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this);
    dw.setInfoStream(infoStream);
    String segmentName = newRamSegmentName();
    dw.addDocument(segmentName, doc);
    SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false);
    si.setNumFields(dw.getNumFields());
    return si;
    if (docWriter.addDocument(doc, analyzer))
      flush(true, false);
  }

  /**

@@ -1025,7 +1125,7 @@ public class IndexWriter {
  public synchronized void deleteDocuments(Term term) throws CorruptIndexException, IOException {
    ensureOpen();
    bufferDeleteTerm(term);
    maybeFlushRamSegments();
    maybeFlush();
  }

  /**

@@ -1041,7 +1141,7 @@ public class IndexWriter {
    for (int i = 0; i < terms.length; i++) {
      bufferDeleteTerm(terms[i]);
    }
    maybeFlushRamSegments();
    maybeFlush();
  }

  /**

@@ -1077,16 +1177,13 @@ public class IndexWriter {
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer);
    synchronized (this) {
      bufferDeleteTerm(term);
      ramSegmentInfos.addElement(newSegmentInfo);
      maybeFlushRamSegments();
    }
  }

  final synchronized String newRamSegmentName() {
    return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX);
    if (docWriter.addDocument(doc, analyzer))
      flush(true, false);
    else
      maybeFlush();
  }

  // for test purpose

@@ -1095,8 +1192,8 @@ public class IndexWriter {
  }

  // for test purpose
  final synchronized int getRamSegmentCount(){
    return ramSegmentInfos.size();
  final synchronized int getNumBufferedDocuments(){
    return docWriter.getNumDocsInRAM();
  }

  // for test purpose

@@ -1108,7 +1205,7 @@ public class IndexWriter {
    }
  }

  final synchronized String newSegmentName() {
  final String newSegmentName() {
    return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
  }
@@ -1125,17 +1222,10 @@ public class IndexWriter {
   */
  private int mergeFactor = DEFAULT_MERGE_FACTOR;

  /** Determines the minimal number of documents required before the buffered
   * in-memory documents are merging and a new Segment is created.
   * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory},
   * large value gives faster indexing. At the same time, mergeFactor limits
   * the number of files open in a FSDirectory.
   *
   * <p> The default value is {@link #DEFAULT_MAX_BUFFERED_DOCS}.
  /** Determines amount of RAM usage by the buffered docs at
   * which point we trigger a flush to the index.
   */
  private int minMergeDocs = DEFAULT_MAX_BUFFERED_DOCS;
  private double ramBufferSize = DEFAULT_RAM_BUFFER_SIZE_MB*1024F*1024F;

  /** Determines the largest number of documents ever merged by addDocument().
   * Small values (e.g., less than 10,000) are best for interactive indexing,

@@ -1151,6 +1241,7 @@ public class IndexWriter {
   */
  private PrintStream infoStream = null;

  private static PrintStream defaultInfoStream = null;

  /** Merges all segments together into a single segment,

@@ -1219,16 +1310,16 @@ public class IndexWriter {
   */
  public synchronized void optimize() throws CorruptIndexException, IOException {
    ensureOpen();
    flushRamSegments();
    flush();
    while (segmentInfos.size() > 1 ||
           (segmentInfos.size() == 1 &&
            (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
             SegmentReader.hasSeparateNorms(segmentInfos.info(0)) ||
             segmentInfos.info(0).dir != directory ||
             (useCompoundFile &&
              (!SegmentReader.usesCompoundFile(segmentInfos.info(0))))))) {
              !segmentInfos.info(0).getUseCompoundFile())))) {
      int minSegment = segmentInfos.size() - mergeFactor;
      mergeSegments(segmentInfos, minSegment < 0 ? 0 : minSegment, segmentInfos.size());
      mergeSegments(minSegment < 0 ? 0 : minSegment, segmentInfos.size());
    }
  }

@@ -1245,7 +1336,7 @@ public class IndexWriter {
    localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
    localAutoCommit = autoCommit;
    if (localAutoCommit) {
      flushRamSegments();
      flush();
      // Turn off auto-commit during our local transaction:
      autoCommit = false;
    } else
@@ -1335,16 +1426,18 @@ public class IndexWriter {
      segmentInfos.clear();
      segmentInfos.addAll(rollbackSegmentInfos);

      docWriter.abort();

      // Ask deleter to locate unreferenced files & remove
      // them:
      deleter.checkpoint(segmentInfos, false);
      deleter.refresh();

      ramSegmentInfos = new SegmentInfos();
      bufferedDeleteTerms.clear();
      numBufferedDeleteTerms = 0;

      commitPending = false;
      docWriter.abort();
      close();

    } else {

@@ -1439,7 +1532,7 @@ public class IndexWriter {
    for (int base = start; base < segmentInfos.size(); base++) {
      int end = Math.min(segmentInfos.size(), base+mergeFactor);
      if (end-base > 1) {
        mergeSegments(segmentInfos, base, end);
        mergeSegments(base, end);
      }
    }
  }

@@ -1479,7 +1572,7 @@ public class IndexWriter {
  // segments in S may not since they could come from multiple indexes.
  // Here is the merge algorithm for addIndexesNoOptimize():
  //
  // 1 Flush ram segments.
  // 1 Flush ram.
  // 2 Consider a combined sequence with segments from T followed
  //   by segments from S (same as current addIndexes(Directory[])).
  // 3 Assume the highest level for segments in S is h. Call

@@ -1500,13 +1593,18 @@ public class IndexWriter {
    // copy a segment, which may cause doc count to change because deleted
    // docs are garbage collected.

    // 1 flush ram segments
    // 1 flush ram

    ensureOpen();
    flushRamSegments();
    flush();

    // 2 copy segment infos and find the highest level from dirs
    int startUpperBound = minMergeDocs;
    int startUpperBound = docWriter.getMaxBufferedDocs();

    /* new merge policy
    if (startUpperBound == 0)
      startUpperBound = 10;
    */

    boolean success = false;

@@ -1566,7 +1664,7 @@ public class IndexWriter {

        // copy those segments from S
        for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) {
          mergeSegments(segmentInfos, i, i + 1);
          mergeSegments(i, i + 1);
        }
        if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) {
          success = true;

@@ -1575,7 +1673,7 @@ public class IndexWriter {
      }

      // invariants do not hold, simply merge those segments
      mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount);
      mergeSegments(segmentCount - numTailSegments, segmentCount);

      // maybe merge segments again if necessary
      if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) {

@@ -1637,7 +1735,8 @@ public class IndexWriter {
      }

      segmentInfos.setSize(0);                      // pop old infos & add new
      info = new SegmentInfo(mergedName, docCount, directory, false, true);
      info = new SegmentInfo(mergedName, docCount, directory, false, true,
                             -1, null, false);
      segmentInfos.addElement(info);

      success = true;
@@ -1720,29 +1819,19 @@ public class IndexWriter {
   * buffered added documents or buffered deleted terms are
   * large enough.
   */
  protected final void maybeFlushRamSegments() throws CorruptIndexException, IOException {
    // A flush is triggered if enough new documents are buffered or
    // if enough delete terms are buffered
    if (ramSegmentInfos.size() >= minMergeDocs || numBufferedDeleteTerms >= maxBufferedDeleteTerms) {
      flushRamSegments();
    }
  protected final synchronized void maybeFlush() throws CorruptIndexException, IOException {
    // We only check for flush due to number of buffered
    // delete terms, because triggering of a flush due to
    // too many added documents is handled by
    // DocumentsWriter
    if (numBufferedDeleteTerms >= maxBufferedDeleteTerms && docWriter.setFlushPending())
      flush(true, false);
  }

  /** Expert: Flushes all RAM-resident segments (buffered documents), then may merge segments. */
  private final synchronized void flushRamSegments() throws CorruptIndexException, IOException {
    flushRamSegments(true);
  public final synchronized void flush() throws CorruptIndexException, IOException {
    flush(true, false);
  }

  /** Expert: Flushes all RAM-resident segments (buffered documents),
   * then may merge segments if triggerMerge==true. */
  protected final synchronized void flushRamSegments(boolean triggerMerge)
      throws CorruptIndexException, IOException {
    if (ramSegmentInfos.size() > 0 || bufferedDeleteTerms.size() > 0) {
      mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size());
      if (triggerMerge) maybeMergeSegments(minMergeDocs);
    }
  }

  /**
   * Flush all in-memory buffered updates (adds and deletes)
   * to the Directory.
@@ -1751,9 +1840,158 @@ public class IndexWriter {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public final synchronized void flush() throws CorruptIndexException, IOException {
  public final synchronized void flush(boolean triggerMerge, boolean flushDocStores) throws CorruptIndexException, IOException {
    ensureOpen();
    flushRamSegments();

    // Make sure no threads are actively adding a document
    docWriter.pauseAllThreads();

    try {

      SegmentInfo newSegment = null;

      final int numDocs = docWriter.getNumDocsInRAM();

      // Always flush docs if there are any
      boolean flushDocs = numDocs > 0;

      // With autoCommit=true we always must flush the doc
      // stores when we flush
      flushDocStores |= autoCommit;
      String docStoreSegment = docWriter.getDocStoreSegment();
      if (docStoreSegment == null)
        flushDocStores = false;

      // Always flush deletes if there are any delete terms.
      // TODO: when autoCommit=false we don't have to flush
      // deletes with every flushed segment; we can save
      // CPU/IO by buffering longer & flushing deletes only
      // when they are full or writer is being closed. We
      // have to fix the "applyDeletesSelectively" logic to
      // apply to more than just the last flushed segment
      boolean flushDeletes = bufferedDeleteTerms.size() > 0;

      if (infoStream != null)
        infoStream.println("  flush: flushDocs=" + flushDocs +
                           " flushDeletes=" + flushDeletes +
                           " flushDocStores=" + flushDocStores +
                           " numDocs=" + numDocs);

      int docStoreOffset = docWriter.getDocStoreOffset();
      boolean docStoreIsCompoundFile = false;

      // Check if the doc stores must be separately flushed
      // because other segments, besides the one we are about
      // to flush, reference it
      if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
        // We must separately flush the doc store
        if (infoStream != null)
          infoStream.println("  flush shared docStore segment " + docStoreSegment);

        flushDocStores();
        flushDocStores = false;
        docStoreIsCompoundFile = useCompoundFile;
      }

      String segment = docWriter.getSegment();

      if (flushDocs || flushDeletes) {

        SegmentInfos rollback = null;

        if (flushDeletes)
          rollback = (SegmentInfos) segmentInfos.clone();

        boolean success = false;

        try {
          if (flushDocs) {

            if (0 == docStoreOffset && flushDocStores) {
              // This means we are flushing private doc stores
              // with this segment, so it will not be shared
              // with other segments
              assert docStoreSegment != null;
              assert docStoreSegment.equals(segment);
              docStoreOffset = -1;
              docStoreIsCompoundFile = false;
              docStoreSegment = null;
            }

            int flushedDocCount = docWriter.flush(flushDocStores);

            newSegment = new SegmentInfo(segment,
                                         flushedDocCount,
                                         directory, false, true,
                                         docStoreOffset, docStoreSegment,
                                         docStoreIsCompoundFile);
            segmentInfos.addElement(newSegment);
          }

          if (flushDeletes) {
            // we should be able to change this so we can
            // buffer deletes longer and then flush them to
            // multiple flushed segments, when
            // autoCommit=false
            applyDeletes(flushDocs);
            doAfterFlush();
          }

          checkpoint();
          success = true;
        } finally {
          if (!success) {
            if (flushDeletes) {
              // Fully replace the segmentInfos since flushed
              // deletes could have changed any of the
              // SegmentInfo instances:
              segmentInfos.clear();
              segmentInfos.addAll(rollback);
            } else {
              // Remove segment we added, if any:
              if (newSegment != null &&
                  segmentInfos.size() > 0 &&
                  segmentInfos.info(segmentInfos.size()-1) == newSegment)
                segmentInfos.remove(segmentInfos.size()-1);
            }
            if (flushDocs)
              docWriter.abort();
            deleter.checkpoint(segmentInfos, false);
            deleter.refresh();
          }
        }

        deleter.checkpoint(segmentInfos, autoCommit);

        if (flushDocs && useCompoundFile) {
          success = false;
          try {
            docWriter.createCompoundFile(segment);
            newSegment.setUseCompoundFile(true);
            checkpoint();
            success = true;
          } finally {
            if (!success) {
              newSegment.setUseCompoundFile(false);
              deleter.refresh();
            }
          }

          deleter.checkpoint(segmentInfos, autoCommit);
        }

        /* new merge policy
        if (0 == docWriter.getMaxBufferedDocs())
          maybeMergeSegments(mergeFactor * numDocs / 2);
        else
          maybeMergeSegments(docWriter.getMaxBufferedDocs());
        */
        maybeMergeSegments(docWriter.getMaxBufferedDocs());
      }
    } finally {
      docWriter.clearFlushPending();
      docWriter.resumeAllThreads();
    }
  }

  /** Expert: Return the total size of all index files currently cached in memory.

@@ -1761,15 +1999,15 @@ public class IndexWriter {
   */
  public final long ramSizeInBytes() {
    ensureOpen();
    return ramDirectory.sizeInBytes();
    return docWriter.getRAMUsed();
  }

  /** Expert: Return the number of documents whose segments are currently cached in memory.
   * Useful when calling flushRamSegments()
   * Useful when calling flush()
   */
  public final synchronized int numRamDocs() {
    ensureOpen();
return ramSegmentInfos.size();
|
||||
return docWriter.getNumDocsInRAM();
|
||||
}
|
||||
|
||||
/** Incremental segment merger. */
|
||||
|
@ -1777,6 +2015,10 @@ public class IndexWriter {
|
|||
long lowerBound = -1;
|
||||
long upperBound = startUpperBound;
|
||||
|
||||
/* new merge policy
|
||||
if (upperBound == 0) upperBound = 10;
|
||||
*/
|
||||
|
||||
while (upperBound < maxMergeDocs) {
|
||||
int minSegment = segmentInfos.size();
|
||||
int maxSegment = -1;
|
||||
|
@ -1808,7 +2050,7 @@ public class IndexWriter {
|
|||
while (numSegments >= mergeFactor) {
|
||||
// merge the leftmost* mergeFactor segments
|
||||
|
||||
int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor);
|
||||
int docCount = mergeSegments(minSegment, minSegment + mergeFactor);
|
||||
numSegments -= mergeFactor;
|
||||
|
||||
if (docCount > upperBound) {
|
||||
|
@ -1837,39 +2079,108 @@ public class IndexWriter {
|
|||
* Merges the named range of segments, replacing them in the stack with a
|
||||
* single segment.
|
||||
*/
|
||||
private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end)
|
||||
|
||||
private final int mergeSegments(int minSegment, int end)
|
||||
throws CorruptIndexException, IOException {
|
||||
|
||||
// We may be called solely because there are deletes
|
||||
// pending, in which case doMerge is false:
|
||||
boolean doMerge = end > 0;
|
||||
final String mergedName = newSegmentName();
|
||||
|
||||
SegmentMerger merger = null;
|
||||
|
||||
final List ramSegmentsToDelete = new ArrayList();
|
||||
|
||||
SegmentInfo newSegment = null;
|
||||
|
||||
int mergedDocCount = 0;
|
||||
boolean anyDeletes = (bufferedDeleteTerms.size() != 0);
|
||||
|
||||
// This is try/finally to make sure merger's readers are closed:
|
||||
try {
|
||||
|
||||
if (doMerge) {
|
||||
if (infoStream != null) infoStream.print("merging segments");
|
||||
merger = new SegmentMerger(this, mergedName);
|
||||
if (infoStream != null) infoStream.print("merging segments");
|
||||
|
||||
for (int i = minSegment; i < end; i++) {
|
||||
SegmentInfo si = sourceSegments.info(i);
|
||||
if (infoStream != null)
|
||||
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
|
||||
IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE); // no need to set deleter (yet)
|
||||
merger.add(reader);
|
||||
if (reader.directory() == this.ramDirectory) {
|
||||
ramSegmentsToDelete.add(si);
|
||||
}
|
||||
}
|
||||
// Check whether this merge will allow us to skip
|
||||
// merging the doc stores (stored field & vectors).
|
||||
// This is a very substantial optimization (saves tons
|
||||
// of IO) that can only be applied with
|
||||
// autoCommit=false.
|
||||
|
||||
Directory lastDir = directory;
|
||||
String lastDocStoreSegment = null;
|
||||
boolean mergeDocStores = false;
|
||||
boolean doFlushDocStore = false;
|
||||
int next = -1;
|
||||
|
||||
// Test each segment to be merged
|
||||
for (int i = minSegment; i < end; i++) {
|
||||
SegmentInfo si = segmentInfos.info(i);
|
||||
|
||||
// If it has deletions we must merge the doc stores
|
||||
if (si.hasDeletions())
|
||||
mergeDocStores = true;
|
||||
|
||||
// If it has its own (private) doc stores we must
|
||||
// merge the doc stores
|
||||
if (-1 == si.getDocStoreOffset())
|
||||
mergeDocStores = true;
|
||||
|
||||
// If it has a different doc store segment than
|
||||
// previous segments, we must merge the doc stores
|
||||
String docStoreSegment = si.getDocStoreSegment();
|
||||
if (docStoreSegment == null)
|
||||
mergeDocStores = true;
|
||||
else if (lastDocStoreSegment == null)
|
||||
lastDocStoreSegment = docStoreSegment;
|
||||
else if (!lastDocStoreSegment.equals(docStoreSegment))
|
||||
mergeDocStores = true;
|
||||
|
||||
// Segments' docScoreOffsets must be in-order,
|
||||
// contiguous. For the default merge policy now
|
||||
// this will always be the case but for an arbitrary
|
||||
// merge policy this may not be the case
|
||||
if (-1 == next)
|
||||
next = si.getDocStoreOffset() + si.docCount;
|
||||
else if (next != si.getDocStoreOffset())
|
||||
mergeDocStores = true;
|
||||
else
|
||||
next = si.getDocStoreOffset() + si.docCount;
|
||||
|
||||
// If the segment comes from a different directory
|
||||
// we must merge
|
||||
if (lastDir != si.dir)
|
||||
mergeDocStores = true;
|
||||
|
||||
// If the segment is referencing the current "live"
|
||||
// doc store outputs then we must merge
|
||||
if (si.getDocStoreOffset() != -1 && si.getDocStoreSegment().equals(docWriter.getDocStoreSegment()))
|
||||
doFlushDocStore = true;
|
||||
}
|
||||
|
||||
final int docStoreOffset;
|
||||
final String docStoreSegment;
|
||||
final boolean docStoreIsCompoundFile;
|
||||
if (mergeDocStores) {
|
||||
docStoreOffset = -1;
|
||||
docStoreSegment = null;
|
||||
docStoreIsCompoundFile = false;
|
||||
} else {
|
||||
SegmentInfo si = segmentInfos.info(minSegment);
|
||||
docStoreOffset = si.getDocStoreOffset();
|
||||
docStoreSegment = si.getDocStoreSegment();
|
||||
docStoreIsCompoundFile = si.getDocStoreIsCompoundFile();
|
||||
}
|
||||
|
||||
if (mergeDocStores && doFlushDocStore)
|
||||
// SegmentMerger intends to merge the doc stores
|
||||
// (stored fields, vectors), and at least one of the
|
||||
// segments to be merged refers to the currently
|
||||
// live doc stores.
|
||||
flushDocStores();
|
||||
|
||||
merger = new SegmentMerger(this, mergedName);
|
||||
|
||||
for (int i = minSegment; i < end; i++) {
|
||||
SegmentInfo si = segmentInfos.info(i);
|
||||
if (infoStream != null)
|
||||
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
|
||||
IndexReader reader = SegmentReader.get(si, MERGE_READ_BUFFER_SIZE, mergeDocStores); // no need to set deleter (yet)
|
||||
merger.add(reader);
|
||||
}
|
||||
|
||||
SegmentInfos rollback = null;
|
||||
|
@ -1879,65 +2190,32 @@ public class IndexWriter {
|
|||
// if we hit exception when doing the merge:
|
||||
try {
|
||||
|
||||
if (doMerge) {
|
||||
mergedDocCount = merger.merge();
|
||||
mergedDocCount = merger.merge(mergeDocStores);
|
||||
|
||||
if (infoStream != null) {
|
||||
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
|
||||
}
|
||||
|
||||
newSegment = new SegmentInfo(mergedName, mergedDocCount,
|
||||
directory, false, true);
|
||||
if (infoStream != null) {
|
||||
infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
|
||||
}
|
||||
|
||||
newSegment = new SegmentInfo(mergedName, mergedDocCount,
|
||||
directory, false, true,
|
||||
docStoreOffset,
|
||||
docStoreSegment,
|
||||
docStoreIsCompoundFile);
|
||||
|
||||
if (sourceSegments != ramSegmentInfos || anyDeletes) {
|
||||
// Now save the SegmentInfo instances that
|
||||
// we are replacing:
|
||||
rollback = (SegmentInfos) segmentInfos.clone();
|
||||
}
|
||||
rollback = (SegmentInfos) segmentInfos.clone();
|
||||
|
||||
if (doMerge) {
|
||||
if (sourceSegments == ramSegmentInfos) {
|
||||
segmentInfos.addElement(newSegment);
|
||||
} else {
|
||||
for (int i = end-1; i > minSegment; i--) // remove old infos & add new
|
||||
sourceSegments.remove(i);
|
||||
for (int i = end-1; i > minSegment; i--) // remove old infos & add new
|
||||
segmentInfos.remove(i);
|
||||
|
||||
segmentInfos.set(minSegment, newSegment);
|
||||
}
|
||||
}
|
||||
segmentInfos.set(minSegment, newSegment);
|
||||
|
||||
if (sourceSegments == ramSegmentInfos) {
|
||||
maybeApplyDeletes(doMerge);
|
||||
doAfterFlush();
|
||||
}
|
||||
|
||||
checkpoint();
|
||||
|
||||
success = true;
|
||||
|
||||
} finally {
|
||||
|
||||
if (success) {
|
||||
// The non-ram-segments case is already committed
|
||||
// (above), so all the remains for ram segments case
|
||||
// is to clear the ram segments:
|
||||
if (sourceSegments == ramSegmentInfos) {
|
||||
ramSegmentInfos.removeAllElements();
|
||||
}
|
||||
} else {
|
||||
|
||||
// Must rollback so our state matches index:
|
||||
if (sourceSegments == ramSegmentInfos && !anyDeletes) {
|
||||
// Simple case: newSegment may or may not have
|
||||
// been added to the end of our segment infos,
|
||||
// so just check & remove if so:
|
||||
if (newSegment != null &&
|
||||
segmentInfos.size() > 0 &&
|
||||
segmentInfos.info(segmentInfos.size()-1) == newSegment) {
|
||||
segmentInfos.remove(segmentInfos.size()-1);
|
||||
}
|
||||
} else if (rollback != null) {
|
||||
if (!success) {
|
||||
if (rollback != null) {
|
||||
// Rollback the individual SegmentInfo
|
||||
// instances, but keep original SegmentInfos
|
||||
// instance (so we don't try to write again the
|
||||
|
@ -1952,16 +2230,13 @@ public class IndexWriter {
|
|||
}
|
||||
} finally {
|
||||
// close readers before we attempt to delete now-obsolete segments
|
||||
if (doMerge) merger.closeReaders();
|
||||
merger.closeReaders();
|
||||
}
|
||||
|
||||
// Delete the RAM segments
|
||||
deleter.deleteDirect(ramDirectory, ramSegmentsToDelete);
|
||||
|
||||
// Give deleter a chance to remove files now.
|
||||
deleter.checkpoint(segmentInfos, autoCommit);
|
||||
|
||||
if (useCompoundFile && doMerge) {
|
||||
if (useCompoundFile) {
|
||||
|
||||
boolean success = false;
|
||||
|
||||
|
@ -1988,19 +2263,23 @@ public class IndexWriter {
|
|||
}
|
||||
|
||||
// Called during flush to apply any buffered deletes. If
|
||||
// doMerge is true then a new segment was just created and
|
||||
// flushed from the ram segments.
|
||||
private final void maybeApplyDeletes(boolean doMerge) throws CorruptIndexException, IOException {
|
||||
// flushedNewSegment is true then a new segment was just
|
||||
// created and flushed from the ram segments, so we will
|
||||
// selectively apply the deletes to that new segment.
|
||||
private final void applyDeletes(boolean flushedNewSegment) throws CorruptIndexException, IOException {
|
||||
|
||||
if (bufferedDeleteTerms.size() > 0) {
|
||||
if (infoStream != null)
|
||||
infoStream.println("flush " + numBufferedDeleteTerms + " buffered deleted terms on "
|
||||
+ segmentInfos.size() + " segments.");
|
||||
|
||||
if (doMerge) {
|
||||
if (flushedNewSegment) {
|
||||
IndexReader reader = null;
|
||||
try {
|
||||
reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
|
||||
// Open readers w/o opening the stored fields /
|
||||
// vectors because these files may still be held
|
||||
// open for writing by docWriter
|
||||
reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1), false);
|
||||
|
||||
// Apply delete terms to the segment just flushed from ram
|
||||
// apply appropriately so that a delete term is only applied to
|
||||
|
@ -2018,14 +2297,14 @@ public class IndexWriter {
|
|||
}
|
||||
|
||||
int infosEnd = segmentInfos.size();
|
||||
if (doMerge) {
|
||||
if (flushedNewSegment) {
|
||||
infosEnd--;
|
||||
}
|
||||
|
||||
for (int i = 0; i < infosEnd; i++) {
|
||||
IndexReader reader = null;
|
||||
try {
|
||||
reader = SegmentReader.get(segmentInfos.info(i));
|
||||
reader = SegmentReader.get(segmentInfos.info(i), false);
|
||||
|
||||
// Apply delete terms to disk segments
|
||||
// except the one just flushed from ram.
|
||||
|
@ -2049,7 +2328,12 @@ public class IndexWriter {
|
|||
|
||||
private final boolean checkNonDecreasingLevels(int start) {
|
||||
int lowerBound = -1;
|
||||
int upperBound = minMergeDocs;
|
||||
int upperBound = docWriter.getMaxBufferedDocs();
|
||||
|
||||
/* new merge policy
|
||||
if (upperBound == 0)
|
||||
upperBound = 10;
|
||||
*/
|
||||
|
||||
for (int i = segmentInfos.size() - 1; i >= start; i--) {
|
||||
int docCount = segmentInfos.info(i).docCount;
|
||||
|
@ -2098,10 +2382,11 @@ public class IndexWriter {
|
|||
// well as the disk segments.
|
||||
private void bufferDeleteTerm(Term term) {
|
||||
Num num = (Num) bufferedDeleteTerms.get(term);
|
||||
int numDoc = docWriter.getNumDocsInRAM();
|
||||
if (num == null) {
|
||||
bufferedDeleteTerms.put(term, new Num(ramSegmentInfos.size()));
|
||||
bufferedDeleteTerms.put(term, new Num(numDoc));
|
||||
} else {
|
||||
num.setNum(ramSegmentInfos.size());
|
||||
num.setNum(numDoc);
|
||||
}
|
||||
numBufferedDeleteTerms++;
|
||||
}
|
||||
|
|
|
@@ -65,6 +65,12 @@ final class SegmentInfo {
  private List files;                     // cached list of files that this segment uses
                                          // in the Directory

  private int docStoreOffset;             // if this segment shares stored fields & vectors, this
                                          // offset is where in that file this segment's docs begin
  private String docStoreSegment;         // name used to derive fields/vectors file we share with
                                          // other segments
  private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)

  public SegmentInfo(String name, int docCount, Directory dir) {
    this.name = name;
    this.docCount = docCount;

@@ -73,13 +79,25 @@ final class SegmentInfo {
    isCompoundFile = CHECK_DIR;
    preLockless = true;
    hasSingleNormFile = false;
    docStoreOffset = -1;
    docStoreSegment = name;
    docStoreIsCompoundFile = false;
  }

  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) {
    this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false);
  }

  public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile,
                     int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile) {
    this(name, docCount, dir);
    this.isCompoundFile = (byte) (isCompoundFile ? YES : NO);
    this.hasSingleNormFile = hasSingleNormFile;
    preLockless = false;
    this.docStoreOffset = docStoreOffset;
    this.docStoreSegment = docStoreSegment;
    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
    assert docStoreOffset == -1 || docStoreSegment != null;
  }

  /**
|
@ -92,6 +110,8 @@ final class SegmentInfo {
|
|||
dir = src.dir;
|
||||
preLockless = src.preLockless;
|
||||
delGen = src.delGen;
|
||||
docStoreOffset = src.docStoreOffset;
|
||||
docStoreIsCompoundFile = src.docStoreIsCompoundFile;
|
||||
if (src.normGen == null) {
|
||||
normGen = null;
|
||||
} else {
|
||||
|
@@ -116,6 +136,20 @@ final class SegmentInfo {
    docCount = input.readInt();
    if (format <= SegmentInfos.FORMAT_LOCKLESS) {
      delGen = input.readLong();
      if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) {
        docStoreOffset = input.readInt();
        if (docStoreOffset != -1) {
          docStoreSegment = input.readString();
          docStoreIsCompoundFile = (1 == input.readByte());
        } else {
          docStoreSegment = name;
          docStoreIsCompoundFile = false;
        }
      } else {
        docStoreOffset = -1;
        docStoreSegment = name;
        docStoreIsCompoundFile = false;
      }
      if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) {
        hasSingleNormFile = (1 == input.readByte());
      } else {
|
@ -138,6 +172,9 @@ final class SegmentInfo {
|
|||
isCompoundFile = CHECK_DIR;
|
||||
preLockless = true;
|
||||
hasSingleNormFile = false;
|
||||
docStoreOffset = -1;
|
||||
docStoreIsCompoundFile = false;
|
||||
docStoreSegment = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -368,6 +405,28 @@ final class SegmentInfo {
|
|||
return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
|
||||
}
|
||||
}
|
||||
|
||||
int getDocStoreOffset() {
|
||||
return docStoreOffset;
|
||||
}
|
||||
|
||||
boolean getDocStoreIsCompoundFile() {
|
||||
return docStoreIsCompoundFile;
|
||||
}
|
||||
|
||||
void setDocStoreIsCompoundFile(boolean v) {
|
||||
docStoreIsCompoundFile = v;
|
||||
files = null;
|
||||
}
|
||||
|
||||
String getDocStoreSegment() {
|
||||
return docStoreSegment;
|
||||
}
|
||||
|
||||
void setDocStoreOffset(int offset) {
|
||||
docStoreOffset = offset;
|
||||
files = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save this segment's info.
|
||||
|
@ -377,6 +436,12 @@ final class SegmentInfo {
|
|||
output.writeString(name);
|
||||
output.writeInt(docCount);
|
||||
output.writeLong(delGen);
|
||||
output.writeInt(docStoreOffset);
|
||||
if (docStoreOffset != -1) {
|
||||
output.writeString(docStoreSegment);
|
||||
output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
|
||||
}
|
||||
|
||||
output.writeByte((byte) (hasSingleNormFile ? 1:0));
|
||||
if (normGen == null) {
|
||||
output.writeInt(NO);
|
||||
|
@ -389,6 +454,11 @@ final class SegmentInfo {
|
|||
output.writeByte(isCompoundFile);
|
||||
}
|
||||
|
||||
private void addIfExists(List files, String fileName) throws IOException {
|
||||
if (dir.fileExists(fileName))
|
||||
files.add(fileName);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return all files referenced by this SegmentInfo. The
|
||||
* returns List is a locally cached List so you should not
|
||||
|
@ -409,13 +479,28 @@ final class SegmentInfo {
|
|||
if (useCompoundFile) {
|
||||
files.add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
|
||||
} else {
|
||||
for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) {
|
||||
String ext = IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i];
|
||||
String fileName = name + "." + ext;
|
||||
if (dir.fileExists(fileName)) {
|
||||
files.add(fileName);
|
||||
}
|
||||
final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
|
||||
for(int i=0;i<exts.length;i++)
|
||||
addIfExists(files, name + "." + exts[i]);
|
||||
}
|
||||
|
||||
if (docStoreOffset != -1) {
|
||||
// We are sharing doc stores (stored fields, term
|
||||
// vectors) with other segments
|
||||
assert docStoreSegment != null;
|
||||
if (docStoreIsCompoundFile) {
|
||||
files.add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
|
||||
} else {
|
||||
final String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
|
||||
for(int i=0;i<exts.length;i++)
|
||||
addIfExists(files, docStoreSegment + "." + exts[i]);
|
||||
}
|
||||
} else if (!useCompoundFile) {
|
||||
// We are not sharing, and, these files were not
|
||||
// included in the compound file
|
||||
final String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
|
||||
for(int i=0;i<exts.length;i++)
|
||||
addIfExists(files, name + "." + exts[i]);
|
||||
}
|
||||
|
||||
String delFileName = IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
|
||||
|
|
|
@@ -51,8 +51,12 @@ final class SegmentInfos extends Vector {
   */
  public static final int FORMAT_SINGLE_NORM_FILE = -3;

  /** This format allows multiple segments to share a single
   * vectors and stored fields file. */
  public static final int FORMAT_SHARED_DOC_STORE = -4;

  /* This must always point to the most recent file format. */
  private static final int CURRENT_FORMAT = FORMAT_SINGLE_NORM_FILE;
  private static final int CURRENT_FORMAT = FORMAT_SHARED_DOC_STORE;

  public int counter = 0; // used to name new segments
  /**
|
|
|
@@ -52,6 +52,12 @@ final class SegmentMerger {

  private int mergedDocs;

  // Whether we should merge doc stores (stored fields and
  // vectors files). When all segments we are merging
  // already share the same doc store files, we don't need
  // to merge the doc stores.
  private boolean mergeDocStores;

  /** This ctor used only by test code.
   *
   * @param dir The Directory to merge the other segments into

@@ -92,18 +98,32 @@ final class SegmentMerger {
   * @throws IOException if there is a low-level IO error
   */
  final int merge() throws CorruptIndexException, IOException {
    int value;

    return merge(true);
  }

  /**
   * Merges the readers specified by the {@link #add} method
   * into the directory passed to the constructor.
   * @param mergeDocStores if false, we will not merge the
   * stored fields nor vectors files
   * @return The number of documents that were merged
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  final int merge(boolean mergeDocStores) throws CorruptIndexException, IOException {

    this.mergeDocStores = mergeDocStores;

    mergedDocs = mergeFields();
    mergeTerms();
    mergeNorms();

    if (fieldInfos.hasVectors())
    if (mergeDocStores && fieldInfos.hasVectors())
      mergeVectors();

    return mergedDocs;
  }


  /**
   * close all IndexReaders that have been added.
   * Should not be called before merge().
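
The following is an illustrative usage sketch for the merge(boolean) entry point above, modeled on how IndexWriter drives the merger elsewhere in this patch; writer, mergedName, segmentInfos, minSegment, end and MERGE_READ_BUFFER_SIZE stand in for the caller's own state and are not new API:

    // Sketch only: merge a contiguous range of segments and, when every
    // source segment already shares one doc store, skip re-merging the
    // stored fields and term vectors.
    SegmentMerger merger = new SegmentMerger(writer, mergedName);
    for (int i = minSegment; i < end; i++)
      merger.add(SegmentReader.get(segmentInfos.info(i), MERGE_READ_BUFFER_SIZE, mergeDocStores));
    int mergedDocCount = merger.merge(mergeDocStores);  // false ==> doc stores are left untouched
    merger.closeReaders();                              // close readers before deleting old files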
|
@ -126,7 +146,10 @@ final class SegmentMerger {
|
|||
|
||||
// Basic files
|
||||
for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
|
||||
files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
|
||||
String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
|
||||
if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) &&
|
||||
!ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
|
||||
files.add(segment + "." + ext);
|
||||
}
|
||||
|
||||
// Fieldable norm files
|
||||
|
@ -139,7 +162,7 @@ final class SegmentMerger {
|
|||
}
|
||||
|
||||
// Vector files
|
||||
if (fieldInfos.hasVectors()) {
|
||||
if (fieldInfos.hasVectors() && mergeDocStores) {
|
||||
for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) {
|
||||
files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]);
|
||||
}
|
||||
|
@ -173,7 +196,20 @@ final class SegmentMerger {
|
|||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
private final int mergeFields() throws CorruptIndexException, IOException {
|
||||
fieldInfos = new FieldInfos(); // merge field names
|
||||
|
||||
if (!mergeDocStores) {
|
||||
// When we are not merging by doc stores, that means
|
||||
// all segments were written as part of a single
|
||||
// autoCommit=false IndexWriter session, so their field
|
||||
// name -> number mapping are the same. So, we start
|
||||
// with the fieldInfos of the last segment in this
|
||||
// case, to keep that numbering.
|
||||
final SegmentReader sr = (SegmentReader) readers.elementAt(readers.size()-1);
|
||||
fieldInfos = (FieldInfos) sr.fieldInfos.clone();
|
||||
} else {
|
||||
fieldInfos = new FieldInfos(); // merge field names
|
||||
}
|
||||
|
||||
int docCount = 0;
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||
|
@ -187,30 +223,40 @@ final class SegmentMerger {
|
|||
}
|
||||
fieldInfos.write(directory, segment + ".fnm");
|
||||
|
||||
FieldsWriter fieldsWriter = // merge field values
|
||||
new FieldsWriter(directory, segment, fieldInfos);
|
||||
if (mergeDocStores) {
|
||||
|
||||
FieldsWriter fieldsWriter = // merge field values
|
||||
new FieldsWriter(directory, segment, fieldInfos);
|
||||
|
||||
// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
|
||||
// in merge mode, we use this FieldSelector
|
||||
FieldSelector fieldSelectorMerge = new FieldSelector() {
|
||||
public FieldSelectorResult accept(String fieldName) {
|
||||
return FieldSelectorResult.LOAD_FOR_MERGE;
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||
int maxDoc = reader.maxDoc();
|
||||
for (int j = 0; j < maxDoc; j++)
|
||||
if (!reader.isDeleted(j)) { // skip deleted docs
|
||||
fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));
|
||||
docCount++;
|
||||
}
|
||||
// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
|
||||
// in merge mode, we use this FieldSelector
|
||||
FieldSelector fieldSelectorMerge = new FieldSelector() {
|
||||
public FieldSelectorResult accept(String fieldName) {
|
||||
return FieldSelectorResult.LOAD_FOR_MERGE;
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||
int maxDoc = reader.maxDoc();
|
||||
for (int j = 0; j < maxDoc; j++)
|
||||
if (!reader.isDeleted(j)) { // skip deleted docs
|
||||
fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));
|
||||
docCount++;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fieldsWriter.close();
|
||||
}
|
||||
} finally {
|
||||
fieldsWriter.close();
|
||||
}
|
||||
|
||||
} else
|
||||
// If we are skipping the doc stores, that means there
|
||||
// are no deletions in any of these segments, so we
|
||||
// just sum numDocs() of each segment to get total docCount
|
||||
for (int i = 0; i < readers.size(); i++)
|
||||
docCount += ((IndexReader) readers.elementAt(i)).numDocs();
|
||||
|
||||
return docCount;
|
||||
}
|
||||
|
||||
|
@ -355,6 +401,7 @@ final class SegmentMerger {
|
|||
for (int i = 0; i < n; i++) {
|
||||
SegmentMergeInfo smi = smis[i];
|
||||
TermPositions postings = smi.getPositions();
|
||||
assert postings != null;
|
||||
int base = smi.base;
|
||||
int[] docMap = smi.getDocMap();
|
||||
postings.seek(smi.termEnum);
|
||||
|
|
|
@ -60,6 +60,7 @@ class SegmentReader extends IndexReader {
|
|||
|
||||
// Compound File Reader when based on a compound file segment
|
||||
CompoundFileReader cfsReader = null;
|
||||
CompoundFileReader storeCFSReader = null;
|
||||
|
||||
private class Norm {
|
||||
public Norm(IndexInput in, int number, long normSeek)
|
||||
|
@ -128,7 +129,15 @@ class SegmentReader extends IndexReader {
|
|||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
|
||||
return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE);
|
||||
return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static SegmentReader get(SegmentInfo si, boolean doOpenStores) throws CorruptIndexException, IOException {
|
||||
return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -136,7 +145,15 @@ class SegmentReader extends IndexReader {
|
|||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
|
||||
return get(si.dir, si, null, false, false, readBufferSize);
|
||||
return get(si.dir, si, null, false, false, readBufferSize, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
|
||||
return get(si.dir, si, null, false, false, readBufferSize, doOpenStores);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -145,7 +162,7 @@ class SegmentReader extends IndexReader {
|
|||
*/
|
||||
public static SegmentReader get(SegmentInfos sis, SegmentInfo si,
|
||||
boolean closeDir) throws CorruptIndexException, IOException {
|
||||
return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE);
|
||||
return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -157,6 +174,19 @@ class SegmentReader extends IndexReader {
|
|||
boolean closeDir, boolean ownDir,
|
||||
int readBufferSize)
|
||||
throws CorruptIndexException, IOException {
|
||||
return get(dir, si, sis, closeDir, ownDir, readBufferSize, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public static SegmentReader get(Directory dir, SegmentInfo si,
|
||||
SegmentInfos sis,
|
||||
boolean closeDir, boolean ownDir,
|
||||
int readBufferSize,
|
||||
boolean doOpenStores)
|
||||
throws CorruptIndexException, IOException {
|
||||
SegmentReader instance;
|
||||
try {
|
||||
instance = (SegmentReader)IMPL.newInstance();
|
||||
|
@ -164,11 +194,11 @@ class SegmentReader extends IndexReader {
|
|||
throw new RuntimeException("cannot load SegmentReader class: " + e, e);
|
||||
}
|
||||
instance.init(dir, sis, closeDir, ownDir);
|
||||
instance.initialize(si, readBufferSize);
|
||||
instance.initialize(si, readBufferSize, doOpenStores);
|
||||
return instance;
|
||||
}
|
||||
|
||||
private void initialize(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException {
|
||||
private void initialize(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException {
|
||||
segment = si.name;
|
||||
this.si = si;
|
||||
|
||||
|
@ -178,17 +208,45 @@ class SegmentReader extends IndexReader {
|
|||
// Use compound file directory for some files, if it exists
|
||||
Directory cfsDir = directory();
|
||||
if (si.getUseCompoundFile()) {
|
||||
cfsReader = new CompoundFileReader(directory(), segment + ".cfs", readBufferSize);
|
||||
cfsReader = new CompoundFileReader(directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
|
||||
cfsDir = cfsReader;
|
||||
}
|
||||
|
||||
final Directory storeDir;
|
||||
|
||||
if (doOpenStores) {
|
||||
if (si.getDocStoreOffset() != -1) {
|
||||
if (si.getDocStoreIsCompoundFile()) {
|
||||
storeCFSReader = new CompoundFileReader(directory(), si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize);
|
||||
storeDir = storeCFSReader;
|
||||
} else {
|
||||
storeDir = directory();
|
||||
}
|
||||
} else {
|
||||
storeDir = cfsDir;
|
||||
}
|
||||
} else
|
||||
storeDir = null;
|
||||
|
||||
// No compound file exists - use the multi-file format
|
||||
fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
|
||||
fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos, readBufferSize);
|
||||
|
||||
// Verify two sources of "maxDoc" agree:
|
||||
if (fieldsReader.size() != si.docCount) {
|
||||
throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
|
||||
final String fieldsSegment;
|
||||
final Directory dir;
|
||||
|
||||
if (si.getDocStoreOffset() != -1)
|
||||
fieldsSegment = si.getDocStoreSegment();
|
||||
else
|
||||
fieldsSegment = segment;
|
||||
|
||||
if (doOpenStores) {
|
||||
fieldsReader = new FieldsReader(storeDir, fieldsSegment, fieldInfos, readBufferSize,
|
||||
si.getDocStoreOffset(), si.docCount);
|
||||
|
||||
// Verify two sources of "maxDoc" agree:
|
||||
if (si.getDocStoreOffset() == -1 && fieldsReader.size() != si.docCount) {
|
||||
throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
|
||||
}
|
||||
}
|
||||
|
||||
tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
|
||||
|
@ -209,8 +267,13 @@ class SegmentReader extends IndexReader {
|
|||
proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
|
||||
openNorms(cfsDir, readBufferSize);
|
||||
|
||||
if (fieldInfos.hasVectors()) { // open term vector files only as needed
|
||||
termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos, readBufferSize);
|
||||
if (doOpenStores && fieldInfos.hasVectors()) { // open term vector files only as needed
|
||||
final String vectorsSegment;
|
||||
if (si.getDocStoreOffset() != -1)
|
||||
vectorsSegment = si.getDocStoreSegment();
|
||||
else
|
||||
vectorsSegment = segment;
|
||||
termVectorsReaderOrig = new TermVectorsReader(storeDir, vectorsSegment, fieldInfos, readBufferSize, si.getDocStoreOffset(), si.docCount);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
|
@ -273,6 +336,9 @@ class SegmentReader extends IndexReader {
|
|||
|
||||
if (cfsReader != null)
|
||||
cfsReader.close();
|
||||
|
||||
if (storeCFSReader != null)
|
||||
storeCFSReader.close();
|
||||
}
|
||||
|
||||
static boolean hasDeletions(SegmentInfo si) throws IOException {
|
||||
|
|
|
@ -33,6 +33,10 @@ class TermVectorsReader implements Cloneable {
|
|||
private IndexInput tvd;
|
||||
private IndexInput tvf;
|
||||
private int size;
|
||||
|
||||
// The docID offset where our docs begin in the index
|
||||
// file. This will be 0 if we have our own private file.
|
||||
private int docStoreOffset;
|
||||
|
||||
private int tvdFormat;
|
||||
private int tvfFormat;
|
||||
|
@ -43,6 +47,11 @@ class TermVectorsReader implements Cloneable {
|
|||
}
|
||||
|
||||
TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos, int readBufferSize)
|
||||
throws CorruptIndexException, IOException {
|
||||
this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE, -1, 0);
|
||||
}
|
||||
|
||||
TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
|
||||
throws CorruptIndexException, IOException {
|
||||
if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
|
||||
tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION, readBufferSize);
|
||||
|
@ -51,7 +60,16 @@ class TermVectorsReader implements Cloneable {
|
|||
tvdFormat = checkValidFormat(tvd);
|
||||
tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION, readBufferSize);
|
||||
tvfFormat = checkValidFormat(tvf);
|
||||
size = (int) tvx.length() / 8;
|
||||
if (-1 == docStoreOffset) {
|
||||
this.docStoreOffset = 0;
|
||||
this.size = (int) (tvx.length() / 8);
|
||||
} else {
|
||||
this.docStoreOffset = docStoreOffset;
|
||||
this.size = size;
|
||||
// Verify the file is long enough to hold all of our
|
||||
// docs
|
||||
assert ((int) (tvx.length()/8)) >= size + docStoreOffset;
|
||||
}
|
||||
}
|
||||
|
||||
this.fieldInfos = fieldInfos;
|
||||
|
@ -102,7 +120,7 @@ class TermVectorsReader implements Cloneable {
|
|||
//We don't need to do this in other seeks because we already have the
|
||||
// file pointer
|
||||
//that was written in another file
|
||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
tvx.seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
//System.out.println("TVX Pointer: " + tvx.getFilePointer());
|
||||
long position = tvx.readLong();
|
||||
|
||||
|
@ -154,7 +172,7 @@ class TermVectorsReader implements Cloneable {
|
|||
// Check if no term vectors are available for this segment at all
|
||||
if (tvx != null) {
|
||||
//We need to offset by
|
||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
tvx.seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
long position = tvx.readLong();
|
||||
|
||||
tvd.seek(position);
|
||||
|
|
|
@@ -125,6 +125,31 @@ public abstract class IndexOutput {
    }
  }

  /** Writes a sequence of UTF-8 encoded characters from a char[].
   * @param s the source of the characters
   * @param start the first character in the sequence
   * @param length the number of characters in the sequence
   * @see IndexInput#readChars(char[],int,int)
   */
  public void writeChars(char[] s, int start, int length)
       throws IOException {
    final int end = start + length;
    for (int i = start; i < end; i++) {
      final int code = (int)s[i];
      if (code >= 0x01 && code <= 0x7F)
        writeByte((byte)code);
      else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) {
        writeByte((byte)(0xC0 | (code >> 6)));
        writeByte((byte)(0x80 | (code & 0x3F)));
      } else {
        writeByte((byte)(0xE0 | (code >>> 12)));
        writeByte((byte)(0x80 | ((code >> 6) & 0x3F)));
        writeByte((byte)(0x80 | (code & 0x3F)));
      }
    }
  }


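A minimal usage sketch for the new writeChars(char[], int, int) above; it simply shows writing character data straight from a reusable char[] buffer. The RAMDirectory, the file name and the writeVInt length prefix are conventions of this example only, not something this patch prescribes:

    // Sketch only: length-prefix a run of chars, then write them without
    // first converting the buffer to a String.
    Directory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("_example.dat");
    char[] buffer = {'h', 'e', 'l', 'l', 'o'};
    out.writeVInt(buffer.length);              // record how many chars follow
    out.writeChars(buffer, 0, buffer.length);  // new API added in this patch
    out.close();
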
/** Forces any buffered output to be written. */
|
||||
public abstract void flush() throws IOException;
|
||||
|
||||
|
|
|
@@ -60,6 +60,15 @@
            Lucene will not be able to read the index.
        </p>

        <p>
            In version 2.3, the file format was changed to allow
            segments to share a single set of doc store (vectors &
            stored fields) files. This allows for faster indexing
            in certain cases. The change is fully backwards
            compatible (in the same way as the lock-less commits
            change in 2.1).
        </p>

    </section>

    <section id="Definitions"><title>Definitions</title>

@@ -809,9 +818,15 @@
            NormGen<sup>NumField</sup>,
            IsCompoundFile><sup>SegCount</sup>
        </p>
        <p>
            <b>2.3 and above:</b>
            Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
            NormGen<sup>NumField</sup>,
            IsCompoundFile><sup>SegCount</sup>
        </p>

        <p>
            Format, NameCounter, SegCount, SegSize, NumField --> Int32
            Format, NameCounter, SegCount, SegSize, NumField, DocStoreOffset --> Int32
        </p>

        <p>

@@ -819,11 +834,11 @@
        </p>

        <p>
            SegName --> String
            SegName, DocStoreSegment --> String
        </p>

        <p>
            IsCompoundFile, HasSingleNormFile --> Int8
            IsCompoundFile, HasSingleNormFile, DocStoreIsCompoundFile --> Int8
        </p>
|
||||
<p>
|
||||
|
@@ -889,6 +904,29 @@
            "Normalization Factors" below for details.
        </p>

        <p>
            DocStoreOffset, DocStoreSegment,
            DocStoreIsCompoundFile: If DocStoreOffset is -1,
            this segment has its own doc store (stored fields
            values and term vectors) files and DocStoreSegment
            and DocStoreIsCompoundFile are not stored. In
            this case all files for stored field values
            (<tt>*.fdt</tt> and <tt>*.fdx</tt>) and term
            vectors (<tt>*.tvf</tt>, <tt>*.tvd</tt> and
            <tt>*.tvx</tt>) will be stored with this segment.
            Otherwise, DocStoreSegment is the name of the
            segment that has the shared doc store files;
            DocStoreIsCompoundFile is 1 if that segment is
            stored in compound file format (as a <tt>.cfx</tt>
            file); and DocStoreOffset is the starting document
            in the shared doc store files where this segment's
            documents begin. In this case, this segment does
            not store its own doc store files but instead
            shares a single set of these files with other
            segments.
        </p>
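
To make the three fields above concrete, here is a minimal sketch of how a reader could consume them for one segment entry; it mirrors the SegmentInfo reading code added in this patch, and the names input and segmentName are placeholders for the reader's own state:

    // Sketch only: read the shared-doc-store fields of one segment entry.
    int docStoreOffset = input.readInt();            // -1 ==> private doc stores
    String docStoreSegment;
    boolean docStoreIsCompoundFile;
    if (docStoreOffset != -1) {
      docStoreSegment = input.readString();          // segment that owns the shared files
      docStoreIsCompoundFile = (1 == input.readByte());
    } else {
      docStoreSegment = segmentName;                 // defaults to this segment's own name
      docStoreIsCompoundFile = false;
    }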
|
||||
|
||||
|
||||
</section>
|
||||
|
||||
<section id="Lock File"><title>Lock File</title>
|
||||
|
@@ -947,6 +985,14 @@
        <p>FileData --> raw file data</p>
        <p>The raw file data is the data from the individual files named above.</p>

        <p>Starting with Lucene 2.3, doc store files (stored
        field values and term vectors) can be shared in a
        single set of files for more than one segment. When
        compound file is enabled, these shared files will be
        added into a single compound file (same format as
        above) but with the extension <tt>.cfx</tt>.
        </p>
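
As a rough sketch of what this looks like on disk, the snippet below mirrors the file-listing logic (SegmentInfo.files()) added in this patch; the extension strings are the stored fields and term vector files described earlier in this document, and the files list and field names are placeholders:

    // Sketch only: which doc store files a segment references.
    if (docStoreOffset != -1) {                  // sharing doc stores with other segments
      if (docStoreIsCompoundFile) {
        files.add(docStoreSegment + ".cfx");     // one shared compound doc store
      } else {
        files.add(docStoreSegment + ".fdt");     // shared stored fields data
        files.add(docStoreSegment + ".fdx");     // shared stored fields index
        files.add(docStoreSegment + ".tvx");     // shared term vectors index
        files.add(docStoreSegment + ".tvd");     // shared term vectors documents
        files.add(docStoreSegment + ".tvf");     // shared term vectors fields
      }
    } else {
      // private doc stores: the same extensions, but named after this segment
    }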
|
||||
|
||||
</section>
|
||||
|
||||
</section>
|
||||
|
|
|
@ -106,8 +106,12 @@ public class TestBackwardsCompatibility extends TestCase
|
|||
rmDir(dirName);
|
||||
}
|
||||
|
||||
final String[] oldNames = {"prelockless.cfs",
|
||||
"prelockless.nocfs",
|
||||
"presharedstores.cfs",
|
||||
"presharedstores.nocfs"};
|
||||
|
||||
public void testSearchOldIndex() throws IOException {
|
||||
String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
|
||||
for(int i=0;i<oldNames.length;i++) {
|
||||
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
|
||||
unzip(dirName, oldNames[i]);
|
||||
|
@ -117,7 +121,6 @@ public class TestBackwardsCompatibility extends TestCase
|
|||
}
|
||||
|
||||
public void testIndexOldIndexNoAdds() throws IOException {
|
||||
String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
|
||||
for(int i=0;i<oldNames.length;i++) {
|
||||
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
|
||||
unzip(dirName, oldNames[i]);
|
||||
|
@ -131,7 +134,6 @@ public class TestBackwardsCompatibility extends TestCase
|
|||
}
|
||||
|
||||
public void testIndexOldIndex() throws IOException {
|
||||
String[] oldNames = {"prelockless.cfs", "prelockless.nocfs"};
|
||||
for(int i=0;i<oldNames.length;i++) {
|
||||
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
|
||||
unzip(dirName, oldNames[i]);
|
||||
|
@ -312,8 +314,9 @@ public class TestBackwardsCompatibility extends TestCase
|
|||
Directory dir = FSDirectory.getDirectory(fullDir(outputDir));
|
||||
|
||||
boolean autoCommit = 0 == pass;
|
||||
|
||||
|
||||
IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
|
||||
writer.setRAMBufferSizeMB(16.0);
|
||||
//IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
for(int i=0;i<35;i++) {
|
||||
addDoc(writer, i);
|
||||
|
@ -337,8 +340,8 @@ public class TestBackwardsCompatibility extends TestCase
|
|||
// figure out which field number corresponds to
|
||||
// "content", and then set our expected file names below
|
||||
// accordingly:
|
||||
CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
|
||||
FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
|
||||
CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
|
||||
FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
|
||||
int contentFieldIndex = -1;
|
||||
for(int i=0;i<fieldInfos.size();i++) {
|
||||
FieldInfo fi = fieldInfos.fieldInfo(i);
|
||||
|
@ -351,17 +354,15 @@ public class TestBackwardsCompatibility extends TestCase
|
|||
assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
|
||||
|
||||
// Now verify file names:
|
||||
String[] expected = {"_0.cfs",
|
||||
"_0_1.del",
|
||||
"_1.cfs",
|
||||
"_2.cfs",
|
||||
"_2_1.s" + contentFieldIndex,
|
||||
"_3.cfs",
|
||||
"segments_a",
|
||||
"segments.gen"};
|
||||
if (!autoCommit) {
|
||||
expected[6] = "segments_3";
|
||||
}
|
||||
String[] expected;
|
||||
expected = new String[] {"_0.cfs",
|
||||
"_0_1.del",
|
||||
"_0_1.s" + contentFieldIndex,
|
||||
"segments_4",
|
||||
"segments.gen"};
|
||||
|
||||
if (!autoCommit)
|
||||
expected[3] = "segments_3";
|
||||
|
||||
String[] actual = dir.list();
|
||||
Arrays.sort(expected);
|
||||
|
|
|
@ -256,6 +256,7 @@ public class TestDeletionPolicy extends TestCase
|
|||
Directory dir = new RAMDirectory();
|
||||
|
||||
IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
writer.setUseCompoundFile(useCompoundFile);
|
||||
for(int i=0;i<107;i++) {
|
||||
addDoc(writer);
|
||||
|
@ -318,6 +319,7 @@ public class TestDeletionPolicy extends TestCase
|
|||
Directory dir = new RAMDirectory();
|
||||
|
||||
IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
writer.setUseCompoundFile(useCompoundFile);
|
||||
for(int i=0;i<107;i++) {
|
||||
addDoc(writer);
|
||||
|
@ -365,6 +367,7 @@ public class TestDeletionPolicy extends TestCase
|
|||
|
||||
for(int j=0;j<N+1;j++) {
|
||||
IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
writer.setUseCompoundFile(useCompoundFile);
|
||||
for(int i=0;i<17;i++) {
|
||||
addDoc(writer);
|
||||
|
@ -525,6 +528,7 @@ public class TestDeletionPolicy extends TestCase
|
|||
|
||||
Directory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
writer.setUseCompoundFile(useCompoundFile);
|
||||
writer.close();
|
||||
Term searchTerm = new Term("content", "aaa");
|
||||
|
@ -533,6 +537,7 @@ public class TestDeletionPolicy extends TestCase
|
|||
for(int i=0;i<N+1;i++) {
|
||||
|
||||
writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
writer.setUseCompoundFile(useCompoundFile);
|
||||
for(int j=0;j<17;j++) {
|
||||
addDoc(writer);
|
||||
|
|
|
@ -51,6 +51,7 @@ public class TestIndexFileDeleter extends TestCase
|
|||
Directory dir = new RAMDirectory();
|
||||
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
int i;
|
||||
for(i=0;i<35;i++) {
|
||||
addDoc(writer, i);
|
||||
|
|
|
@ -74,6 +74,9 @@ public class TestIndexModifier extends TestCase {
|
|||
// Lucene defaults:
|
||||
assertNull(i.getInfoStream());
|
||||
assertTrue(i.getUseCompoundFile());
|
||||
/* new merge policy
|
||||
assertEquals(0, i.getMaxBufferedDocs());
|
||||
*/
|
||||
assertEquals(10, i.getMaxBufferedDocs());
|
||||
assertEquals(10000, i.getMaxFieldLength());
|
||||
assertEquals(10, i.getMergeFactor());
|
||||
|
|
|
@ -803,7 +803,7 @@ public class TestIndexReader extends TestCase
|
|||
String[] startFiles = dir.list();
|
||||
SegmentInfos infos = new SegmentInfos();
|
||||
infos.read(dir);
|
||||
IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
|
||||
IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null);
|
||||
String[] endFiles = dir.list();
|
||||
|
||||
Arrays.sort(startFiles);
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
|
@ -478,7 +479,7 @@ public class TestIndexWriter extends TestCase
|
|||
String[] startFiles = dir.list();
|
||||
SegmentInfos infos = new SegmentInfos();
|
||||
infos.read(dir);
|
||||
IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
|
||||
IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null);
|
||||
String[] endFiles = dir.list();
|
||||
|
||||
Arrays.sort(startFiles);
|
||||
|
@ -859,6 +860,7 @@ public class TestIndexWriter extends TestCase
|
|||
public void testCommitOnCloseAbort() throws IOException {
|
||||
Directory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
for (int i = 0; i < 14; i++) {
|
||||
addDoc(writer);
|
||||
}
|
||||
|
@ -871,6 +873,7 @@ public class TestIndexWriter extends TestCase
|
|||
searcher.close();
|
||||
|
||||
writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
for(int j=0;j<17;j++) {
|
||||
addDoc(writer);
|
||||
}
|
||||
|
@ -895,6 +898,7 @@ public class TestIndexWriter extends TestCase
|
|||
// Now make sure we can re-open the index, add docs,
|
||||
// and all is good:
|
||||
writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
for(int i=0;i<12;i++) {
|
||||
for(int j=0;j<17;j++) {
|
||||
addDoc(writer);
|
||||
|
@ -962,6 +966,7 @@ public class TestIndexWriter extends TestCase
|
|||
public void testCommitOnCloseOptimize() throws IOException {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
for(int j=0;j<17;j++) {
|
||||
addDocWithIndex(writer, j);
|
||||
}
|
||||
|
@ -1002,6 +1007,255 @@ public class TestIndexWriter extends TestCase
|
|||
reader.close();
|
||||
}
|
||||
|
||||
public void testIndexNoDocuments() throws IOException {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.flush();
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(dir);
|
||||
assertEquals(0, reader.maxDoc());
|
||||
assertEquals(0, reader.numDocs());
|
||||
reader.close();
|
||||
|
||||
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
|
||||
writer.flush();
|
||||
writer.close();
|
||||
|
||||
reader = IndexReader.open(dir);
|
||||
assertEquals(0, reader.maxDoc());
|
||||
assertEquals(0, reader.numDocs());
|
||||
reader.close();
|
||||
}
|
||||
|
||||
public void testManyFields() throws IOException {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
for(int j=0;j<100;j++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("a"+j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
|
||||
doc.add(new Field("b"+j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
|
||||
doc.add(new Field("c"+j, "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
|
||||
doc.add(new Field("d"+j, "aaa", Field.Store.YES, Field.Index.TOKENIZED));
|
||||
doc.add(new Field("e"+j, "aaa", Field.Store.YES, Field.Index.TOKENIZED));
|
||||
doc.add(new Field("f"+j, "aaa", Field.Store.YES, Field.Index.TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(dir);
|
||||
assertEquals(100, reader.maxDoc());
|
||||
assertEquals(100, reader.numDocs());
|
||||
for(int j=0;j<100;j++) {
|
||||
assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
|
||||
assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
|
||||
assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
|
||||
assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
|
||||
assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
|
||||
assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
|
||||
}
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSmallRAMBuffer() throws IOException {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.setRAMBufferSizeMB(0.000001);
|
||||
int lastNumFile = dir.list().length;
|
||||
for(int j=0;j<9;j++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
int numFile = dir.list().length;
|
||||
// Verify that with a tiny RAM buffer we see new
|
||||
// segment after every doc
|
||||
assertTrue(numFile > lastNumFile);
|
||||
lastNumFile = numFile;
|
||||
}
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Make sure it's OK to change RAM buffer size and
|
||||
// maxBufferedDocs in a write session
|
||||
public void testChangingRAMBuffer() throws IOException {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
int lastNumFile = dir.list().length;
|
||||
long lastGen = -1;
|
||||
for(int j=1;j<52;j++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
long gen = SegmentInfos.generationFromSegmentsFileName(SegmentInfos.getCurrentSegmentFileName(dir.list()));
|
||||
if (j == 1)
|
||||
lastGen = gen;
|
||||
else if (j < 10)
|
||||
// No new files should be created
|
||||
assertEquals(gen, lastGen);
|
||||
else if (10 == j) {
|
||||
assertTrue(gen > lastGen);
|
||||
lastGen = gen;
|
||||
writer.setRAMBufferSizeMB(0.000001);
|
||||
} else if (j < 20) {
|
||||
assertTrue(gen > lastGen);
|
||||
lastGen = gen;
|
||||
} else if (20 == j) {
|
||||
writer.setRAMBufferSizeMB(16);
|
||||
lastGen = gen;
|
||||
} else if (j < 30) {
|
||||
assertEquals(gen, lastGen);
|
||||
} else if (30 == j) {
|
||||
writer.setRAMBufferSizeMB(0.000001);
|
||||
} else if (j < 40) {
|
||||
assertTrue(gen> lastGen);
|
||||
lastGen = gen;
|
||||
} else if (40 == j) {
|
||||
writer.setMaxBufferedDocs(10);
|
||||
lastGen = gen;
|
||||
} else if (j < 50) {
|
||||
assertEquals(gen, lastGen);
|
||||
writer.setMaxBufferedDocs(10);
|
||||
} else if (50 == j) {
|
||||
assertTrue(gen > lastGen);
|
||||
}
|
||||
}
|
||||
writer.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
  public void testDiverseDocs() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    // writer.setInfoStream(System.out);
    long t0 = System.currentTimeMillis();
    writer.setRAMBufferSizeMB(0.5);
    Random rand = new Random(31415);
    for(int i=0;i<3;i++) {
      // First, docs where every term is unique (heavy on
      // Posting instances)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        for(int k=0;k<100;k++) {
          doc.add(new Field("field", Integer.toString(rand.nextInt()), Field.Store.YES, Field.Index.TOKENIZED));
        }
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs (heavy on byte blocks)
      for(int j=0;j<100;j++) {
        Document doc = new Document();
        doc.add(new Field("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);
      }

      // Next, many single term docs where only one term
      // occurs but the terms are very long (heavy on
      // char[] arrays)
      for(int j=0;j<100;j++) {
        StringBuffer b = new StringBuffer();
        String x = Integer.toString(j) + ".";
        for(int k=0;k<1000;k++)
          b.append(x);
        String longTerm = b.toString();

        Document doc = new Document();
        doc.add(new Field("field", longTerm, Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);
      }
    }
    writer.close();

    long t1 = System.currentTimeMillis();
    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.search(new TermQuery(new Term("field", "aaa")));
    assertEquals(300, hits.length());
    searcher.close();

    dir.close();
  }

  public void testEnablingNorms() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    // Enable norms for only 1 doc, pre flush
    for(int j=0;j<10;j++) {
      Document doc = new Document();
      Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
      if (j != 8) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();

    Term searchTerm = new Term("field", "aaa");

    IndexSearcher searcher = new IndexSearcher(dir);
    Hits hits = searcher.search(new TermQuery(searchTerm));
    assertEquals(10, hits.length());
    searcher.close();

    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setMaxBufferedDocs(10);
    // Enable norms for only 1 doc, post flush
    for(int j=0;j<27;j++) {
      Document doc = new Document();
      Field f = new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED);
      if (j != 26) {
        f.setOmitNorms(true);
      }
      doc.add(f);
      writer.addDocument(doc);
    }
    writer.close();
    searcher = new IndexSearcher(dir);
    hits = searcher.search(new TermQuery(searchTerm));
    assertEquals(27, hits.length());
    searcher.close();

    IndexReader reader = IndexReader.open(dir);
    reader.close();

    dir.close();
  }

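For reference, the per-field norms toggle exercised above is set on the Field before the document is added. A minimal sketch, not part of this patch, with invented class and field names:

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.RAMDirectory;

    public class OmitNormsExample {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
        Document doc = new Document();
        Field id = new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED);
        // Norms add little for a single-token identifier field,
        // so omit them to save index space.
        id.setOmitNorms(true);
        doc.add(id);
        doc.add(new Field("body", "aaa bbb ccc", Field.Store.NO, Field.Index.TOKENIZED));
        writer.addDocument(doc);
        writer.close();
        dir.close();
      }
    }
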
  public void testHighFreqTerm() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    writer.setRAMBufferSizeMB(0.01);
    writer.setMaxFieldLength(100000000);
    // Massive doc that has 128 K a's
    StringBuffer b = new StringBuffer(1024*1024);
    for(int i=0;i<4096;i++) {
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
      b.append(" a a a a a a a a");
    }
    Document doc = new Document();
    doc.add(new Field("field", b.toString(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    assertEquals(1, reader.maxDoc());
    assertEquals(1, reader.numDocs());
    Term t = new Term("field", "a");
    assertEquals(1, reader.docFreq(t));
    TermDocs td = reader.termDocs(t);
    td.next();
    assertEquals(128*1024, td.freq());
    reader.close();
    dir.close();
  }

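The frequency check above walks a TermDocs enumeration for a single term. An illustrative sketch of the same pattern over every matching document follows; the class and method names are invented, not from this patch.

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.TermDocs;
    import org.apache.lucene.store.Directory;

    public class TermDocsExample {
      // Prints the doc id and within-document frequency for every
      // document that contains the given term.
      public static void dumpPostings(Directory dir, String field, String text) throws Exception {
        IndexReader reader = IndexReader.open(dir);
        TermDocs td = reader.termDocs(new Term(field, text));
        while (td.next()) {
          System.out.println("doc=" + td.doc() + " freq=" + td.freq());
        }
        td.close();
        reader.close();
      }
    }
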
  // Make sure that a Directory implementation that does
  // not use LockFactory at all (ie overrides makeLock and
  // implements its own private locking) works OK. This

@ -110,7 +110,7 @@ public class TestIndexWriterDelete extends TestCase {
      }
      modifier.flush();

      assertEquals(0, modifier.getRamSegmentCount());
      assertEquals(0, modifier.getNumBufferedDocuments());
      assertTrue(0 < modifier.getSegmentCount());

      if (!autoCommit) {
@ -452,7 +452,7 @@ public class TestIndexWriterDelete extends TestCase {
    String[] startFiles = dir.list();
    SegmentInfos infos = new SegmentInfos();
    infos.read(dir);
    IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
    IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, null);
    String[] endFiles = dir.list();

    Arrays.sort(startFiles);
@ -57,7 +57,7 @@ public class TestIndexWriterMergePolicy extends TestCase {
    for (int i = 0; i < 100; i++) {
      addDoc(writer);
      checkInvariants(writer);
      if (writer.getRamSegmentCount() + writer.getSegmentCount() >= 18) {
      if (writer.getNumBufferedDocuments() + writer.getSegmentCount() >= 18) {
        noOverMerge = true;
      }
    }
@ -195,7 +195,7 @@ public class TestIndexWriterMergePolicy extends TestCase {
    int mergeFactor = writer.getMergeFactor();
    int maxMergeDocs = writer.getMaxMergeDocs();

    int ramSegmentCount = writer.getRamSegmentCount();
    int ramSegmentCount = writer.getNumBufferedDocuments();
    assertTrue(ramSegmentCount < maxBufferedDocs);

    int lowerBound = -1;
@ -50,7 +50,7 @@ public class TestLazyProxSkipping extends TestCase {

        Directory directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);

        writer.setMaxBufferedDocs(10);
        for (int i = 0; i < numDocs; i++) {
            Document doc = new Document();
            String content;
@ -467,7 +467,8 @@ public class TestPayloads extends TestCase {
                        d.add(new Field(field, new PoolingPayloadTokenStream(pool)));
                        writer.addDocument(d);
                    }
                } catch (IOException e) {
                } catch (Exception e) {
                    e.printStackTrace();
                    fail(e.toString());
                }
            }
@ -480,7 +481,6 @@ public class TestPayloads extends TestCase {
                ingesters[i].join();
            } catch (InterruptedException e) {}
        }

        writer.close();
        IndexReader reader = IndexReader.open(dir);
        TermEnum terms = reader.terms();
@ -74,8 +74,6 @@ public class TestStressIndexing extends TestCase {
        count++;
      }

      modifier.close();

    } catch (Exception e) {
      System.out.println(e.toString());
      e.printStackTrace();
@ -125,6 +123,9 @@ public class TestStressIndexing extends TestCase {
    IndexerThread indexerThread = new IndexerThread(modifier);
    indexerThread.start();

    IndexerThread indexerThread2 = new IndexerThread(modifier);
    indexerThread2.start();

    // Two searchers that constantly just re-instantiate the searcher:
    SearcherThread searcherThread1 = new SearcherThread(directory);
    searcherThread1.start();
@ -133,9 +134,14 @@ public class TestStressIndexing extends TestCase {
    searcherThread2.start();

    indexerThread.join();
    indexerThread2.join();
    searcherThread1.join();
    searcherThread2.join();

    modifier.close();

    assertTrue("hit unexpected exception in indexer", !indexerThread.failed);
    assertTrue("hit unexpected exception in indexer 2", !indexerThread2.failed);
    assertTrue("hit unexpected exception in search1", !searcherThread1.failed);
    assertTrue("hit unexpected exception in search2", !searcherThread2.failed);
    //System.out.println(" Writer: " + indexerThread.count + " iterations");
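The change above has the stress test drive one IndexWriter from two indexer threads while searchers run concurrently. A minimal sketch of that usage pattern follows; it is illustrative only (class name and loop counts are invented), not code from this patch, and it assumes addDocument may be called from multiple threads on a shared writer as the test itself does.

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.RAMDirectory;

    public class SharedWriterExample {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        final IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
        Thread[] threads = new Thread[2];
        for (int t = 0; t < threads.length; t++) {
          final int id = t;
          threads[t] = new Thread() {
            public void run() {
              try {
                for (int i = 0; i < 1000; i++) {
                  Document doc = new Document();
                  doc.add(new Field("field", "thread" + id + " doc" + i,
                                    Field.Store.YES, Field.Index.TOKENIZED));
                  // All threads add documents through the same writer instance.
                  writer.addDocument(doc);
                }
              } catch (Exception e) {
                e.printStackTrace();
              }
            }
          };
          threads[t].start();
        }
        for (int t = 0; t < threads.length; t++)
          threads[t].join();
        writer.close();
        dir.close();
      }
    }
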
Binary file not shown.
Binary file not shown.
@ -291,6 +291,80 @@ public class TestTermVectors extends TestCase {
          Field.Index.TOKENIZED, Field.TermVector.YES));
      //System.out.println("Document: " + doc);
    }

  // Test only a few docs having vectors
  public void testRareVectors() throws IOException {
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    for(int i=0;i<100;i++) {
      Document doc = new Document();
      doc.add(new Field("field", English.intToEnglish(i),
                        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
      writer.addDocument(doc);
    }
    for(int i=0;i<10;i++) {
      Document doc = new Document();
      doc.add(new Field("field", English.intToEnglish(100+i),
                        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
      writer.addDocument(doc);
    }

    writer.close();
    searcher = new IndexSearcher(directory);

    Query query = new TermQuery(new Term("field", "hundred"));
    Hits hits = searcher.search(query);
    assertEquals(10, hits.length());
    for (int i = 0; i < hits.length(); i++) {
      TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
      assertTrue(vector != null);
      assertTrue(vector.length == 1);
    }
  }

  // In a single doc, for the same field, mix the term
  // vectors up
  public void testMixedVectrosVectors() throws IOException {
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    Document doc = new Document();
    doc.add(new Field("field", "one",
                      Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
    doc.add(new Field("field", "one",
                      Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
    doc.add(new Field("field", "one",
                      Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
    doc.add(new Field("field", "one",
                      Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
    doc.add(new Field("field", "one",
                      Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();

    searcher = new IndexSearcher(directory);

    Query query = new TermQuery(new Term("field", "one"));
    Hits hits = searcher.search(query);
    assertEquals(1, hits.length());

    TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(0));
    assertTrue(vector != null);
    assertTrue(vector.length == 1);
    TermPositionVector tfv = (TermPositionVector) vector[0];
    assertTrue(tfv.getField().equals("field"));
    String[] terms = tfv.getTerms();
    assertEquals(1, terms.length);
    assertEquals(terms[0], "one");
    assertEquals(5, tfv.getTermFrequencies()[0]);

    int[] positions = tfv.getTermPositions(0);
    assertEquals(5, positions.length);
    for(int i=0;i<5;i++)
      assertEquals(i, positions[i]);
    TermVectorOffsetInfo[] offsets = tfv.getOffsets(0);
    assertEquals(5, offsets.length);
    for(int i=0;i<5;i++) {
      assertEquals(4*i, offsets[i].getStartOffset());
      assertEquals(4*i+3, offsets[i].getEndOffset());
    }
  }
}