Added term vector support.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150206 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 2004-02-20 20:14:56 +00:00
parent abb62bda9c
commit 12eee6df5a
41 changed files with 3734 additions and 328 deletions

View File: CHANGES.txt

@ -54,6 +54,10 @@ $Id$
9. Added MultiReader, an IndexReader that combines multiple other
IndexReaders. (Cutting)
10. Added support for term vectors. See Field#isTermVectorStored().
(Grant Ingersoll, Cutting & Dmitry)
1.3 final
1. Added catch of BooleanQuery$TooManyClauses in QueryParser to

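For illustration, a minimal usage sketch of the feature this entry describes, using only the factory and accessor introduced in this commit (the index-writing boilerplate is omitted):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class TermVectorExample {
  public static void main(String[] args) {
    Document doc = new Document();
    // the new boolean argument requests that a term vector be stored
    doc.add(Field.Text("subject", "lucene term vectors", true));
    // the two-argument factories keep the old behavior (no vector)
    doc.add(Field.Text("body", "no term vector is stored here"));
    System.out.println(doc.getField("subject").isTermVectorStored()); // true
  }
}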
View File: Field.java

@ -71,6 +71,7 @@ import org.apache.lucene.search.Hits; // for javadoc
public final class Field implements java.io.Serializable {
private String name = "body";
private String stringValue = null;
private boolean storeTermVector = false;
private Reader readerValue = null;
private boolean isStored = false;
private boolean isIndexed = true;
@ -114,7 +115,8 @@ public final class Field implements java.io.Serializable {
}
/** Constructs a String-valued Field that is not tokenized, but is indexed
and stored. Useful for non-text fields, e.g. date or url. */
and stored. Useful for non-text fields, e.g. date or url.
*/
public static final Field Keyword(String name, String value) {
return new Field(name, value, true, true, false);
}
@ -127,9 +129,9 @@ public final class Field implements java.io.Serializable {
/** Constructs a String-valued Field that is tokenized and indexed,
and is stored in the index, for return with hits. Useful for short text
fields, like "title" or "subject". */
fields, like "title" or "subject". Term vector will not be stored for this field. */
public static final Field Text(String name, String value) {
return new Field(name, value, true, true, true);
return Text(name, value, false);
}
/** Constructs a Date-valued Field that is not tokenized and is indexed,
@ -139,16 +141,38 @@ public final class Field implements java.io.Serializable {
}
/** Constructs a String-valued Field that is tokenized and indexed,
but that is not stored in the index. */
and is stored in the index, for return with hits. Useful for short text
fields, like "title" or "subject". */
public static final Field Text(String name, String value, boolean storeTermVector) {
return new Field(name, value, true, true, true, storeTermVector);
}
/** Constructs a String-valued Field that is tokenized and indexed,
but that is not stored in the index. Term vector will not be stored for this field. */
public static final Field UnStored(String name, String value) {
return new Field(name, value, false, true, true);
return UnStored(name, value, false);
}
/** Constructs a String-valued Field that is tokenized and indexed,
but that is not stored in the index. */
public static final Field UnStored(String name, String value, boolean storeTermVector) {
return new Field(name, value, false, true, true, storeTermVector);
}
/** Constructs a Reader-valued Field that is tokenized and indexed, but is
not stored in the index verbatim. Useful for longer text fields, like
"body". Term vector will not be stored for this field. */
public static final Field Text(String name, Reader value) {
return Text(name, value, false);
}
/** Constructs a Reader-valued Field that is tokenized and indexed, but is
not stored in the index verbatim. Useful for longer text fields, like
"body". */
public static final Field Text(String name, Reader value) {
return new Field(name, value);
public static final Field Text(String name, Reader value, boolean storeTermVector) {
Field f = new Field(name, value);
f.storeTermVector = storeTermVector;
return f;
}
/** The name of the field (e.g., "date", "subject", "title", or "body")
@ -162,19 +186,41 @@ public final class Field implements java.io.Serializable {
is used. Exactly one of stringValue() and readerValue() must be set. */
public Reader readerValue() { return readerValue; }
/** Create a field by specifying all parameters except for <code>storeTermVector</code>,
* which is set to <code>false</code>.
*/
public Field(String name, String string,
boolean store, boolean index, boolean token) {
this(name, string, store, index, token, false);
}
/**
*
* @param name The name of the field
* @param string The string to process
* @param store true if the field should store the string
* @param index true if the field should be indexed
* @param token true if the field should be tokenized
* @param storeTermVector true if we should store the Term Vector info
*/
public Field(String name, String string,
boolean store, boolean index, boolean token, boolean storeTermVector) {
if (name == null)
throw new IllegalArgumentException("name cannot be null");
if (string == null)
throw new IllegalArgumentException("value cannot be null");
if (!index && storeTermVector)
throw new IllegalArgumentException("cannot store a term vector for fields that are not indexed.");
this.name = name.intern(); // field names are interned
this.stringValue = string;
this.isStored = store;
this.isIndexed = index;
this.isTokenized = token;
this.storeTermVector = storeTermVector;
}
Field(String name, Reader reader) {
if (name == null)
throw new IllegalArgumentException("name cannot be null");
@ -199,6 +245,16 @@ public final class Field implements java.io.Serializable {
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see IndexReader#getTermFreqVector(int, String)
*/
public final boolean isTermVectorStored() { return storeTermVector; }
/** Prints a Field for human consumption. */
public final String toString() {
if (isStored && isIndexed && !isTokenized)
@ -209,8 +265,14 @@ public final class Field implements java.io.Serializable {
return "Text<" + name + ":" + stringValue + ">";
else if (!isStored && isIndexed && isTokenized && readerValue!=null)
return "Text<" + name + ":" + readerValue + ">";
else if (!isStored && isIndexed && isTokenized)
{
return "UnStored<" + name + ">";
}
else
{
return super.toString();
}
}
}
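As the six-argument constructor above shows, requesting a term vector for a field that is not indexed is rejected. A small self-contained sketch of that check in use:

import org.apache.lucene.document.Field;

public class FieldValidationExample {
  public static void main(String[] args) {
    try {
      // store=true, index=false, token=false, storeTermVector=true
      new Field("id", "42", true, false, false, true);
    } catch (IllegalArgumentException e) {
      // "cannot store a term vector for fields that are not indexed."
      System.out.println(e.getMessage());
    }
  }
}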

View File: CompoundFileReader.java

@ -72,7 +72,7 @@ import java.io.IOException;
* @author Dmitry Serebrennikov
* @version $Id$
*/
public class CompoundFileReader extends Directory {
class CompoundFileReader extends Directory {
private static final class FileEntry {
long offset;

View File: DocumentWriter.java

@ -77,6 +77,13 @@ final class DocumentWriter {
private FieldInfos fieldInfos;
private int maxFieldLength;
/**
*
* @param directory The directory to write the document information to
* @param analyzer The analyzer to use for the document
* @param similarity The Similarity function
* @param maxFieldLength The maximum number of tokens a field may have
*/
DocumentWriter(Directory directory, Analyzer analyzer,
Similarity similarity, int maxFieldLength) {
this.directory = directory;
@ -86,7 +93,7 @@ final class DocumentWriter {
}
final void addDocument(String segment, Document doc)
throws IOException {
throws IOException {
// write field names
fieldInfos = new FieldInfos();
fieldInfos.add(doc);
@ -94,7 +101,7 @@ final class DocumentWriter {
// write field values
FieldsWriter fieldsWriter =
new FieldsWriter(directory, segment, fieldInfos);
new FieldsWriter(directory, segment, fieldInfos);
try {
fieldsWriter.addDocument(doc);
} finally {
@ -144,7 +151,7 @@ final class DocumentWriter {
// Tokenizes the fields of a document into Postings.
private final void invertDocument(Document doc)
throws IOException {
throws IOException {
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
@ -166,7 +173,7 @@ final class DocumentWriter {
reader = new StringReader(field.stringValue());
else
throw new IllegalArgumentException
("field must have either String or Reader value");
("field must have either String or Reader value");
// Tokenize field and add to postingTable
TokenStream stream = analyzer.tokenStream(fieldName, reader);
@ -277,15 +284,17 @@ final class DocumentWriter {
}
private final void writePostings(Posting[] postings, String segment)
throws IOException {
throws IOException {
OutputStream freq = null, prox = null;
TermInfosWriter tis = null;
TermVectorsWriter termVectorWriter = null;
try {
//open files for inverse index storage
freq = directory.createFile(segment + ".frq");
prox = directory.createFile(segment + ".prx");
tis = new TermInfosWriter(directory, segment, fieldInfos);
TermInfo ti = new TermInfo();
String currentField = null;
for (int i = 0; i < postings.length; i++) {
Posting posting = postings[i];
@ -295,38 +304,65 @@ final class DocumentWriter {
tis.add(posting.term, ti);
// add an entry to the freq file
int f = posting.freq;
if (f == 1) // optimize freq=1
int postingFreq = posting.freq;
if (postingFreq == 1) // optimize freq=1
freq.writeVInt(1); // set low bit of doc num.
else {
freq.writeVInt(0); // the document number
freq.writeVInt(f); // frequency in doc
freq.writeVInt(postingFreq); // frequency in doc
}
int lastPosition = 0; // write positions
int[] positions = posting.positions;
for (int j = 0; j < f; j++) { // use delta-encoding
for (int j = 0; j < postingFreq; j++) { // use delta-encoding
int position = positions[j];
prox.writeVInt(position - lastPosition);
lastPosition = position;
}
// check to see if we switched to a new field
String termField = posting.term.field();
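// field names are interned, so reference inequality detects a field change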
if (currentField != termField) {
// changing field - see if there is something to save
currentField = termField;
FieldInfo fi = fieldInfos.fieldInfo(currentField);
if (fi.storeTermVector) {
if (termVectorWriter == null) {
termVectorWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
termVectorWriter.openDocument();
}
termVectorWriter.openField(currentField);
} else if (termVectorWriter != null) {
termVectorWriter.closeField();
}
}
if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
termVectorWriter.addTerm(posting.term.text(), postingFreq);
}
}
if (termVectorWriter != null)
termVectorWriter.closeDocument();
} finally {
if (freq != null) freq.close();
if (prox != null) prox.close();
if (tis != null) tis.close();
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
IOException keep = null;
if (freq != null) try { freq.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (prox != null) try { prox.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (tis != null) try { tis.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (termVectorWriter != null) try { termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }
if (keep != null) throw (IOException) keep.fillInStackTrace();
}
}
private final void writeNorms(Document doc, String segment)
throws IOException {
throws IOException {
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
if (field.isIndexed()) {
int n = fieldInfos.fieldNumber(field.name());
float norm =
fieldBoosts[n] * similarity.lengthNorm(field.name(),fieldLengths[n]);
fieldBoosts[n] * similarity.lengthNorm(field.name(), fieldLengths[n]);
OutputStream norms = directory.createFile(segment + ".f" + n);
try {
norms.writeByte(similarity.encodeNorm(norm));

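The term vector writing in writePostings above follows a small open/close protocol on TermVectorsWriter (a package-private helper; the call names below are taken directly from that method, so this is only a sketch of the sequence, not a runnable snippet on its own):

TermVectorsWriter writer = new TermVectorsWriter(directory, segment, fieldInfos);
writer.openDocument();          // once per document
writer.openField("subject");    // once per field that stores vectors
writer.addTerm("lucene", 2);    // term text and its frequency in the field
writer.addTerm("vectors", 1);
writer.closeField();
writer.closeDocument();         // flushes this document's vectors
writer.close();                 // closes the underlying .tvx/.tvd/.tvf files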
View File: FieldInfo.java

@ -59,9 +59,13 @@ final class FieldInfo {
boolean isIndexed;
int number;
FieldInfo(String na, boolean tk, int nu) {
// true if term vector for this field should be stored
boolean storeTermVector;
FieldInfo(String na, boolean tk, int nu, boolean storeTermVector) {
name = na;
isIndexed = tk;
number = nu;
this.storeTermVector = storeTermVector;
}
}

View File: FieldInfos.java

@ -54,11 +54,7 @@ package org.apache.lucene.index;
* <http://www.apache.org/>.
*/
import java.util.Hashtable;
import java.util.Vector;
import java.util.Enumeration;
import java.util.Collection;
import java.util.Iterator;
import java.util.*;
import java.io.IOException;
import org.apache.lucene.document.Document;
@ -68,6 +64,12 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.InputStream;
/** Access to the Field Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Field Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
*/
final class FieldInfos {
private Vector byNumber = new Vector();
private Hashtable byName = new Hashtable();
@ -76,6 +78,15 @@ final class FieldInfos {
add("", false);
}
/**
* Construct a FieldInfos object by reading it (via an InputStream) from the
* named file in the given Directory.
* @param d The directory to open the InputStream from
* @param name The name of the file to open the InputStream from in the Directory
* @throws IOException
*
* @see #read
*/
FieldInfos(Directory d, String name) throws IOException {
InputStream input = d.openFile(name);
try {
@ -86,36 +97,83 @@ final class FieldInfos {
}
/** Adds field info for a Document. */
final void add(Document doc) {
Enumeration fields = doc.fields();
public void add(Document doc) {
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field)fields.nextElement();
add(field.name(), field.isIndexed());
Field field = (Field) fields.nextElement();
add(field.name(), field.isIndexed(), field.isTermVectorStored());
}
}
final void add(Collection names, boolean isIndexed) {
/**
* Adds fields to this FieldInfos, marking them all as indexed.
* @param names The names of the fields
* @param storeTermVectors Whether the fields store term vectors or not
*/
public void addIndexed(Collection names, boolean storeTermVectors) {
Iterator i = names.iterator();
while (i.hasNext()) {
add((String)i.next(), true, storeTermVectors);
}
}
/**
* Assumes the field is not storing term vectors
* @param names The names of the fields
* @param isIndexed Whether the fields are indexed or not
*
* @see #add(String, boolean)
*/
public void add(Collection names, boolean isIndexed) {
Iterator i = names.iterator();
while (i.hasNext()) {
add((String)i.next(), isIndexed);
}
}
final void add(String name, boolean isIndexed) {
FieldInfo fi = fieldInfo(name);
if (fi == null)
addInternal(name, isIndexed);
else if (fi.isIndexed != isIndexed)
fi.isIndexed = true;
}
/**
* Calls the three-parameter add with false for the storeTermVector parameter
* @param name The name of the Field
* @param isIndexed true if the field is indexed
* @see #add(String, boolean, boolean)
*/
public void add(String name, boolean isIndexed) {
add(name, isIndexed, false);
}
private final void addInternal(String name, boolean isIndexed) {
FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size());
/** If the field is not yet known, adds it. If it is known, checks to make
* sure that the isIndexed flag is the same as was given previously for this
* field. If not, marks it as indexed. The same applies to storeTermVector.
*
* @param name The name of the field
* @param isIndexed true if the field is indexed
* @param storeTermVector true if the term vector should be stored
*/
public void add(String name, boolean isIndexed, boolean storeTermVector) {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
addInternal(name, isIndexed, storeTermVector);
} else {
if (fi.isIndexed != isIndexed) {
fi.isIndexed = true; // once indexed, always index
}
if (fi.storeTermVector != storeTermVector) {
fi.storeTermVector = true; // once vector, always vector
}
}
}
private void addInternal(String name, boolean isIndexed,
boolean storeTermVector) {
FieldInfo fi =
new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector);
byNumber.addElement(fi);
byName.put(name, fi);
}
final int fieldNumber(String fieldName) {
public int fieldNumber(String fieldName) {
FieldInfo fi = fieldInfo(fieldName);
if (fi != null)
return fi.number;
@ -123,23 +181,32 @@ final class FieldInfos {
return -1;
}
final FieldInfo fieldInfo(String fieldName) {
return (FieldInfo)byName.get(fieldName);
public FieldInfo fieldInfo(String fieldName) {
return (FieldInfo) byName.get(fieldName);
}
final String fieldName(int fieldNumber) {
public String fieldName(int fieldNumber) {
return fieldInfo(fieldNumber).name;
}
final FieldInfo fieldInfo(int fieldNumber) {
return (FieldInfo)byNumber.elementAt(fieldNumber);
public FieldInfo fieldInfo(int fieldNumber) {
return (FieldInfo) byNumber.elementAt(fieldNumber);
}
final int size() {
public int size() {
return byNumber.size();
}
final void write(Directory d, String name) throws IOException {
public boolean hasVectors() {
boolean hasVectors = false;
for (int i = 0; i < size(); i++) {
if (fieldInfo(i).storeTermVector)
hasVectors = true;
}
return hasVectors;
}
public void write(Directory d, String name) throws IOException {
OutputStream output = d.createFile(name);
try {
write(output);
@ -148,19 +215,29 @@ final class FieldInfos {
}
}
final void write(OutputStream output) throws IOException {
public void write(OutputStream output) throws IOException {
output.writeVInt(size());
for (int i = 0; i < size(); i++) {
FieldInfo fi = fieldInfo(i);
byte bits = 0x0;
if (fi.isIndexed) bits |= 0x1;
if (fi.storeTermVector) bits |= 0x2;
output.writeString(fi.name);
output.writeByte((byte)(fi.isIndexed ? 1 : 0));
output.writeByte(bits);
}
}
private final void read(InputStream input) throws IOException {
int size = input.readVInt();
for (int i = 0; i < size; i++)
addInternal(input.readString().intern(),
input.readByte() != 0);
private void read(InputStream input) throws IOException {
int size = input.readVInt(); // read in the size
for (int i = 0; i < size; i++) {
String name = input.readString().intern();
byte bits = input.readByte();
boolean isIndexed = (bits & 0x1) != 0;
boolean storeTermVector = (bits & 0x2) != 0;
addInternal(name, isIndexed, storeTermVector);
}
}
}
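The .fnm format change above packs the two per-field booleans into one flag byte. A self-contained sketch of the encoding scheme used by write() and read():

public class FieldFlagBits {
  static byte encode(boolean isIndexed, boolean storeTermVector) {
    byte bits = 0x0;
    if (isIndexed) bits |= 0x1;        // low bit: field is indexed
    if (storeTermVector) bits |= 0x2;  // second bit: term vector is stored
    return bits;
  }

  public static void main(String[] args) {
    byte bits = encode(true, true);               // 0x3
    boolean isIndexed = (bits & 0x1) != 0;        // true
    boolean storeTermVector = (bits & 0x2) != 0;  // true
    System.out.println(isIndexed + " " + storeTermVector);
  }
}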

View File: FieldsReader.java

@ -63,6 +63,7 @@ import org.apache.lucene.document.Field;
/**
* Class responsible for access to stored document fields.
*
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
*
* @version $Id$
@ -108,7 +109,7 @@ final class FieldsReader {
fieldsStream.readString(), // read value
true, // stored
fi.isIndexed, // indexed
(bits & 1) != 0)); // tokenized
(bits & 1) != 0, fi.storeTermVector)); // vector
}
return doc;

View File: FilterIndexReader.java

@ -66,7 +66,7 @@ import org.apache.lucene.document.Document;
* contained index reader. Subclasses of <code>FilterIndexReader</code> may
* further override some of these methods and may also provide additional
* methods and fields.
*/
*/
public class FilterIndexReader extends IndexReader {
/** Base class for filtering {@link TermDocs} implementations. */
@ -89,7 +89,7 @@ public class FilterIndexReader extends IndexReader {
/** Base class for filtering {@link TermPositions} implementations. */
public static class FilterTermPositions
extends FilterTermDocs implements TermPositions {
extends FilterTermDocs implements TermPositions {
public FilterTermPositions(TermPositions in) { super(in); }
@ -118,10 +118,20 @@ public class FilterIndexReader extends IndexReader {
this.in = in;
}
public TermFreqVector[] getTermFreqVectors(int docNumber)
throws IOException {
return in.getTermFreqVectors(docNumber);
}
public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException {
return in.getTermFreqVector(docNumber, field);
}
public int numDocs() { return in.numDocs(); }
public int maxDoc() { return in.maxDoc(); }
public Document document(int n) throws IOException {return in.document(n);}
public Document document(int n) throws IOException { return in.document(n); }
public boolean isDeleted(int n) { return in.isDeleted(n); }
public boolean hasDeletions() { return in.hasDeletions(); }
@ -132,7 +142,7 @@ public class FilterIndexReader extends IndexReader {
in.norms(f, bytes, offset);
}
public void setNorm(int d, String f, byte b) throws IOException {
in.setNorm(d,f,b);
in.setNorm(d, f, b);
}
public TermEnum terms() throws IOException { return in.terms(); }
@ -141,6 +151,7 @@ public class FilterIndexReader extends IndexReader {
public int docFreq(Term t) throws IOException { return in.docFreq(t); }
public TermDocs termDocs() throws IOException { return in.termDocs(); }
public TermPositions termPositions() throws IOException {
return in.termPositions();
}
@ -151,7 +162,18 @@ public class FilterIndexReader extends IndexReader {
public Collection getFieldNames() throws IOException {
return in.getFieldNames();
}
public Collection getFieldNames(boolean indexed) throws IOException {
return in.getFieldNames(indexed);
}
/**
* Returns the names of indexed fields, filtered by term vector storage.
* @param storedTermVector if true, returns only indexed fields that have term vector info,
* else only indexed fields without term vector info
* @return Collection of Strings indicating the names of the fields
*/
public Collection getIndexedFieldNames(boolean storedTermVector) {
return in.getIndexedFieldNames(storedTermVector);
}
}

View File: IndexReader.java

@ -66,20 +66,20 @@ import org.apache.lucene.document.Field; // for javadoc
import org.apache.lucene.search.Similarity;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
so that any subclass which implements it is searchable.
index. Search of an index is done entirely through this abstract interface,
so that any subclass which implements it is searchable.
<p> Concrete subclasses of IndexReader are usually constructed with a call to
the static method {@link #open}.
<p> Concrete subclasses of IndexReader are usually constructed with a call to
the static method {@link #open}.
<p> For efficiency, in this API documents are often referred to via
<i>document numbers</i>, non-negative integers which each name a unique
document in the index. These document numbers are ephemeral--they may change
as documents are added to and deleted from an index. Clients should thus not
rely on a given document having the same number between sessions.
<p> For efficiency, in this API documents are often referred to via
<i>document numbers</i>, non-negative integers which each name a unique
document in the index. These document numbers are ephemeral--they may change
as documents are added to and deleted from an index. Clients should thus not
rely on a given document having the same number between sessions.
@author Doug Cutting
@version $Id$
@author Doug Cutting
@version $Id$
*/
public abstract class IndexReader {
protected IndexReader(Directory directory) {
@ -92,21 +92,21 @@ public abstract class IndexReader {
private Lock writeLock;
SegmentInfos segmentInfos = null;
private boolean stale = false;
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
path. */
public static IndexReader open(String path) throws IOException {
return open(FSDirectory.getDirectory(path, false));
}
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
path. */
public static IndexReader open(File path) throws IOException {
return open(FSDirectory.getDirectory(path, false));
}
/** Returns an IndexReader reading the index in the given Directory. */
public static IndexReader open(final Directory directory) throws IOException{
public static IndexReader open(final Directory directory) throws IOException {
synchronized (directory) { // in- & inter-process sync
return (IndexReader)new Lock.With(
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
@ -117,10 +117,10 @@ public abstract class IndexReader {
if (infos.size() == 1) { // index is optimized
return new SegmentReader(infos, infos.info(0), true);
} else {
IndexReader[] readers = new IndexReader[infos.size()];
for (int i = 0; i < infos.size(); i++)
readers[i] = new SegmentReader(infos, infos.info(i), i==infos.size()-1);
return new MultiReader(directory, readers);
IndexReader[] readers = new IndexReader[infos.size()];
for (int i = 0; i < infos.size(); i++)
readers[i] = new SegmentReader(infos, infos.info(i), i==infos.size()-1);
return new MultiReader(directory, readers);
}
}
}.run();
@ -174,7 +174,7 @@ public abstract class IndexReader {
public static long lastModified(Directory directory) throws IOException {
return directory.fileModified("segments");
}
/**
* Reads version number from segments files. The version number counts the
* number of changes of the index.
@ -186,7 +186,7 @@ public abstract class IndexReader {
public static long getCurrentVersion(String directory) throws IOException {
return getCurrentVersion(new File(directory));
}
/**
* Reads version number from segments files. The version number counts the
* number of changes of the index.
@ -201,7 +201,7 @@ public abstract class IndexReader {
dir.close();
return version;
}
/**
* Reads version number from segments files. The version number counts the
* number of changes of the index.
@ -214,6 +214,27 @@ public abstract class IndexReader {
return SegmentInfos.readCurrentVersion(directory);
}
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector contains term numbers and frequencies for all terms
* in a given vectorized field.
* If no such fields existed, the method returns null.
*
* @see Field#isTermVectorStored()
*/
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
throws IOException;
/** Return a term frequency vector for the specified document and field. The
* vector returned contains term numbers and frequencies for all terms in
* the specified field of this document, if the field had storeTermVector
* flag set. If the flag was not set, the method returns null.
*
* @see Field#isTermVectorStored()
*/
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException;
/**
* Returns <code>true</code> if an index exists at the specified directory,
* and <code>false</code> if the directory does not exist or contains no index.
@ -250,13 +271,13 @@ public abstract class IndexReader {
public abstract int numDocs();
/** Returns one greater than the largest possible document number.
This may be used to, e.g., determine how big to allocate an array which
will have an element for every document number in an index.
This may be used to, e.g., determine how big to allocate an array which
will have an element for every document number in an index.
*/
public abstract int maxDoc();
/** Returns the stored fields of the <code>n</code><sup>th</sup>
<code>Document</code> in this index. */
<code>Document</code> in this index. */
public abstract Document document(int n) throws IOException;
/** Returns true if document <i>n</i> has been deleted */
@ -264,7 +285,7 @@ public abstract class IndexReader {
/** Returns true if any documents have been deleted */
public abstract boolean hasDeletions();
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
*
@ -283,14 +304,14 @@ public abstract class IndexReader {
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
* Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
* int) length normalization}. Thus, to preserve the length normalization
* int) length normalization}. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
* @see #norms(String)
* @see Similarity#decodeNorm(byte)
*/
public abstract void setNorm(int doc, String field, byte value)
throws IOException;
throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
* document.
@ -299,20 +320,20 @@ public abstract class IndexReader {
* @see Similarity#decodeNorm(byte)
*/
public void setNorm(int doc, String field, float value)
throws IOException {
throws IOException {
setNorm(doc, field, Similarity.encodeNorm(value));
}
/** Returns an enumeration of all the terms in the index.
The enumeration is ordered by Term.compareTo(). Each term
is greater than all that precede it in the enumeration.
The enumeration is ordered by Term.compareTo(). Each term
is greater than all that precede it in the enumeration.
*/
public abstract TermEnum terms() throws IOException;
/** Returns an enumeration of all terms after a given term.
The enumeration is ordered by Term.compareTo(). Each term
is greater than all that precede it in the enumeration.
The enumeration is ordered by Term.compareTo(). Each term
is greater than all that precede it in the enumeration.
*/
public abstract TermEnum terms(Term t) throws IOException;
@ -320,15 +341,15 @@ public abstract class IndexReader {
public abstract int docFreq(Term t) throws IOException;
/** Returns an enumeration of all the documents which contain
<code>term</code>. For each document, the document number, the frequency of
the term in that document is also provided, for use in search scoring.
Thus, this method implements the mapping:
<p><ul>
Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
</ul>
<p>The enumeration is ordered by document number. Each document number
is greater than all that precede it in the enumeration.
*/
<code>term</code>. For each document, the document number, the frequency of
the term in that document is also provided, for use in search scoring.
Thus, this method implements the mapping:
<p><ul>
Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
</ul>
<p>The enumeration is ordered by document number. Each document number
is greater than all that precede it in the enumeration.
*/
public TermDocs termDocs(Term term) throws IOException {
TermDocs termDocs = termDocs();
termDocs.seek(term);
@ -339,21 +360,21 @@ public abstract class IndexReader {
public abstract TermDocs termDocs() throws IOException;
/** Returns an enumeration of all the documents which contain
<code>term</code>. For each document, in addition to the document number
and frequency of the term in that document, a list of all of the ordinal
positions of the term in the document is available. Thus, this method
implements the mapping:
<code>term</code>. For each document, in addition to the document number
and frequency of the term in that document, a list of all of the ordinal
positions of the term in the document is available. Thus, this method
implements the mapping:
<p><ul>
Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
&lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
pos<sub>freq-1</sub>&gt;
&gt;<sup>*</sup>
</ul>
<p> This positional information faciliates phrase and proximity searching.
<p>The enumeration is ordered by document number. Each document number is
greater than all that precede it in the enumeration.
*/
<p><ul>
Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
&lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
pos<sub>freq-1</sub>&gt;
&gt;<sup>*</sup>
</ul>
<p> This positional information facilitates phrase and proximity searching.
<p>The enumeration is ordered by document number. Each document number is
greater than all that precede it in the enumeration.
*/
public TermPositions termPositions(Term term) throws IOException {
TermPositions termPositions = termPositions();
termPositions.seek(term);
@ -364,16 +385,16 @@ public abstract class IndexReader {
public abstract TermPositions termPositions() throws IOException;
/** Deletes the document numbered <code>docNum</code>. Once a document is
deleted it will not appear in TermDocs or TermPositions enumerations.
Attempts to read its field with the {@link #document}
method will result in an error. The presence of this document may still be
reflected in the {@link #docFreq} statistic, though
this will be corrected eventually as the index is further modified.
*/
deleted it will not appear in TermDocs or TermPositions enumerations.
Attempts to read its field with the {@link #document}
method will result in an error. The presence of this document may still be
reflected in the {@link #docFreq} statistic, though
this will be corrected eventually as the index is further modified.
*/
public final synchronized void delete(int docNum) throws IOException {
if(stale)
if (stale)
throw new IOException("IndexReader out of date and no longer valid for deletion");
if (writeLock == null) {
Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
if (!writeLock.obtain(IndexWriter.WRITE_LOCK_TIMEOUT)) // obtain write lock
@ -382,11 +403,11 @@ public abstract class IndexReader {
// we have to check whether index has changed since this reader was opened.
// if so, this reader is no longer valid for deletion
if(segmentInfos != null && SegmentInfos.readCurrentVersion(directory) > segmentInfos.getVersion()){
stale = true;
this.writeLock.release();
this.writeLock = null;
throw new IOException("IndexReader out of date and no longer valid for deletion");
if (segmentInfos != null && SegmentInfos.readCurrentVersion(directory) > segmentInfos.getVersion()) {
stale = true;
this.writeLock.release();
this.writeLock = null;
throw new IOException("IndexReader out of date and no longer valid for deletion");
}
}
doDelete(docNum);
@ -398,14 +419,14 @@ public abstract class IndexReader {
protected abstract void doDelete(int docNum) throws IOException;
/** Deletes all documents containing <code>term</code>.
This is useful if one uses a document field to hold a unique ID string for
the document. Then to delete such a document, one merely constructs a
term with the appropriate field and the unique ID string as its text and
passes it to this method. Returns the number of documents deleted.
*/
This is useful if one uses a document field to hold a unique ID string for
the document. Then to delete such a document, one merely constructs a
term with the appropriate field and the unique ID string as its text and
passes it to this method. Returns the number of documents deleted.
*/
public final int delete(Term term) throws IOException {
TermDocs docs = termDocs(term);
if ( docs == null ) return 0;
if (docs == null) return 0;
int n = 0;
try {
while (docs.next()) {
@ -444,25 +465,33 @@ public abstract class IndexReader {
writeLock = null;
}
}
/**
* Returns a list of all unique field names that exist in the index pointed to by
* this IndexReader.
* Returns a list of all unique field names that exist in the index pointed
* to by this IndexReader.
* @return Collection of Strings indicating the names of the fields
* @throws IOException if there is a problem with accessing the index
*/
public abstract Collection getFieldNames() throws IOException;
/**
* Returns a list of all unique field names that exist in the index pointed to by
* this IndexReader. The boolean argument specifies whether the fields returned
* are indexed or not.
* Returns a list of all unique field names that exist in the index pointed
* to by this IndexReader. The boolean argument specifies whether the fields
* returned are indexed or not.
* @param indexed <code>true</code> if only indexed fields should be returned;
* <code>false</code> if only unindexed fields should be returned.
* @return Collection of Strings indicating the names of the fields
* @throws IOException if there is a problem with accessing the index
*/
public abstract Collection getFieldNames(boolean indexed) throws IOException;
public abstract Collection getFieldNames(boolean indexed) throws IOException;
/**
* Returns the names of indexed fields, filtered by term vector storage.
* @param storedTermVector if true, returns only indexed fields that have term vector info,
* else only indexed fields without term vector info
* @return Collection of Strings indicating the names of the fields
*/
public abstract Collection getIndexedFieldNames(boolean storedTermVector);
/**
* Returns <code>true</code> iff the index in the named directory is
@ -470,12 +499,12 @@ public abstract class IndexReader {
* @param directory the directory to check for a lock
* @throws IOException if there is a problem with accessing the index
*/
public static boolean isLocked(Directory directory) throws IOException {
return
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
public static boolean isLocked(Directory directory) throws IOException {
return
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
}
}
/**
* Returns <code>true</code> iff the index in the named directory is
@ -483,19 +512,19 @@ public abstract class IndexReader {
* @param directory the directory to check for a lock
* @throws IOException if there is a problem with accessing the index
*/
public static boolean isLocked(String directory) throws IOException {
return isLocked(FSDirectory.getDirectory(directory, false));
}
public static boolean isLocked(String directory) throws IOException {
return isLocked(FSDirectory.getDirectory(directory, false));
}
/**
* Forcibly unlocks the index in the named directory.
* <P>
* Caution: this should only be used by failure recovery code,
* when it is known that no other process nor thread is in fact
* currently accessing this index.
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
}
/**
* Forcibly unlocks the index in the named directory.
* <P>
* Caution: this should only be used by failure recovery code,
* when it is known that no other process nor thread is in fact
* currently accessing this index.
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
}
}
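A hedged usage sketch for the two new accessors declared above (the index path and field name are illustrative; it assumes the "subject" field was indexed with storeTermVector set):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;

public class ReadVectorsExample {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/path/to/index");
    TermFreqVector vector = reader.getTermFreqVector(0, "subject");
    if (vector != null) {               // null when no vector was stored
      String[] terms = vector.getTerms();
      int[] freqs = vector.getTermFrequencies();
      for (int i = 0; i < terms.length; i++)
        System.out.println(terms[i] + ": " + freqs[i]);
    }
    reader.close();
  }
}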

View File: MultiReader.java

@ -75,7 +75,7 @@ public class MultiReader extends IndexReader {
private int maxDoc = 0;
private int numDocs = -1;
private boolean hasDeletions = false;
/** Construct reading the named set of readers. */
public MultiReader(IndexReader[] readers) throws IOException {
this(readers.length == 0 ? null : readers[0].directory(), readers);
@ -97,6 +97,25 @@ public class MultiReader extends IndexReader {
starts[readers.length] = maxDoc;
}
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector contains term numbers and frequencies for all terms
* in a given vectorized field.
* If no such fields existed, the method returns null.
*/
public TermFreqVector[] getTermFreqVectors(int n)
throws IOException {
int i = readerIndex(n); // find segment num
return readers[i].getTermFreqVectors(n - starts[i]); // dispatch to segment
}
public TermFreqVector getTermFreqVector(int n, String field)
throws IOException {
int i = readerIndex(n); // find segment num
return readers[i].getTermFreqVector(n - starts[i], field);
}
public synchronized int numDocs() {
if (numDocs == -1) { // check cache
int n = 0; // cache miss--recompute
@ -245,6 +264,18 @@ public class MultiReader extends IndexReader {
}
return fieldSet;
}
public Collection getIndexedFieldNames(boolean storedTermVector) {
// maintain a unique set of field names
Set fieldSet = new HashSet();
for (int i = 0; i < readers.length; i++) {
IndexReader reader = readers[i];
Collection names = reader.getIndexedFieldNames(storedTermVector);
fieldSet.addAll(names);
}
return fieldSet;
}
}
class MultiTermEnum extends TermEnum {

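The dispatch in getTermFreqVectors above translates a global document number into a sub-reader index plus a local document number via the starts[] array. A stand-alone worked example of that translation (readerIndex is assumed to perform this search; n is always below maxDoc):

public class DocRemapExample {
  public static void main(String[] args) {
    int[] starts = {0, 5, 12};   // two sub-readers with maxDoc 5 and 7
    int n = 8;                   // global document number
    int i = 0;
    while (i + 1 < starts.length && starts[i + 1] <= n)
      i++;                       // find the segment containing doc n
    int local = n - starts[i];   // 3: global doc 8 is doc 3 of reader 1
    System.out.println("reader " + i + ", local doc " + local);
  }
}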
View File: SegmentMerger.java

@ -61,10 +61,19 @@ import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BitVector;
/**
* The SegmentMerger class combines two or more Segments, each represented by
* an IndexReader ({@link #add}), into a single Segment. After adding the
* appropriate readers, call the merge method to combine the segments.
*<P>
* If the compoundFile flag is set, then the segments will be merged into a compound file.
*
*
* @see #merge
* @see #add
*/
final class SegmentMerger {
private boolean useCompoundFile;
private Directory directory;
@ -77,51 +86,78 @@ final class SegmentMerger {
private static final String COMPOUND_EXTENSIONS[] = new String[] {
"fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
};
private static final String VECTOR_EXTENSIONS[] = new String[] {
"tvx", "tvd", "tvf"
};
/**
*
* @param dir The Directory to merge the other segments into
* @param name The name of the new segment
* @param compoundFile true if the new segment should use a compoundFile
*/
SegmentMerger(Directory dir, String name, boolean compoundFile) {
directory = dir;
segment = name;
useCompoundFile = compoundFile;
}
/**
* Add an IndexReader to the collection of readers that are to be merged
* @param reader The reader to add
*/
final void add(IndexReader reader) {
readers.addElement(reader);
}
/**
*
* @param i The index of the reader to return
* @return The ith reader to be merged
*/
final IndexReader segmentReader(int i) {
return (IndexReader)readers.elementAt(i);
return (IndexReader) readers.elementAt(i);
}
/**
* Merges the readers specified by the {@link #add} method into the directory passed to the constructor
* @return The number of documents that were merged
* @throws IOException
*/
final int merge() throws IOException {
int value;
try {
value = mergeFields();
mergeTerms();
mergeNorms();
if (fieldInfos.hasVectors())
mergeVectors();
} finally {
for (int i = 0; i < readers.size(); i++) { // close readers
IndexReader reader = (IndexReader)readers.elementAt(i);
reader.close();
IndexReader reader = (IndexReader) readers.elementAt(i);
reader.close();
}
}
if (useCompoundFile)
createCompoundFile();
createCompoundFile();
return value;
}
private final void createCompoundFile()
throws IOException {
CompoundFileWriter cfsWriter =
new CompoundFileWriter(directory, segment + ".cfs");
ArrayList files =
new ArrayList(COMPOUND_EXTENSIONS.length + fieldInfos.size());
private final void createCompoundFile()
throws IOException {
CompoundFileWriter cfsWriter =
new CompoundFileWriter(directory, segment + ".cfs");
ArrayList files =
new ArrayList(COMPOUND_EXTENSIONS.length + fieldInfos.size());
// Basic files
for (int i=0; i<COMPOUND_EXTENSIONS.length; i++) {
files.add(segment + "." + COMPOUND_EXTENSIONS[i]);
for (int i = 0; i < COMPOUND_EXTENSIONS.length; i++) {
files.add(segment + "." + COMPOUND_EXTENSIONS[i]);
}
// Field norm files
@ -132,9 +168,16 @@ final class SegmentMerger {
}
}
// Vector files
if (fieldInfos.hasVectors()) {
for (int i = 0; i < VECTOR_EXTENSIONS.length; i++) {
files.add(segment + "." + VECTOR_EXTENSIONS[i]);
}
}
// Now merge all added files
Iterator it = files.iterator();
while(it.hasNext()) {
while (it.hasNext()) {
cfsWriter.addFile((String) it.next());
}
@ -143,33 +186,38 @@ final class SegmentMerger {
// Now delete the source files
it = files.iterator();
while(it.hasNext()) {
while (it.hasNext()) {
directory.deleteFile((String) it.next());
}
}
/**
*
* @return The number of documents in all of the readers
* @throws IOException
*/
private final int mergeFields() throws IOException {
fieldInfos = new FieldInfos(); // merge field names
int docCount = 0;
for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader)readers.elementAt(i);
fieldInfos.add(reader.getFieldNames(true), true);
IndexReader reader = (IndexReader) readers.elementAt(i);
fieldInfos.addIndexed(reader.getIndexedFieldNames(true), true);
fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false);
fieldInfos.add(reader.getFieldNames(false), false);
}
fieldInfos.write(directory, segment + ".fnm");
FieldsWriter fieldsWriter = // merge field values
new FieldsWriter(directory, segment, fieldInfos);
FieldsWriter fieldsWriter = // merge field values
new FieldsWriter(directory, segment, fieldInfos);
try {
for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader)readers.elementAt(i);
int maxDoc = reader.maxDoc();
for (int j = 0; j < maxDoc; j++)
if (!reader.isDeleted(j)){ // skip deleted docs
IndexReader reader = (IndexReader) readers.elementAt(i);
int maxDoc = reader.maxDoc();
for (int j = 0; j < maxDoc; j++)
if (!reader.isDeleted(j)) { // skip deleted docs
fieldsWriter.addDocument(reader.document(j));
docCount++;
}
}
}
} finally {
fieldsWriter.close();
@ -177,6 +225,50 @@ final class SegmentMerger {
return docCount;
}
/**
* Merge the TermVectors from each of the segments into the new one.
* @throws IOException
*/
private final void mergeVectors() throws IOException {
TermVectorsWriter termVectorsWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
try {
for (int r = 0; r < readers.size(); r++) {
IndexReader reader = (IndexReader) readers.elementAt(r);
int maxDoc = reader.maxDoc();
for (int docNum = 0; docNum < maxDoc; docNum++) {
// skip deleted docs
if (reader.isDeleted(docNum)) {
continue;
}
termVectorsWriter.openDocument();
// get all term vectors
TermFreqVector[] sourceTermVector =
reader.getTermFreqVectors(docNum);
if (sourceTermVector != null) {
for (int f = 0; f < sourceTermVector.length; f++) {
// translate field numbers
TermFreqVector termVector = sourceTermVector[f];
termVectorsWriter.openField(termVector.getField());
String [] terms = termVector.getTerms();
int [] freqs = termVector.getTermFrequencies();
for (int t = 0; t < terms.length; t++) {
termVectorsWriter.addTerm(terms[t], freqs[t]);
}
}
termVectorsWriter.closeDocument();
}
}
}
} finally {
termVectorsWriter.close();
}
}
private OutputStream freqOutput = null;
private OutputStream proxOutput = null;
private TermInfosWriter termInfosWriter = null;
@ -187,15 +279,15 @@ final class SegmentMerger {
freqOutput = directory.createFile(segment + ".frq");
proxOutput = directory.createFile(segment + ".prx");
termInfosWriter =
new TermInfosWriter(directory, segment, fieldInfos);
new TermInfosWriter(directory, segment, fieldInfos);
mergeTermInfos();
} finally {
if (freqOutput != null) freqOutput.close();
if (proxOutput != null) proxOutput.close();
if (termInfosWriter != null) termInfosWriter.close();
if (queue != null) queue.close();
if (freqOutput != null) freqOutput.close();
if (proxOutput != null) proxOutput.close();
if (termInfosWriter != null) termInfosWriter.close();
if (queue != null) queue.close();
}
}
@ -203,7 +295,7 @@ final class SegmentMerger {
queue = new SegmentMergeQueue(readers.size());
int base = 0;
for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader)readers.elementAt(i);
IndexReader reader = (IndexReader) readers.elementAt(i);
TermEnum termEnum = reader.terms();
SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
base += reader.numDocs();
@ -214,20 +306,20 @@ final class SegmentMerger {
}
SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];
while (queue.size() > 0) {
int matchSize = 0; // pop matching terms
match[matchSize++] = (SegmentMergeInfo)queue.pop();
match[matchSize++] = (SegmentMergeInfo) queue.pop();
Term term = match[0].term;
SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
SegmentMergeInfo top = (SegmentMergeInfo) queue.top();
while (top != null && term.compareTo(top.term) == 0) {
match[matchSize++] = (SegmentMergeInfo)queue.pop();
top = (SegmentMergeInfo)queue.top();
match[matchSize++] = (SegmentMergeInfo) queue.pop();
top = (SegmentMergeInfo) queue.top();
}
mergeTermInfo(match, matchSize); // add new TermInfo
while (matchSize > 0) {
SegmentMergeInfo smi = match[--matchSize];
if (smi.next())
@ -240,8 +332,15 @@ final class SegmentMerger {
private final TermInfo termInfo = new TermInfo(); // minimize consing
/** Merge one term found in one or more segments. The array <code>smis</code>
* contains segments that are positioned at the same term. <code>N</code>
* is the number of cells in the array actually occupied.
*
* @param smis array of segments
* @param n number of cells in the array actually occupied
*/
private final void mergeTermInfo(SegmentMergeInfo[] smis, int n)
throws IOException {
throws IOException {
long freqPointer = freqOutput.getFilePointer();
long proxPointer = proxOutput.getFilePointer();
@ -251,13 +350,21 @@ final class SegmentMerger {
if (df > 0) {
// add an entry to the dictionary with pointers to prox and freq files
termInfo.set(df, freqPointer, proxPointer, (int)(skipPointer-freqPointer));
termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
termInfosWriter.add(smis[0].term, termInfo);
}
}
/** Process postings from multiple segments all positioned on the
* same term. Writes out merged entries into freqOutput and
* the proxOutput streams.
*
* @param smis array of segments
* @param n number of cells in the array actually occupied
* @return number of documents across all segments where this term was found
*/
private final int appendPostings(SegmentMergeInfo[] smis, int n)
throws IOException {
throws IOException {
final int skipInterval = termInfosWriter.skipInterval;
int lastDoc = 0;
int df = 0; // number of docs w/ term
@ -285,7 +392,7 @@ final class SegmentMerger {
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
lastDoc = doc;
int freq = postings.freq();
if (freq == 1) {
freqOutput.writeVInt(docCode | 1); // write doc & freq=1
@ -293,10 +400,10 @@ final class SegmentMerger {
freqOutput.writeVInt(docCode); // write doc
freqOutput.writeVInt(freq); // write frequency in doc
}
int lastPosition = 0; // write position deltas
for (int j = 0; j < freq; j++) {
int position = postings.nextPosition();
for (int j = 0; j < freq; j++) {
int position = postings.nextPosition();
proxOutput.writeVInt(position - lastPosition);
lastPosition = position;
}
@ -321,9 +428,9 @@ final class SegmentMerger {
long freqPointer = freqOutput.getFilePointer();
long proxPointer = proxOutput.getFilePointer();
skipBuffer.writeVInt(doc - lastSkipDoc);
skipBuffer.writeVInt((int)(freqPointer - lastSkipFreqPointer));
skipBuffer.writeVInt((int)(proxPointer - lastSkipProxPointer));
skipBuffer.writeVInt(doc - lastSkipDoc);
skipBuffer.writeVInt((int) (freqPointer - lastSkipFreqPointer));
skipBuffer.writeVInt((int) (proxPointer - lastSkipProxPointer));
lastSkipDoc = doc;
lastSkipFreqPointer = freqPointer;
@ -340,22 +447,22 @@ final class SegmentMerger {
for (int i = 0; i < fieldInfos.size(); i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed) {
OutputStream output = directory.createFile(segment + ".f" + i);
try {
for (int j = 0; j < readers.size(); j++) {
IndexReader reader = (IndexReader)readers.elementAt(j);
byte[] input = reader.norms(fi.name);
OutputStream output = directory.createFile(segment + ".f" + i);
try {
for (int j = 0; j < readers.size(); j++) {
IndexReader reader = (IndexReader) readers.elementAt(j);
byte[] input = reader.norms(fi.name);
int maxDoc = reader.maxDoc();
for (int k = 0; k < maxDoc; k++) {
byte norm = input != null ? input[k] : (byte)0;
byte norm = input != null ? input[k] : (byte) 0;
if (!reader.isDeleted(k)) {
output.writeByte(norm);
}
}
}
} finally {
output.close();
}
}
}
} finally {
output.close();
}
}
}
}
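For orientation, a hedged sketch of how the (package-private) merger is driven, based only on the constructor and methods shown above; IndexWriter is the actual caller, and readerA/readerB stand in for real segment readers:

SegmentMerger merger = new SegmentMerger(directory, "newSegment", useCompoundFile);
merger.add(readerA);            // queue each segment's IndexReader
merger.add(readerB);
int docCount = merger.merge();  // merges fields, terms, norms, and,
                                // when fieldInfos.hasVectors(), vectors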

View File: SegmentReader.java

@ -82,6 +82,7 @@ final class SegmentReader extends IndexReader {
private FieldsReader fieldsReader;
TermInfosReader tis;
TermVectorsReader termVectorsReader;
BitVector deletedDocs = null;
private boolean deletedDocsDirty = false;
@ -109,21 +110,22 @@ final class SegmentReader extends IndexReader {
out.close();
}
String fileName = segment + ".f" + fieldInfos.fieldNumber(name);
directory().renameFile(segment + ".tmp", fileName);
directory().renameFile(segment + ".tmp", fileName);
this.dirty = false;
}
}
private Hashtable norms = new Hashtable();
SegmentReader(SegmentInfos sis, SegmentInfo si, boolean closeDir)
throws IOException {
throws IOException {
this(si);
closeDirectory = closeDir;
segmentInfos = sis;
}
SegmentReader(SegmentInfo si)
throws IOException {
throws IOException {
super(si.dir);
segment = si.name;
@ -149,13 +151,17 @@ final class SegmentReader extends IndexReader {
freqStream = cfsDir.openFile(segment + ".frq");
proxStream = cfsDir.openFile(segment + ".prx");
openNorms(cfsDir);
if (fieldInfos.hasVectors()) { // open term vector files only as needed
termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
}
}
protected final synchronized void doClose() throws IOException {
if (deletedDocsDirty || normsDirty) {
synchronized (directory()) { // in- & inter-process sync
new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
IndexWriter.COMMIT_LOCK_TIMEOUT) {
IndexWriter.COMMIT_LOCK_TIMEOUT) {
public Object doBody() throws IOException {
if (deletedDocsDirty) { // re-write deleted
@ -164,18 +170,18 @@ final class SegmentReader extends IndexReader {
}
if (normsDirty) { // re-write norms
Enumeration keys = norms.keys();
Enumeration values = norms.elements();
Enumeration keys = norms.keys();
Enumeration values = norms.elements();
while (values.hasMoreElements()) {
String field = (String)keys.nextElement();
Norm norm = (Norm)values.nextElement();
String field = (String) keys.nextElement();
Norm norm = (Norm) values.nextElement();
if (norm.dirty) {
norm.reWrite(field);
}
}
}
if(segmentInfos != null)
if (segmentInfos != null)
segmentInfos.write(directory());
else
directory().touchFile("segments");
@ -196,6 +202,7 @@ final class SegmentReader extends IndexReader {
proxStream.close();
closeNorms();
if (termVectorsReader != null) termVectorsReader.close();
if (cfsReader != null)
cfsReader.close();
@ -212,6 +219,7 @@ final class SegmentReader extends IndexReader {
return deletedDocs != null;
}
static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
return si.dir.fileExists(si.name + ".cfs");
}
@ -226,7 +234,7 @@ final class SegmentReader extends IndexReader {
public synchronized void undeleteAll() throws IOException {
synchronized (directory()) { // in- & inter-process sync
new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
IndexWriter.COMMIT_LOCK_TIMEOUT) {
IndexWriter.COMMIT_LOCK_TIMEOUT) {
public Object doBody() throws IOException {
if (directory().fileExists(segment + ".del")) {
directory().deleteFile(segment + ".del");
@ -242,11 +250,11 @@ final class SegmentReader extends IndexReader {
final Vector files() throws IOException {
Vector files = new Vector(16);
final String ext[] = new String[] {
"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del"
};
final String ext[] = new String[]{
"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
"tvx", "tvd", "tvf", "tvp" };
for (int i=0; i<ext.length; i++) {
for (int i = 0; i < ext.length; i++) {
String name = segment + "." + ext[i];
if (directory().fileExists(name))
files.addElement(name);
@ -271,7 +279,7 @@ final class SegmentReader extends IndexReader {
public final synchronized Document document(int n) throws IOException {
if (isDeleted(n))
throw new IllegalArgumentException
("attempt to access a deleted document");
("attempt to access a deleted document");
return fieldsReader.doc(n);
}
@ -329,12 +337,31 @@ final class SegmentReader extends IndexReader {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed == indexed)
fieldSet.add(fi.name);
}
return fieldSet;
}
return fieldSet;
}
/**
* Returns the names of indexed fields, filtered by term vector storage.
* @param storedTermVector if true, returns only indexed fields that have term vector info,
* else only indexed fields without term vector info
* @return Collection of Strings indicating the names of the fields
*/
public Collection getIndexedFieldNames(boolean storedTermVector) {
// maintain a unique set of field names
Set fieldSet = new HashSet();
for (int i = 0; i < fieldInfos.size(); i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed && fi.storeTermVector == storedTermVector) {
fieldSet.add(fi.name);
}
}
return fieldSet;
}
public synchronized byte[] norms(String field) throws IOException {
Norm norm = (Norm)norms.get(field);
Norm norm = (Norm) norms.get(field);
if (norm == null) // not an indexed field
return null;
if (norm.bytes == null) { // value not yet read
@ -346,8 +373,8 @@ final class SegmentReader extends IndexReader {
}
public synchronized void setNorm(int doc, String field, byte value)
throws IOException {
Norm norm = (Norm)norms.get(field);
throws IOException {
Norm norm = (Norm) norms.get(field);
if (norm == null) // not an indexed field
return;
norm.dirty = true; // mark it dirty
@ -360,7 +387,7 @@ final class SegmentReader extends IndexReader {
public synchronized void norms(String field, byte[] bytes, int offset)
throws IOException {
Norm norm = (Norm)norms.get(field);
Norm norm = (Norm) norms.get(field);
if (norm == null)
return; // use zeros in array
@ -369,7 +396,7 @@ final class SegmentReader extends IndexReader {
return;
}
InputStream normStream = (InputStream)norm.in.clone();
InputStream normStream = (InputStream) norm.in.clone();
try { // read from disk
normStream.seek(0);
normStream.readBytes(bytes, offset, maxDoc());
@ -392,11 +419,40 @@ final class SegmentReader extends IndexReader {
private final void closeNorms() throws IOException {
synchronized (norms) {
Enumeration enumerator = norms.elements();
while (enumerator.hasMoreElements()) {
Norm norm = (Norm) enumerator.nextElement();
norm.in.close();
}
}
}
/** Return a term frequency vector for the specified document and field. The
* vector returned contains terms and frequencies for all terms in
* the specified field of this document, if the field had the storeTermVector
* flag set. If the flag was not set, the method returns null.
*/
public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException {
// Check if this field is invalid or has no stored term vector
FieldInfo fi = fieldInfos.fieldInfo(field);
if (fi == null || !fi.storeTermVector) return null;
return termVectorsReader.get(docNumber, field);
}
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector contains terms and frequencies for all terms
* in a given vectorized field.
* If no such fields existed, the method returns null.
*/
public TermFreqVector[] getTermFreqVectors(int docNumber)
throws IOException {
if (termVectorsReader == null)
return null;
return termVectorsReader.get(docNumber);
}
}
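A minimal usage sketch of the two accessors above, assuming a reader that exposes them as SegmentReader implements them here; the index path and the "textField2" field name are hypothetical, and the field is assumed to have been indexed with the storeTermVector flag set:

import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;

// Sketch only: list vectorized fields, then dump one stored vector.
public class TermVectorDump {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/tmp/testindex");  // hypothetical path
    // Indexed fields that were written with the storeTermVector flag set.
    Collection vectorized = reader.getIndexedFieldNames(true);
    for (Iterator it = vectorized.iterator(); it.hasNext();)
      System.out.println("vectorized field: " + it.next());
    // Null if document 0 stored no vector for this field.
    TermFreqVector vector = reader.getTermFreqVector(0, "textField2");
    if (vector != null) {
      String[] terms = vector.getTerms();
      int[] freqs = vector.getTermFrequencies();
      for (int i = 0; i < terms.length; i++)
        System.out.println(terms[i] + "/" + freqs[i]);
    }
    reader.close();
  }
}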

View File

@ -77,27 +77,27 @@ class SegmentTermDocs implements TermDocs {
private boolean haveSkipped;
SegmentTermDocs(SegmentReader parent)
throws IOException {
this.parent = parent;
this.freqStream = (InputStream) parent.freqStream.clone();
this.deletedDocs = parent.deletedDocs;
this.skipInterval = parent.tis.getSkipInterval();
}
public void seek(Term term) throws IOException {
TermInfo ti = parent.tis.get(term);
seek(ti);
}
public void seek(TermEnum enum) throws IOException {
TermInfo ti;
if (enum instanceof SegmentTermEnum) // optimized case
ti = ((SegmentTermEnum) enum).termInfo();
else // punt case
ti = parent.tis.get(enum.term());
seek(ti);
}
void seek(TermInfo ti) throws IOException {
count = 0;
if (ti == null) {
@ -114,7 +114,7 @@ class SegmentTermDocs implements TermDocs {
haveSkipped = false;
}
}
public void close() throws IOException {
freqStream.close();
}
@ -128,19 +128,19 @@ class SegmentTermDocs implements TermDocs {
public boolean next() throws IOException {
while (true) {
if (count == df)
return false;
int docCode = freqStream.readVInt();
doc += docCode >>> 1; // shift off low bit
if ((docCode & 1) != 0) // if low bit is set
freq = 1; // freq is one
else
freq = freqStream.readVInt(); // else read freq
count++;
if (deletedDocs == null || !deletedDocs.get(doc))
break;
skippingDoc();
}
return true;
@ -148,7 +148,7 @@ class SegmentTermDocs implements TermDocs {
/** Optimized implementation. */
public int read(final int[] docs, final int[] freqs)
throws IOException {
final int length = docs.length;
int i = 0;
while (i < length && count < df) {
@ -157,17 +157,17 @@ class SegmentTermDocs implements TermDocs {
final int docCode = freqStream.readVInt();
doc += docCode >>> 1; // shift off low bit
if ((docCode & 1) != 0) // if low bit is set
freq = 1; // freq is one
else
freq = freqStream.readVInt(); // else read freq
count++;
if (deletedDocs == null || !deletedDocs.get(doc)) {
docs[i] = doc;
freqs[i] = freq;
++i;
}
}
return i;
}
@ -179,7 +179,7 @@ class SegmentTermDocs implements TermDocs {
if (df > skipInterval) { // optimized case
if (skipStream == null)
skipStream = (InputStream) freqStream.clone(); // lazily clone
if (!haveSkipped) { // lazily seek skip stream
skipStream.seek(skipPointer);
@ -190,8 +190,8 @@ class SegmentTermDocs implements TermDocs {
int lastSkipDoc = skipDoc;
long lastFreqPointer = freqStream.getFilePointer();
long lastProxPointer = -1;
int numSkipped = -1 - (count % skipInterval);
while (target > skipDoc) {
lastSkipDoc = skipDoc;
lastFreqPointer = freqPointer;
@ -205,7 +205,7 @@ class SegmentTermDocs implements TermDocs {
skipDoc += skipStream.readVInt();
freqPointer += skipStream.readVInt();
proxPointer += skipStream.readVInt();
skipCount++;
}
@ -213,7 +213,7 @@ class SegmentTermDocs implements TermDocs {
if (lastFreqPointer > freqStream.getFilePointer()) {
freqStream.seek(lastFreqPointer);
skipProx(lastProxPointer);
doc = lastSkipDoc;
count += numSkipped;
}
@ -223,7 +223,7 @@ class SegmentTermDocs implements TermDocs {
// done skipping, now just scan
do {
if (!next())
return false;
return false;
} while (target > doc);
return true;
}
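A short sketch of what the skip machinery above buys a caller, assuming TermDocs exposes skipTo(int) as SegmentTermDocs implements it here; the index path, term, and target document are hypothetical:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

// Sketch only: jump straight to the first matching document >= 42 rather
// than calling next() repeatedly; the skip table keeps this cheap when the
// document frequency is large.
public class SkipToDoc {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/tmp/testindex");  // hypothetical path
    TermDocs termDocs = reader.termDocs(new Term("body", "lucene"));
    if (termDocs.skipTo(42))
      System.out.println("doc " + termDocs.doc() + " freq " + termDocs.freq());
    termDocs.close();
    reader.close();
  }
}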

View File

@ -76,9 +76,9 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
private char[] buffer = {};
SegmentTermEnum(InputStream i, FieldInfos fis, boolean isi)
throws IOException {
input = i;
fieldInfos = fis;
isIndex = isi;
int firstInt = input.readInt();
@ -98,24 +98,24 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
// check that it is a format we can understand
if (format < TermInfosWriter.FORMAT)
throw new IOException("Unknown format version:" + format);
size = input.readLong(); // read the size
if (!isIndex) {
indexInterval = input.readInt();
skipInterval = input.readInt();
}
}
}
protected Object clone() {
SegmentTermEnum clone = null;
try {
clone = (SegmentTermEnum) super.clone();
} catch (CloneNotSupportedException e) {}
clone.input = (InputStream) input.clone();
clone.termInfo = new TermInfo(termInfo);
if (term != null) clone.growBuffer(term.text.length());
@ -123,7 +123,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
}
final void seek(long pointer, int p, Term t, TermInfo ti)
throws IOException {
input.seek(pointer);
position = p;
term = t;
@ -134,7 +134,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
/** Increments the enumeration to the next element. True if one exists.*/
public final boolean next() throws IOException {
if (position++ >= size - 1) {
term = null;
return false;
}
@ -145,7 +145,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
termInfo.docFreq = input.readVInt(); // read doc freq
termInfo.freqPointer += input.readVLong(); // read freq pointer
termInfo.proxPointer += input.readVLong(); // read prox pointer
if (!isIndex) {
if (termInfo.docFreq > skipInterval) {
termInfo.skipOffset = input.readVInt();
@ -164,10 +164,10 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
int totalLength = start + length;
if (buffer.length < totalLength)
growBuffer(totalLength);
input.readChars(buffer, start, length);
return new Term(fieldInfos.fieldName(input.readVInt()),
new String(buffer, 0, totalLength), false);
}
private final void growBuffer(int length) {
@ -177,25 +177,25 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
}
/** Returns the current Term in the enumeration.
Initially invalid, valid after next() called for the first time.*/
public final Term term() {
return term;
}
/** Returns the current TermInfo in the enumeration.
Initially invalid, valid after next() called for the first time.*/
final TermInfo termInfo() {
return new TermInfo(termInfo);
}
/** Sets the argument to the current TermInfo in the enumeration.
Initially invalid, valid after next() called for the first time.*/
final void termInfo(TermInfo ti) {
ti.set(termInfo);
}
/** Returns the docFreq from the current TermInfo in the enumeration.
Initially invalid, valid after next() called for the first time.*/
public final int docFreq() {
return termInfo.docFreq;
}

View File

@ -106,7 +106,7 @@ extends SegmentTermDocs implements TermPositions {
public final int read(final int[] docs, final int[] freqs)
throws IOException {
throw new UnsupportedOperationException();
throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
}

View File

@ -0,0 +1,117 @@
package org.apache.lucene.index;
import java.io.IOException;
import java.util.*;
/** A TermFreqVector over the term vector stored for a single document field
* of a segment.
*/
class SegmentTermVector implements TermFreqVector {
private String field;
private String terms[];
private int termFreqs[];
SegmentTermVector(String field, String terms[], int termFreqs[]) {
this.field = field;
this.terms = terms;
this.termFreqs = termFreqs;
}
/**
*
* @return The name of the field this vector is associated with
*/
public String getField() {
return field;
}
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
sb.append(field).append(": ");
for (int i=0; i<terms.length; i++) {
if (i>0) sb.append(", ");
sb.append(terms[i]).append('/').append(termFreqs[i]);
}
sb.append('}');
return sb.toString();
}
public String toString(IndexReader ir)
throws IOException
{
return toString();
/*StringBuffer sb = new StringBuffer();
//TODO: Reimplement
sb.append('{');
sb.append(field).append(": ");
for (int i=0; i<terms.length; i++) {
if (i>0) sb.append(", ");
Term t = ir.getTerm(terms[i]);
String text = t == null ? "UNKNOWN(" + i + ")" : t.text;
sb.append(text).append('/').append(termFreqs[i]);
if (termProx != null) appendTermProx(sb.append('/'), termProx[i]);
}
sb.append('}');
return sb.toString();*/
}
/** Number of terms in the term vector. If there are no terms in the
* vector, returns 0.
*/
public int size() {
return terms == null ? 0 : terms.length;
}
/** Array of term texts in ascending order. If there are no terms in
* the vector, returns null.
*/
public String [] getTerms() {
return terms;
}
/** Array of term frequencies. Locations of the array correspond one to one
* to the terms in the array obtained from the <code>getTerms</code>
* method. Each location in the array contains the number of times this
* term occurs in the document or the document field. If there are no terms in
* the vector, returns null.
*/
public int[] getTermFrequencies() {
return termFreqs;
}
/** Return the index in the terms array returned from <code>getTerms</code>
* at which the term with the specified <code>termText</code> appears. If this
* term does not appear in the array, return -1.
*/
public int indexOf(String termText) {
int res = Arrays.binarySearch(terms, termText);
return res >= 0 ? res : -1;
}
/** Just like <code>indexOf(String)</code> but searches for a number of terms
* at the same time. Returns an array that has the same size as the number
* of terms searched for, each slot containing the result of searching for
* that term. The array of terms must be sorted in ascending order.
*
* @param termNumbers array containing the term texts to look for
* @param start index in the array where the list of termNumbers starts
* @param len the number of termNumbers in the list
*/
public int[] indexesOf(String [] termNumbers, int start, int len) {
// TODO: there must be a more efficient way of doing this.
// At least, we could advance the lower bound of the terms array
// as we find valid indexes. Also, it might be possible to leverage
// this even more by starting in the middle of the termNumbers array
// and thus dividing the terms array maybe in half with each found index.
int res[] = new int[len];
for (int i=0; i < len; i++) {
res[i] = indexOf(termNumbers[i]);
}
return res;
}
}
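The TODO above hints at a cheaper indexesOf: because both the vector's terms and the sought terms are sorted in ascending order, every successful lookup can raise the lower bound of the next binary search. A sketch of that idea, with hypothetical names:

// Sketch only: indexesOf() that narrows the search range as it goes.
// Both terms[] (the vector's terms) and queries[] must be sorted ascending.
class NarrowingLookup {
  static int[] indexesOf(String[] terms, String[] queries, int start, int len) {
    int[] res = new int[len];
    int lo = 0;                          // lower bound, raised on each hit
    for (int i = 0; i < len; i++) {
      res[i] = search(terms, queries[start + i], lo);
      if (res[i] >= 0) lo = res[i] + 1;  // later queries must sort after this hit
    }
    return res;
  }

  // Plain binary search over terms[from..terms.length-1]; -1 if absent.
  static int search(String[] terms, String key, int from) {
    int low = from, high = terms.length - 1;
    while (low <= high) {
      int mid = (low + high) / 2;
      int cmp = terms[mid].compareTo(key);
      if (cmp < 0) low = mid + 1;
      else if (cmp > 0) high = mid - 1;
      else return mid;
    }
    return -1;
  }
}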

View File

@ -57,13 +57,13 @@ package org.apache.lucene.index;
import java.io.IOException;
/** TermDocs provides an interface for enumerating &lt;document, frequency&gt;
pairs for a term. <p> The document portion names each document containing
the term. Documents are indicated by number. The frequency portion gives
the number of times the term occurred in each document. <p> The pairs are
ordered by document number.
@see IndexReader#termDocs
*/
public interface TermDocs {
/** Sets this to the data for a term.
@ -77,15 +77,15 @@ public interface TermDocs {
void seek(TermEnum termEnum) throws IOException;
/** Returns the current document number. <p> This is invalid until {@link
#next()} is called for the first time.*/
int doc();
/** Returns the frequency of the term within the current document. <p> This
is invalid until {@link #next()} is called for the first time.*/
int freq();
/** Moves to the next pair in the enumeration. <p> Returns true iff there is
such a next pair in the enumeration. */
boolean next() throws IOException;
/** Attempts to read multiple entries from the enumeration, up to length of
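To make the contract concrete, a sketch of the basic enumeration loop; the index path and term are hypothetical:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

// Sketch only: walk every <document, frequency> pair for one term, in
// document-number order as the interface guarantees.
public class WalkTermDocs {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/tmp/testindex");  // hypothetical path
    TermDocs termDocs = reader.termDocs(new Term("body", "lucene"));
    while (termDocs.next())
      System.out.println("doc " + termDocs.doc() + " freq " + termDocs.freq());
    termDocs.close();
    reader.close();
  }
}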

View File

@ -73,4 +73,27 @@ public abstract class TermEnum {
/** Closes the enumeration to further activity, freeing resources. */
public abstract void close() throws IOException;
// Term Vector support
/** Skips terms to the first beyond the current whose value is
* greater or equal to <i>target</i>. <p>Returns true iff there is such
* an entry. <p>Behaves as if written: <pre>
* public boolean skipTo(Term target) {
* do {
* if (!next())
* return false;
* } while (target > term());
* return true;
* }
* </pre>
* Some implementations are considerably more efficient than that.
*/
public boolean skipTo(Term target) throws IOException {
do {
if (!next())
return false;
} while (target.compareTo(term()) > 0);
return true;
}
}
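A usage sketch for the new method; the index path and target term are hypothetical:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;

// Sketch only: position the enumeration at the first term >= the target.
public class SkipToTerm {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/tmp/testindex");  // hypothetical path
    TermEnum terms = reader.terms();
    if (terms.skipTo(new Term("body", "m")))
      System.out.println("first term >= target: " + terms.term());
    terms.close();
    reader.close();
  }
}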

View File

@ -0,0 +1,64 @@
package org.apache.lucene.index;
import java.io.IOException;
/** Provides access to stored term vector of
* a document field.
*/
public interface TermFreqVector {
/**
*
* @return The field this vector is associated with.
*
*/
public String getField();
/**
* @return The number of terms in the term vector.
*/
public int size();
/**
* @return An Array of term texts in ascending order.
*/
public String[] getTerms();
/** Array of term frequencies. Locations of the array correspond one to one
* to the terms in the array obtained from the <code>getTerms</code>
* method. Each location in the array contains the number of times this
* term occurs in the document or the document field.
*/
public int[] getTermFrequencies();
/** Return a string representation of the vector.
*/
public String toString();
/** Return a string representation of the vector, but use the provided IndexReader
* to obtain text for each term and include the text instead of term numbers.
*/
public String toString(IndexReader ir) throws IOException;
/** Return the index in the terms array returned from <code>getTerms</code>
* at which the specified <code>term</code> appears. If this
* term does not appear in the array, return -1.
*/
public int indexOf(String term);
/** Just like <code>indexOf(String)</code> but searches for a number of terms
* at the same time. Returns an array that has the same size as the number
* of terms searched for, each slot containing the result of searching for
* that term.
*
* @param terms array containing terms to look for
* @param start index in the array where the list of terms starts
* @param len the number of terms in the list
*/
public int[] indexesOf(String[] terms, int start, int len);
}
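For concreteness, a consumer sketch of the lookup methods; the document number, field, and term are hypothetical:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;

// Sketch only: fetch the frequency of a single term via indexOf().
public class LookupFreq {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/tmp/testindex");  // hypothetical path
    TermFreqVector vector = reader.getTermFreqVector(0, "textField2");
    if (vector != null) {
      int idx = vector.indexOf("field");
      if (idx >= 0)
        System.out.println("freq: " + vector.getTermFrequencies()[idx]);
    }
    reader.close();
  }
}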

View File

@ -57,6 +57,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.StringHelper;
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
Directory. A TermInfos can be written once, in order. */
@ -156,10 +157,10 @@ final class TermInfosWriter {
lastTi.set(ti);
size++;
}
private final void writeTerm(Term term)
throws IOException {
int start = stringDifference(lastTerm.text, term.text);
int start = StringHelper.stringDifference(lastTerm.text, term.text);
int length = term.text.length() - start;
output.writeVInt(start); // write shared prefix length
@ -171,15 +172,7 @@ final class TermInfosWriter {
lastTerm = term;
}
private static final int stringDifference(String s1, String s2) {
int len1 = s1.length();
int len2 = s2.length();
int len = len1 < len2 ? len1 : len2;
for (int i = 0; i < len; i++)
if (s1.charAt(i) != s2.charAt(i))
return i;
return len;
}
/** Called to complete TermInfos creation. */
final void close() throws IOException {
@ -190,4 +183,5 @@ final class TermInfosWriter {
if (!isIndex)
other.close();
}
}

View File

@ -0,0 +1,13 @@
package org.apache.lucene.index;
/** Extends <code>TermFreqVector</code> to provide additional information about
* positions in which each of the terms is found.
*/
public interface TermPositionVector extends TermFreqVector {
/** Returns an array of positions in which the term is found.
* Terms are identified by the index at which each term appears in the
* term array obtained from the <code>getTerms</code> method.
*/
public int[] getTermPositions(int index);
}
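A consumer sketch, assuming the stored vector actually implements this interface (plain term frequency vectors carry no positions):

import org.apache.lucene.index.TermPositionVector;

// Sketch only: print each term alongside the positions at which it occurs.
class DumpPositions {
  static void dump(TermPositionVector vector) {
    String[] terms = vector.getTerms();
    for (int i = 0; i < terms.length; i++) {
      int[] positions = vector.getTermPositions(i);
      StringBuffer line = new StringBuffer(terms[i]).append(':');
      for (int j = 0; positions != null && j < positions.length; j++)
        line.append(' ').append(positions[j]);
      System.out.println(line);
    }
  }
}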

View File

@ -0,0 +1,221 @@
package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;
import java.io.IOException;
/** TODO: relax synchro!
*/
class TermVectorsReader {
private FieldInfos fieldInfos;
private InputStream tvx;
private InputStream tvd;
private InputStream tvf;
private int size;
TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos)
throws IOException {
if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
tvx = d.openFile(segment + TermVectorsWriter.TVX_EXTENSION);
checkValidFormat(tvx);
tvd = d.openFile(segment + TermVectorsWriter.TVD_EXTENSION);
checkValidFormat(tvd);
tvf = d.openFile(segment + TermVectorsWriter.TVF_EXTENSION);
checkValidFormat(tvf);
size = (int) tvx.length() / 8;
}
this.fieldInfos = fieldInfos;
}
private void checkValidFormat(InputStream in) throws IOException
{
int format = in.readInt();
if (format > TermVectorsWriter.FORMAT_VERSION)
{
throw new IOException("Incompatible format version: " + format + " expected "
+ TermVectorsWriter.FORMAT_VERSION + " or less");
}
}
synchronized void close() throws IOException {
// why don't we trap the exception and at least make sure that
// all streams that we can close are closed?
if (tvx != null) tvx.close();
if (tvd != null) tvd.close();
if (tvf != null) tvf.close();
}
/**
*
* @return The number of documents in the reader
*/
int size() {
return size;
}
/**
* Retrieve the term vector for the given document and field
* @param docNum The document number to retrieve the vector for
* @param field The field within the document to retrieve
* @return The TermFreqVector for the document and field or null
*/
synchronized TermFreqVector get(int docNum, String field) {
// Check if no term vectors are available for this segment at all
int fieldNumber = fieldInfos.fieldNumber(field);
TermFreqVector result = null;
if (tvx != null) {
try {
//We need to account for the FORMAT_SIZE when seeking in the tvx
//We don't need to do this in other seeks because we already have the file pointer
//that was written in another file
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
//System.out.println("TVX Pointer: " + tvx.getFilePointer());
long position = tvx.readLong();
tvd.seek(position);
int fieldCount = tvd.readVInt();
//System.out.println("Num Fields: " + fieldCount);
// There are only a few fields per document. We opt for a full scan
// rather than requiring that they be ordered. We need to read through
// all of the fields anyway to get to the tvf pointers.
int number = 0;
int found = -1;
for (int i = 0; i < fieldCount; i++) {
number += tvd.readVInt();
if (number == fieldNumber) found = i;
}
// found == -1 means this field, although valid in the segment, was not in this document
if (found != -1) {
// Compute position in the tvf file
position = 0;
for (int i = 0; i <= found; i++)
{
position += tvd.readVLong();
}
result = readTermVector(field, position);
}
else {
//System.out.println("Field not found");
}
} catch (Exception e) {
//e.printStackTrace();
}
}
else
{
System.out.println("No tvx file");
}
return result;
}
/** Return all term vectors stored for this document, or null if they could not be read in. */
synchronized TermFreqVector[] get(int docNum) {
TermFreqVector[] result = null;
// Check if no term vectors are available for this segment at all
if (tvx != null) {
try {
//We need to offset by the FORMAT_SIZE when seeking in the tvx
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
long position = tvx.readLong();
tvd.seek(position);
int fieldCount = tvd.readVInt();
// No fields are vectorized for this document
if (fieldCount != 0) {
int number = 0;
String[] fields = new String[fieldCount];
for (int i = 0; i < fieldCount; i++) {
number += tvd.readVInt();
fields[i] = fieldInfos.fieldName(number);
}
// Compute position in the tvf file
position = 0;
long[] tvfPointers = new long[fieldCount];
for (int i = 0; i < fieldCount; i++) {
position += tvd.readVLong();
tvfPointers[i] = position;
}
result = readTermVectors(fields, tvfPointers);
}
} catch (IOException e) {
e.printStackTrace();
}
}
else
{
System.out.println("No tvx file");
}
return result;
}
private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[])
throws IOException {
SegmentTermVector res[] = new SegmentTermVector[fields.length];
for (int i = 0; i < fields.length; i++) {
res[i] = readTermVector(fields[i], tvfPointers[i]);
}
return res;
}
/**
*
* @param field The field to read in
* @param tvfPointer The pointer within the tvf file where we should start reading
* @return The TermVector located at that position
* @throws IOException
*/
private SegmentTermVector readTermVector(String field, long tvfPointer)
throws IOException {
// Now read the data from specified position
//We don't need to offset by the FORMAT here since the pointer already includes the offset
tvf.seek(tvfPointer);
int numTerms = tvf.readVInt();
//System.out.println("Num Terms: " + numTerms);
// If no terms - return a constant empty termvector
if (numTerms == 0) return new SegmentTermVector(field, null, null);
int length = numTerms + tvf.readVInt();
String terms[] = new String[numTerms];
int termFreqs[] = new int[numTerms];
int start = 0;
int deltaLength = 0;
int totalLength = 0;
char [] buffer = {};
String previousString = "";
for (int i = 0; i < numTerms; i++) {
start = tvf.readVInt();
deltaLength = tvf.readVInt();
totalLength = start + deltaLength;
if (buffer.length < totalLength)
{
buffer = new char[totalLength];
for (int j = 0; j < previousString.length(); j++) // copy contents
buffer[j] = previousString.charAt(j);
}
tvf.readChars(buffer, start, deltaLength);
terms[i] = new String(buffer, 0, totalLength);
previousString = terms[i];
termFreqs[i] = tvf.readVInt();
}
SegmentTermVector tv = new SegmentTermVector(field, terms, termFreqs);
return tv;
}
}
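To spell out the seek arithmetic used above: the .tvx file starts with the 4-byte format version, followed by one 8-byte pointer into .tvd per document, so document n's pointer lives at byte FORMAT_SIZE + 8n. A sketch (the helper class is hypothetical, and it must live in org.apache.lucene.index because TermVectorsWriter is package-private):

package org.apache.lucene.index;

class TvxMath {
  // FORMAT_SIZE (4 bytes) covers the leading format version; each document
  // then owns one 8-byte pointer. E.g. document 3's pointer starts at byte 28.
  static long tvxOffset(int docNum) {
    return TermVectorsWriter.FORMAT_SIZE + docNum * 8L;
  }
}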

View File

@ -0,0 +1,301 @@
package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
import java.util.Vector;
/**
* The writer works by opening a document, then opening each field within the document, and then
* writing out the vectors for each field.
*
* Rough usage:
*
<CODE>
for each document
{
writer.openDocument();
for each field on the document
{
writer.openField(field);
for all of the terms
{
writer.addTerm(...)
}
writer.closeField()
}
writer.closeDocument()
}
</CODE>
*/
final class TermVectorsWriter {
public static final int FORMAT_VERSION = 1;
//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
public static final int FORMAT_SIZE = 4;
//TODO: Figure out how to write with or w/o position information and read back in
public static final String TVX_EXTENSION = ".tvx";
public static final String TVD_EXTENSION = ".tvd";
public static final String TVF_EXTENSION = ".tvf";
private OutputStream tvx = null, tvd = null, tvf = null;
private Vector fields = null;
private Vector terms = null;
private FieldInfos fieldInfos;
private TVField currentField = null;
private long currentDocPointer = -1;
/** Create a term vectors writer for the specified segment in the specified
* directory. A new TermVectorsWriter should be created for each
* segment. The supplied <code>fieldInfos</code> indicates how many total
* fields a document may have; not all of these fields will necessarily
* have term vectors stored, so the number of calls to
* <code>openField</code> is less than or equal to this number.
*/
public TermVectorsWriter(Directory directory, String segment,
FieldInfos fieldInfos)
throws IOException {
// Open files for TermVector storage
tvx = directory.createFile(segment + TVX_EXTENSION);
tvx.writeInt(FORMAT_VERSION);
tvd = directory.createFile(segment + TVD_EXTENSION);
tvd.writeInt(FORMAT_VERSION);
tvf = directory.createFile(segment + TVF_EXTENSION);
tvf.writeInt(FORMAT_VERSION);
this.fieldInfos = fieldInfos;
fields = new Vector(fieldInfos.size());
terms = new Vector();
}
public final void openDocument()
throws IOException {
closeDocument();
currentDocPointer = tvd.getFilePointer();
}
public final void closeDocument()
throws IOException {
if (isDocumentOpen()) {
closeField();
writeDoc();
fields.clear();
currentDocPointer = -1;
}
}
public final boolean isDocumentOpen() {
return currentDocPointer != -1;
}
/** Start processing a field. This can be followed by a number of calls to
* addTerm, and a final call to closeField to indicate the end of
* processing of this field. If a field was previously open, it is
* closed automatically.
*/
public final void openField(String field)
throws IOException {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot open field when no document is open.");
closeField();
currentField = new TVField(fieldInfos.fieldNumber(field));
}
/** Finished processing current field. This should be followed by a call to
* openField before future calls to addTerm.
*/
public final void closeField()
throws IOException {
if (isFieldOpen()) {
/* DEBUG */
//System.out.println("closeField()");
/* DEBUG */
// save field and terms
writeField();
fields.add(currentField);
terms.clear();
currentField = null;
}
}
/** Return true if a field is currently open. */
public final boolean isFieldOpen() {
return currentField != null;
}
/** Add a term to the field's term vector. The field must already be open
* or an IllegalStateException is thrown. Terms should be added in
* increasing order, one call per unique term text. Freq is the number of
* times this term appears in this field, in this document.
*/
public final void addTerm(String termText, int freq) {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add terms when document is not open");
if (!isFieldOpen()) throw new IllegalStateException("Cannot add terms when field is not open");
addTermInternal(termText, freq);
}
private final void addTermInternal(String termText, int freq) {
currentField.length += freq;
TVTerm term = new TVTerm();
term.termText = termText;
term.freq = freq;
terms.add(term);
}
/** Add specified vectors to the document.
*/
public final void addVectors(TermFreqVector[] vectors)
throws IOException {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vectors when document is not open");
if (isFieldOpen()) throw new IllegalStateException("Cannot add term vectors when field is open");
for (int i = 0; i < vectors.length; i++) {
addTermFreqVector(vectors[i]);
}
}
/** Add the specified vector to the document. The document must be open, but no
* field may be open, or an exception is thrown. The same document can mix
* <code>addTerm</code> and <code>addVectors</code> calls; however, a given field
* must be populated either with <code>addTerm</code> or with <code>addTermFreqVector</code>.
*/
public final void addTermFreqVector(TermFreqVector vector)
throws IOException {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vector when document is not open");
if (isFieldOpen()) throw new IllegalStateException("Cannot add term vector when field is open");
addTermFreqVectorInternal(vector);
}
private final void addTermFreqVectorInternal(TermFreqVector vector)
throws IOException {
openField(vector.getField());
for (int i = 0; i < vector.size(); i++) {
addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i]);
}
closeField();
}
/** Close all streams. */
final void close() throws IOException {
try {
closeDocument();
} finally {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
IOException keep = null;
if (tvx != null)
try {
tvx.close();
} catch (IOException e) {
if (keep == null) keep = e;
}
if (tvd != null)
try {
tvd.close();
} catch (IOException e) {
if (keep == null) keep = e;
}
if (tvf != null)
try {
tvf.close();
} catch (IOException e) {
if (keep == null) keep = e;
}
if (keep != null) throw (IOException) keep.fillInStackTrace();
}
}
private void writeField() throws IOException {
// remember where this field is written
currentField.tvfPointer = tvf.getFilePointer();
//System.out.println("Field Pointer: " + currentField.tvfPointer);
final int size;
tvf.writeVInt(size = terms.size());
tvf.writeVInt(currentField.length - size);
String lastTermText = "";
// write term ids and positions
for (int i = 0; i < size; i++) {
TVTerm term = (TVTerm) terms.elementAt(i);
//tvf.writeString(term.termText);
int start = StringHelper.stringDifference(lastTermText, term.termText);
int length = term.termText.length() - start;
tvf.writeVInt(start); // write shared prefix length
tvf.writeVInt(length); // write delta length
tvf.writeChars(term.termText, start, length); // write delta chars
tvf.writeVInt(term.freq);
lastTermText = term.termText;
}
}
private void writeDoc() throws IOException {
if (isFieldOpen()) throw new IllegalStateException("Field is still open while writing document");
//System.out.println("Writing doc pointer: " + currentDocPointer);
// write document index record
tvx.writeLong(currentDocPointer);
// write document data record
final int size;
// write the number of fields
tvd.writeVInt(size = fields.size());
// write field numbers
int lastFieldNumber = 0;
for (int i = 0; i < size; i++) {
TVField field = (TVField) fields.elementAt(i);
tvd.writeVInt(field.number - lastFieldNumber);
lastFieldNumber = field.number;
}
// write field pointers
long lastFieldPointer = 0;
for (int i = 0; i < size; i++) {
TVField field = (TVField) fields.elementAt(i);
tvd.writeVLong(field.tvfPointer - lastFieldPointer);
lastFieldPointer = field.tvfPointer;
}
//System.out.println("After writing doc pointer: " + tvx.getFilePointer());
}
private static class TVField {
int number;
long tvfPointer = 0;
int length = 0; // number of distinct term positions
TVField(int number) {
this.number = number;
}
}
private static class TVTerm {
String termText;
int freq = 0;
//int positions[] = null;
}
}
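A concrete version of the "rough usage" protocol from the class javadoc, as a sketch; the directory, segment name, field, and term counts are hypothetical (the counts mirror "field field field two text" from the tests), and the code must live in org.apache.lucene.index because the writer is package-private:

package org.apache.lucene.index;

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

// Sketch only: the open/add/close protocol for one document with one field.
// Terms are added in increasing order, one call per unique term.
class TermVectorsWriterSketch {
  static void writeOne(FieldInfos fieldInfos) throws IOException {
    Directory dir = new RAMDirectory();
    TermVectorsWriter writer = new TermVectorsWriter(dir, "test", fieldInfos);
    try {
      writer.openDocument();
      writer.openField("textField2");
      writer.addTerm("field", 3);
      writer.addTerm("text", 1);
      writer.addTerm("two", 1);
      writer.closeField();
      writer.closeDocument();
    } finally {
      writer.close();
    }
  }
}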

View File

@ -0,0 +1,216 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;
import java.io.IOException;
import java.io.StringReader;
import java.util.*;
/**
* A <code>TermFreqVector</code> built from the terms of a query rather than
* from a stored document field.
**/
public class QueryTermVector implements TermFreqVector {
private String [] terms = new String[0];
private int [] termFreqs = new int[0];
public String getField() { return null; }
/**
*
* @param queryTerms The original list of terms from the query; may contain duplicates
*/
public QueryTermVector(String [] queryTerms) {
processTerms(queryTerms);
}
public QueryTermVector(String queryString, Analyzer analyzer) {
if (analyzer != null)
{
TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
if (stream != null)
{
Token next = null;
List terms = new ArrayList();
try {
while ((next = stream.next()) != null)
{
terms.add(next.termText());
}
processTerms((String[])terms.toArray(new String[terms.size()]));
} catch (IOException e) {
}
}
}
}
private void processTerms(String[] queryTerms) {
if (queryTerms != null) {
Arrays.sort(queryTerms);
Map tmpSet = new HashMap(queryTerms.length);
//filter out duplicates
List tmpList = new ArrayList(queryTerms.length);
List tmpFreqs = new ArrayList(queryTerms.length);
int j = 0;
for (int i = 0; i < queryTerms.length; i++) {
String term = queryTerms[i];
Integer position = (Integer)tmpSet.get(term);
if (position == null) {
tmpSet.put(term, new Integer(j++));
tmpList.add(term);
tmpFreqs.add(new Integer(1));
}
else {
Integer integer = (Integer)tmpFreqs.get(position.intValue());
tmpFreqs.set(position.intValue(), new Integer(integer.intValue() + 1));
}
}
terms = (String[])tmpList.toArray(terms);
//termFreqs = (int[])tmpFreqs.toArray(termFreqs);
termFreqs = new int[tmpFreqs.size()];
int i = 0;
for (Iterator iter = tmpFreqs.iterator(); iter.hasNext();) {
Integer integer = (Integer) iter.next();
termFreqs[i++] = integer.intValue();
}
}
}
public final String toString() {
StringBuffer sb = new StringBuffer();
sb.append('{');
for (int i=0; i<terms.length; i++) {
if (i>0) sb.append(", ");
sb.append(terms[i]).append('/').append(termFreqs[i]);
}
sb.append('}');
return sb.toString();
}
/**
* @return The number of terms in the term vector.
*/
public int size() {
return terms.length;
}
/** Returns an array of positions in which the term is found or null if no position information is
* available or positions are not implemented.
* Terms are identified by the index at which its number appears in the
* term array obtained from <code>getTerms</code> method.
*/
public int[] getTermPositions(int index) {
return null;
}
/**
* @return An Array of term texts in ascending order.
*/
public String[] getTerms() {
return terms;
}
/** Array of term frequencies. Locations of the array correspond one to one
* to the terms in the array obtained from the <code>getTerms</code>
* method. Each location in the array contains the number of times this
* term occurs in the document or the document field.
*/
public int[] getTermFrequencies() {
return termFreqs;
}
/** Return a string representation of the vector, but use the provided IndexReader
* to obtain text for each term and include the text instead of term numbers.
*/
public String toString(IndexReader ir) throws IOException {
return toString();
}
/** Return the index in the terms array returned from <code>getTerms</code>
* at which the specified <code>term</code> appears. If this
* term does not appear in the array, return -1.
*/
public int indexOf(String term) {
int res = Arrays.binarySearch(terms, term);
return res >= 0 ? res : -1;
}
/** Just like <code>indexOf(String)</code> but searches for a number of terms
* at the same time. Returns an array that has the same size as the number
* of terms searched for, each slot containing the result of searching for
* that term.
*
* @param terms array containing terms to look for
* @param start index in the array where the list of terms starts
* @param len the number of terms in the list
*/
public int[] indexesOf(String[] terms, int start, int len) {
int res[] = new int[len];
for (int i=0; i < len; i++) {
res[i] = indexOf(terms[i]);
}
return res;
}
}
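A usage sketch; the query string is hypothetical:

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.QueryTermVector;

// Sketch only: tokenize a query and fold duplicate terms into frequencies.
public class QueryVectorDemo {
  public static void main(String[] args) {
    QueryTermVector qtv =
        new QueryTermVector("hello world hello", new WhitespaceAnalyzer());
    System.out.println(qtv);  // prints {hello/2, world/1}
  }
}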

View File

@ -0,0 +1,32 @@
package org.apache.lucene.util;
/**
* Methods for manipulating strings
*
**/
public abstract class StringHelper {
/**
*
* @param s1 The first string to compare
* @param s2 The second string to compare
* @return The first position where the two strings differ.
*/
public static final int stringDifference(String s1, String s2) {
int len1 = s1.length();
int len2 = s2.length();
int len = len1 < len2 ? len1 : len2;
for (int i = 0; i < len; i++) {
if (s1.charAt(i) != s2.charAt(i)) {
return i;
}
}
return len;
}
private StringHelper() {
}
}
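A worked example of the prefix sharing this enables in TermInfosWriter.writeTerm() and TermVectorsWriter.writeField():

import org.apache.lucene.util.StringHelper;

// Sketch only: "apple" and "apply" share the 4-char prefix "appl", so the
// writers store the shared length (4), the delta length (1), and just "y".
public class PrefixDemo {
  public static void main(String[] args) {
    System.out.println(StringHelper.stringDifference("apple", "apply"));  // 4
  }
}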

View File

@ -0,0 +1,159 @@
package org.apache.lucene.index;
/**
* Created by IntelliJ IDEA.
* User: Grant Ingersoll
* Date: Feb 2, 2004
* Time: 6:16:12 PM
* $Id$
* Copyright 2004. Center For Natural Language Processing
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Enumeration;
/**
* Helper for index tests: builds documents containing stored, unstored,
* indexed, and term-vectorized fields.
**/
class DocHelper {
public static final String FIELD_1_TEXT = "field one text";
public static final String TEXT_FIELD_1_KEY = "textField1";
public static Field textField1 = Field.Text(TEXT_FIELD_1_KEY, FIELD_1_TEXT, false);
public static final String FIELD_2_TEXT = "field field field two text";
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static final int [] FIELD_2_FREQS = {3, 1, 1};
public static final String TEXT_FIELD_2_KEY = "textField2";
public static Field textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
public static final String KEYWORD_TEXT = "Keyword";
public static final String KEYWORD_FIELD_KEY = "keyField";
public static Field keyField = Field.Keyword(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
public static final String UNINDEXED_FIELD_KEY = "unIndField";
public static Field unIndField = Field.UnIndexed(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
public static final String UNSTORED_1_FIELD_TEXT = "unstored field text";
public static final String UNSTORED_FIELD_1_KEY = "unStoredField1";
public static Field unStoredField1 = Field.UnStored(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, false);
public static final String UNSTORED_2_FIELD_TEXT = "unstored field text";
public static final String UNSTORED_FIELD_2_KEY = "unStoredField2";
public static Field unStoredField2 = Field.UnStored(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, true);
// public static Set fieldNamesSet = null;
// public static Set fieldValuesSet = null;
public static Map nameValues = null;
static
{
nameValues = new HashMap();
nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
}
/**
* Adds the fields above to a document
* @param doc The document to write
*/
public static void setupDoc(Document doc) {
doc.add(textField1);
doc.add(textField2);
doc.add(keyField);
doc.add(unIndField);
doc.add(unStoredField1);
doc.add(unStoredField2);
}
/**
* Writes the document to the directory using a segment named "test"
* @param dir
* @param doc
*/
public static void writeDoc(Directory dir, Document doc)
{
writeDoc(dir, "test", doc);
}
/**
* Writes the document to the directory in the given segment
* @param dir
* @param segment
* @param doc
*/
public static void writeDoc(Directory dir, String segment, Document doc)
{
Analyzer analyzer = new WhitespaceAnalyzer();
Similarity similarity = Similarity.getDefault();
writeDoc(dir, analyzer, similarity, segment, doc);
}
/**
* Writes the document to the directory segment named "test" using the specified analyzer and similarity
* @param dir
* @param analyzer
* @param similarity
* @param doc
*/
public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
{
writeDoc(dir, analyzer, similarity, "test", doc);
}
/**
* Writes the document to the directory segment using the analyzer and the similarity score
* @param dir
* @param analyzer
* @param similarity
* @param segment
* @param doc
*/
public static void writeDoc(Directory dir, Analyzer analyzer, Similarity similarity, String segment, Document doc)
{
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
try {
writer.addDocument(segment, doc);
} catch (IOException e) {
e.printStackTrace();
}
}
public static int numFields(Document doc) {
Enumeration fields = doc.fields();
int result = 0;
while (fields.hasMoreElements()) {
fields.nextElement();
result++;
}
return result;
}
}
/*
fieldNamesSet = new HashSet();
fieldNamesSet.add(TEXT_FIELD_1_KEY);
fieldNamesSet.add(TEXT_FIELD_2_KEY);
fieldNamesSet.add(KEYWORD_FIELD_KEY);
fieldNamesSet.add(UNINDEXED_FIELD_KEY);
fieldNamesSet.add(UNSTORED_FIELD_1_KEY);
fieldNamesSet.add(UNSTORED_FIELD_2_KEY);
fieldValuesSet = new HashSet();
fieldValuesSet.add(FIELD_1_TEXT);
fieldValuesSet.add(FIELD_2_TEXT);
fieldValuesSet.add(KEYWORD_TEXT);
fieldValuesSet.add(UNINDEXED_FIELD_TEXT);
fieldValuesSet.add(UNSTORED_1_FIELD_TEXT);
fieldValuesSet.add(UNSTORED_2_FIELD_TEXT);
*/

View File

@ -0,0 +1,121 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.IOException;
public class TestDocumentWriter extends TestCase {
private RAMDirectory dir = new RAMDirectory();
private Document testDoc = new Document();
public TestDocumentWriter(String s) {
super(s);
}
protected void setUp() {
DocHelper.setupDoc(testDoc);
}
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
}
public void testAddDocument() {
Analyzer analyzer = new WhitespaceAnalyzer();
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
assertTrue(writer != null);
try {
writer.addDocument("test", testDoc);
//After adding the document, we should be able to read it back in
SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
assertTrue(reader != null);
Document doc = reader.document(0);
assertTrue(doc != null);
//System.out.println("Document: " + doc);
Field [] fields = doc.getFields("textField2");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
assertTrue(fields[0].isTermVectorStored() == true);
fields = doc.getFields("textField1");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
assertTrue(fields[0].isTermVectorStored() == false);
fields = doc.getFields("keyField");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
}

View File

@ -0,0 +1,65 @@
package org.apache.lucene.index;
import junit.framework.TestCase;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.OutputStream;
import java.io.IOException;
import java.util.Map;
//import org.cnlp.utils.properties.ResourceBundleHelper;
public class TestFieldInfos extends TestCase {
private Document testDoc = new Document();
public TestFieldInfos(String s) {
super(s);
}
protected void setUp() {
DocHelper.setupDoc(testDoc);
}
protected void tearDown() {
}
public void test() {
//Positive test of FieldInfos
assertTrue(testDoc != null);
FieldInfos fieldInfos = new FieldInfos();
fieldInfos.add(testDoc);
//Since the complement is stored as well in the fields map
assertTrue(fieldInfos.size() == 7); //this is 7 b/c we are using the no-arg constructor
RAMDirectory dir = new RAMDirectory();
String name = "testFile";
OutputStream output = dir.createFile(name);
assertTrue(output != null);
//Use a RAMOutputStream
try {
fieldInfos.write(output);
output.close();
assertTrue(output.length() > 0);
FieldInfos readIn = new FieldInfos(dir, name);
assertTrue(fieldInfos.size() == readIn.size());
FieldInfo info = readIn.fieldInfo("textField1");
assertTrue(info != null);
assertTrue(info.storeTermVector == false);
info = readIn.fieldInfo("textField2");
assertTrue(info != null);
assertTrue(info.storeTermVector == true);
dir.close();
} catch (IOException e) {
assertTrue(false);
}
}
}

View File

@ -0,0 +1,115 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.Similarity;
import java.util.Map;
import java.io.IOException;
public class TestFieldsReader extends TestCase {
private RAMDirectory dir = new RAMDirectory();
private Document testDoc = new Document();
private FieldInfos fieldInfos = null;
public TestFieldsReader(String s) {
super(s);
}
protected void setUp() {
fieldInfos = new FieldInfos();
DocHelper.setupDoc(testDoc);
fieldInfos.add(testDoc);
DocumentWriter writer = new DocumentWriter(dir, new WhitespaceAnalyzer(),
Similarity.getDefault(), 50);
assertTrue(writer != null);
try {
writer.addDocument("test", testDoc);
}
catch (IOException e)
{
}
}
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
try {
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Document doc = reader.doc(0);
assertTrue(doc != null);
assertTrue(doc.getField("textField1") != null);
Field field = doc.getField("textField2");
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == true);
reader.close();
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
}

View File

@ -79,7 +79,7 @@ public class TestFilterIndexReader extends TestCase {
private static class TestReader extends FilterIndexReader {
/** Filter that only permits terms containing 'e'.*/
private static class TestTermEnum extends FilterTermEnum {
public TestTermEnum(TermEnum enum)
throws IOException {

View File

@ -0,0 +1,136 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
public class TestMultiReader extends TestCase {
private Directory dir = new RAMDirectory();
private Document doc1 = new Document();
private Document doc2 = new Document();
private SegmentReader reader1;
private SegmentReader reader2;
private SegmentReader [] readers = new SegmentReader[2];
private SegmentInfos sis = new SegmentInfos();
public TestMultiReader(String s) {
super(s);
}
protected void setUp() {
DocHelper.setupDoc(doc1);
DocHelper.setupDoc(doc2);
DocHelper.writeDoc(dir, "seg-1", doc1);
DocHelper.writeDoc(dir, "seg-2", doc2);
try {
sis.write(dir);
reader1 = new SegmentReader(new SegmentInfo("seg-1", 1, dir));
reader2 = new SegmentReader(new SegmentInfo("seg-2", 1, dir));
readers[0] = reader1;
readers[1] = reader2;
} catch (IOException e) {
e.printStackTrace();
}
}
/*IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.close();*/
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
assertTrue(reader1 != null);
assertTrue(reader2 != null);
assertTrue(sis != null);
}
public void testDocument() {
try {
sis.read(dir);
MultiReader reader = new MultiReader(dir, readers);
assertTrue(reader != null);
Document newDoc1 = reader.document(0);
assertTrue(newDoc1 != null);
assertTrue(DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - 2);
Document newDoc2 = reader.document(1);
assertTrue(newDoc2 != null);
assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - 2);
TermFreqVector vector = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
public void testTermVectors() {
try {
MultiReader reader = new MultiReader(dir, readers);
assertTrue(reader != null);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
}

View File

@ -0,0 +1,163 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import java.io.IOException;
import java.util.Collection;
public class TestSegmentMerger extends TestCase {
//The variables for the new merged segment
private Directory mergedDir = new RAMDirectory();
private String mergedSegment = "test";
//First segment to be merged
private Directory merge1Dir = new RAMDirectory();
private Document doc1 = new Document();
private String merge1Segment = "test-1";
private SegmentReader reader1 = null;
//Second Segment to be merged
private Directory merge2Dir = new RAMDirectory();
private Document doc2 = new Document();
private String merge2Segment = "test-2";
private SegmentReader reader2 = null;
public TestSegmentMerger(String s) {
super(s);
}
protected void setUp() {
DocHelper.setupDoc(doc1);
DocHelper.writeDoc(merge1Dir, merge1Segment, doc1);
DocHelper.setupDoc(doc2);
DocHelper.writeDoc(merge2Dir, merge2Segment, doc2);
try {
reader1 = new SegmentReader(new SegmentInfo(merge1Segment, 1, merge1Dir));
reader2 = new SegmentReader(new SegmentInfo(merge2Segment, 1, merge2Dir));
} catch (IOException e) {
e.printStackTrace();
}
}
protected void tearDown() {
}
public void test() {
assertTrue(mergedDir != null);
assertTrue(merge1Dir != null);
assertTrue(merge2Dir != null);
assertTrue(reader1 != null);
assertTrue(reader2 != null);
}
public void testMerge() {
//System.out.println("----------------TestMerge------------------");
SegmentMerger merger = new SegmentMerger(mergedDir, mergedSegment, false);
merger.add(reader1);
merger.add(reader2);
try {
int docsMerged = merger.merge();
assertTrue(docsMerged == 2);
//Should be able to open a new SegmentReader against the new directory
SegmentReader mergedReader = new SegmentReader(new SegmentInfo(mergedSegment, docsMerged, mergedDir));
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
Document newDoc1 = mergedReader.document(0);
assertTrue(newDoc1 != null);
//There are 2 unstored fields on the document
assertTrue(DocHelper.numFields(newDoc1) == DocHelper.numFields(doc1) - 2);
Document newDoc2 = mergedReader.document(1);
assertTrue(newDoc2 != null);
assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - 2);
TermDocs termDocs = mergedReader.termDocs(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
assertTrue(termDocs != null);
assertTrue(termDocs.next() == true);
Collection stored = mergedReader.getIndexedFieldNames(true);
assertTrue(stored != null);
//System.out.println("stored size: " + stored.size());
assertTrue(stored.size() == 2);
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);
String [] terms = vector.getTerms();
assertTrue(terms != null);
//System.out.println("Terms size: " + terms.length);
assertTrue(terms.length == 3);
int [] freqs = vector.getTermFrequencies();
assertTrue(freqs != null);
//System.out.println("Freqs size: " + freqs.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
int freq = freqs[i];
//System.out.println("Term: " + term + " Freq: " + freq);
assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
assertTrue(DocHelper.FIELD_2_FREQS[i] == freq);
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
//System.out.println("---------------------end TestMerge-------------------");
}
}

View File

@ -0,0 +1,250 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Enumeration;
public class TestSegmentReader extends TestCase {
private RAMDirectory dir = new RAMDirectory();
private Document testDoc = new Document();
private SegmentReader reader = null;
public TestSegmentReader(String s) {
super(s);
}
//TODO: Setup the reader w/ multiple documents
protected void setUp() {
try {
DocHelper.setupDoc(testDoc);
DocHelper.writeDoc(dir, testDoc);
reader = new SegmentReader(new SegmentInfo("test", 1, dir));
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
assertTrue(reader != null);
assertTrue(DocHelper.nameValues.size() > 0);
assertTrue(DocHelper.numFields(testDoc) == 6);
}
public void testDocument() {
try {
assertTrue(reader.numDocs() == 1);
assertTrue(reader.maxDoc() >= 1);
Document result = reader.document(0);
assertTrue(result != null);
//There are 2 unstored fields on the document that are not preserved across writing
assertTrue(DocHelper.numFields(result) == DocHelper.numFields(testDoc) - 2);
Enumeration fields = result.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
assertTrue(field != null);
assertTrue(DocHelper.nameValues.containsKey(field.name()));
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
public void testDelete() {
Document docToDelete = new Document();
DocHelper.setupDoc(docToDelete);
DocHelper.writeDoc(dir, "seg-to-delete", docToDelete);
try {
SegmentReader deleteReader = new SegmentReader(new SegmentInfo("seg-to-delete", 1, dir));
assertTrue(deleteReader != null);
assertTrue(deleteReader.numDocs() == 1);
deleteReader.delete(0);
assertTrue(deleteReader.isDeleted(0) == true);
assertTrue(deleteReader.hasDeletions() == true);
assertTrue(deleteReader.numDocs() == 0);
try {
Document test = deleteReader.document(0);
assertTrue(false);
} catch (IllegalArgumentException e) {
assertTrue(true);
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
public void testGetFieldNameVariations() {
try {
Collection result = reader.getFieldNames();
assertTrue(result != null);
assertTrue(result.size() == 7);
for (Iterator iter = result.iterator(); iter.hasNext();) {
String s = (String) iter.next();
//System.out.println("Name: " + s);
assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
}
result = reader.getFieldNames(true);
assertTrue(result != null);
// System.out.println("Size: " + result.size());
assertTrue(result.size() == 5);
for (Iterator iter = result.iterator(); iter.hasNext();) {
String s = (String) iter.next();
assertTrue(DocHelper.nameValues.containsKey(s) == true || s.equals(""));
}
result = reader.getFieldNames(false);
assertTrue(result != null);
assertTrue(result.size() == 2);
//Get all indexed fields that are storing term vectors
result = reader.getIndexedFieldNames(true);
assertTrue(result != null);
assertTrue(result.size() == 2);
result = reader.getIndexedFieldNames(false);
assertTrue(result != null);
assertTrue(result.size() == 3);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
public void testTerms() {
try {
TermEnum terms = reader.terms();
assertTrue(terms != null);
while (terms.next() == true)
{
Term term = terms.term();
assertTrue(term != null);
//System.out.println("Term: " + term);
String fieldValue = (String)DocHelper.nameValues.get(term.field());
assertTrue(fieldValue.indexOf(term.text()) != -1);
}
TermDocs termDocs = reader.termDocs();
assertTrue(termDocs != null);
termDocs.seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
assertTrue(termDocs.next() == true);
TermPositions positions = reader.termPositions();
positions.seek(new Term(DocHelper.TEXT_FIELD_1_KEY, "field"));
assertTrue(positions != null);
assertTrue(positions.doc() == 0);
assertTrue(positions.nextPosition() >= 0);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
public void testNorms() {
//TODO: Not sure how these work/should be tested
/*
try {
byte [] norms = reader.norms(DocHelper.TEXT_FIELD_1_KEY);
System.out.println("Norms: " + norms);
assertTrue(norms != null);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
*/
}
public void testTermVectors() {
try {
TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(result != null);
String [] terms = result.getTerms();
int [] freqs = result.getTermFrequencies();
assertTrue(terms != null && terms.length == 3 && freqs != null && freqs.length == 3);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
int freq = freqs[i];
assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
assertTrue(freq > 0);
}
TermFreqVector [] results = reader.getTermFreqVectors(0);
assertTrue(results != null);
assertTrue(results.length == 2);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
}

View File

@ -0,0 +1,137 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.io.IOException;
public class TestSegmentTermDocs extends TestCase {
private Document testDoc = new Document();
private Directory dir = new RAMDirectory();
public TestSegmentTermDocs(String s) {
super(s);
}
protected void setUp() {
DocHelper.setupDoc(testDoc);
DocHelper.writeDoc(dir, testDoc);
}
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
}
public void testTermDocs() {
try {
//After adding the document, we should be able to read it back in
SegmentReader reader = new SegmentReader(new SegmentInfo("test", 1, dir));
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
segTermDocs.seek(new Term(DocHelper.TEXT_FIELD_2_KEY, "field"));
if (segTermDocs.next() == true)
{
int docId = segTermDocs.doc();
assertTrue(docId == 0);
int freq = segTermDocs.freq();
assertTrue(freq == 3);
}
reader.close();
} catch (IOException e) {
assertTrue(false);
}
}
public void testBadSeek() {
try {
//After adding the document, we should be able to read it back in
SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
segTermDocs.seek(new Term("textField2", "bad"));
assertTrue(segTermDocs.next() == false);
reader.close();
} catch (IOException e) {
assertTrue(false);
}
try {
//After adding the document, we should be able to read it back in
SegmentReader reader = new SegmentReader(new SegmentInfo("test", 3, dir));
assertTrue(reader != null);
SegmentTermDocs segTermDocs = new SegmentTermDocs(reader);
assertTrue(segTermDocs != null);
segTermDocs.seek(new Term("junk", "bad"));
assertTrue(segTermDocs.next() == false);
reader.close();
} catch (IOException e) {
assertTrue(false);
}
}
}

View File

@ -0,0 +1,106 @@
package org.apache.lucene.index;
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.util.Arrays;
public class TestTermVectorsReader extends TestCase {
private TermVectorsWriter writer = null;
//testTerms must be lexicographically sorted; they are sorted in setUp rather than maintained in order here
private String [] testFields = {"f1", "f2", "f3"};
private String [] testTerms = {"this", "is", "a", "test"};
private RAMDirectory dir = new RAMDirectory();
private String seg = "testSegment";
private FieldInfos fieldInfos = new FieldInfos();
public TestTermVectorsReader(String s) {
super(s);
}
protected void setUp() {
for (int i = 0; i < testFields.length; i++) {
fieldInfos.add(testFields[i], true, true);
}
try {
Arrays.sort(testTerms);
for (int j = 0; j < 5; j++) {
writer = new TermVectorsWriter(dir, seg, fieldInfos);
writer.openDocument();
for (int k = 0; k < testFields.length; k++) {
writer.openField(testFields[k]);
for (int i = 0; i < testTerms.length; i++) {
writer.addTerm(testTerms[i], i);
}
writer.closeField();
}
writer.closeDocument();
writer.close();
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
protected void tearDown() {
}
public void test() {
//Check to see the files were created properly in setup
assertTrue(writer.isDocumentOpen() == false);
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
}
public void testReader() {
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
TermFreqVector vector = reader.get(0, testFields[0]);
assertTrue(vector != null);
String [] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
/**
* Make sure exceptions and bad params are handled appropriately
*/
public void testBadParams() {
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
//Bad document number, good field number
TermFreqVector vector = reader.get(50, testFields[0]);
assertTrue(vector == null);
} catch (Exception e) {
assertTrue(false);
}
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
//good document number, bad field number
TermFreqVector vector = reader.get(0, "f50");
assertTrue(vector == null);
} catch (Exception e) {
assertTrue(false);
}
}
}

View File

@ -0,0 +1,240 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
public class TestTermVectorsWriter extends TestCase {
private String[] testTerms = {"this", "is", "a", "test"};
private String [] testFields = {"f1", "f2", "f3"};
private int[][] positions = new int[testTerms.length][];
private RAMDirectory dir = new RAMDirectory();
private String seg = "testSegment";
private FieldInfos fieldInfos = new FieldInfos();
public TestTermVectorsWriter(String s) {
super(s);
}
protected void setUp() {
for (int i = 0; i < testFields.length; i++) {
fieldInfos.add(testFields[i], true, true);
}
for (int i = 0; i < testTerms.length; i++) {
positions[i] = new int[5];
for (int j = 0; j < positions[i].length; j++) {
positions[i][j] = i * 100;
}
}
}
protected void tearDown() {
}
public void test() {
assertTrue(dir != null);
assertTrue(positions != null);
}
/*public void testWriteNoPositions() {
try {
TermVectorsWriter writer = new TermVectorsWriter(dir, seg, 50);
writer.openDocument();
assertTrue(writer.isDocumentOpen() == true);
writer.openField(0);
assertTrue(writer.isFieldOpen() == true);
for (int i = 0; i < testTerms.length; i++) {
writer.addTerm(testTerms[i], i);
}
writer.closeField();
writer.closeDocument();
writer.close();
assertTrue(writer.isDocumentOpen() == false);
//Check to see the files were created
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
//Now read it back in
TermVectorsReader reader = new TermVectorsReader(dir, seg);
assertTrue(reader != null);
checkTermVector(reader, 0, 0);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
} */
public void testWriter() {
try {
TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
writer.openDocument();
assertTrue(writer.isDocumentOpen() == true);
writeField(writer, testFields[0]);
writer.closeDocument();
writer.close();
assertTrue(writer.isDocumentOpen() == false);
//Check to see the files were created
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
//Now read it back in
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
checkTermVector(reader, 0, testFields[0]);
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
private void checkTermVector(TermVectorsReader reader, int docNum, String field) throws IOException {
TermFreqVector vector = reader.get(docNum, field);
assertTrue(vector != null);
String[] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
assertTrue(term.equals(testTerms[i]));
}
}
/**
* Test one document, multiple fields
*/
public void testMultipleFields() {
try {
TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
writeDocument(writer, testFields.length);
writer.close();
assertTrue(writer.isDocumentOpen() == false);
//Check to see the files were created
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
//Now read it back in
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
for (int j = 0; j < testFields.length; j++) {
checkTermVector(reader, 0, testFields[j]);
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
private void writeDocument(TermVectorsWriter writer, int numFields) throws IOException {
writer.openDocument();
assertTrue(writer.isDocumentOpen() == true);
for (int j = 0; j < numFields; j++) {
writeField(writer, testFields[j]);
}
writer.closeDocument();
assertTrue(writer.isDocumentOpen() == false);
}
/**
* Writes the standard set of test terms to the given field.
* @param writer The writer to write to
* @param f The name of the field to write
* @throws IOException
*/
private void writeField(TermVectorsWriter writer, String f) throws IOException {
writer.openField(f);
assertTrue(writer.isFieldOpen() == true);
for (int i = 0; i < testTerms.length; i++) {
writer.addTerm(testTerms[i], i);
}
writer.closeField();
}
public void testMultipleDocuments() {
try {
TermVectorsWriter writer = new TermVectorsWriter(dir, seg, fieldInfos);
assertTrue(writer != null);
for (int i = 0; i < 10; i++) {
writeDocument(writer, testFields.length);
}
writer.close();
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
//Do some arbitrary tests
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
for (int i = 0; i < 10; i++) {
assertTrue(reader != null);
checkTermVector(reader, 5, testFields[0]);
checkTermVector(reader, 2, testFields[2]);
}
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
}

View File

@ -103,7 +103,7 @@ public class TestBasics extends TestCase {
searcher = new IndexSearcher(directory);
}
public void testTerm() throws Exception {
Query query = new TermQuery(new Term("field", "seventy"));
checkHits(query, new int[]

View File

@ -0,0 +1,104 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
public class TestQueryTermVector extends TestCase {
public TestQueryTermVector(String s) {
super(s);
}
protected void setUp() {
}
protected void tearDown() {
}
public void testConstructor() {
String [] queryTerm = {"foo", "bar", "foo", "again", "foo", "bar", "go", "go", "go"};
//Items are sorted lexicographically
String [] gold = {"again", "bar", "foo", "go"};
int [] goldFreqs = {1, 2, 3, 3};
QueryTermVector result = new QueryTermVector(queryTerm);
assertTrue(result != null);
String [] terms = result.getTerms();
assertTrue(terms.length == 4);
int [] freq = result.getTermFrequencies();
assertTrue(freq.length == 4);
checkGold(terms, gold, freq, goldFreqs);
result = new QueryTermVector(null);
assertTrue(result.getTerms().length == 0);
result = new QueryTermVector("foo bar foo again foo bar go go go", new WhitespaceAnalyzer());
assertTrue(result != null);
terms = result.getTerms();
assertTrue(terms.length == 4);
freq = result.getTermFrequencies();
assertTrue(freq.length == 4);
checkGold(terms, gold, freq, goldFreqs);
}
private void checkGold(String[] terms, String[] gold, int[] freq, int[] goldFreqs) {
for (int i = 0; i < terms.length; i++) {
assertTrue(terms[i].equals(gold[i]));
assertTrue(freq[i] == goldFreqs[i]);
}
}
}

View File

@ -0,0 +1,261 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class TestTermVectors extends TestCase {
private IndexSearcher searcher;
private RAMDirectory directory = new RAMDirectory();
public TestTermVectors(String s) {
super(s);
}
public void setUp() throws Exception {
IndexWriter writer
= new IndexWriter(directory, new SimpleAnalyzer(), true);
//writer.setUseCompoundFile(true);
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();
doc.add(Field.Text("field", English.intToEnglish(i), true));
writer.addDocument(doc);
}
writer.close();
searcher = new IndexSearcher(directory);
}
protected void tearDown() {
}
public void test() {
assertTrue(searcher != null);
}
public void testTermVectors() {
Query query = new TermQuery(new Term("field", "seventy"));
try {
Hits hits = searcher.search(query);
assertEquals(100, hits.length());
for (int i = 0; i < hits.length(); i++)
{
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
assertTrue(vector != null);
assertTrue(vector.length == 1);
//assertTrue();
}
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(50));
//System.out.println("Explain: " + searcher.explain(query, hits.id(50)));
//System.out.println("Vector: " + vector[0].toString());
} catch (IOException e) {
assertTrue(false);
}
}
public void testTermPositionVectors() {
Query query = new TermQuery(new Term("field", "fifty"));
try {
Hits hits = searcher.search(query);
assertEquals(100, hits.length());
for (int i = 0; i < hits.length(); i++)
{
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
assertTrue(vector != null);
assertTrue(vector.length == 1);
//assertTrue();
}
} catch (IOException e) {
assertTrue(false);
}
}
public void testKnownSetOfDocuments() {
String [] termArray = {"eating", "chocolate", "in", "a", "computer", "lab", "grows", "old", "colored",
"with", "an"};
String test1 = "eating chocolate in a computer lab"; //6 terms
String test2 = "computer in a computer lab"; //5 terms
String test3 = "a chocolate lab grows old"; //5 terms
String test4 = "eating chocolate with a chocolate lab in an old chocolate colored computer lab"; //13 terms
Map test4Map = new HashMap();
test4Map.put("chocolate", new Integer(3));
test4Map.put("lab", new Integer(2));
test4Map.put("eating", new Integer(1));
test4Map.put("computer", new Integer(1));
test4Map.put("with", new Integer(1));
test4Map.put("a", new Integer(1));
test4Map.put("colored", new Integer(1));
test4Map.put("in", new Integer(1));
test4Map.put("an", new Integer(1));
test4Map.put("computer", new Integer(1));
test4Map.put("old", new Integer(1));
Document testDoc1 = new Document();
setupDoc(testDoc1, test1);
Document testDoc2 = new Document();
setupDoc(testDoc2, test2);
Document testDoc3 = new Document();
setupDoc(testDoc3, test3);
Document testDoc4 = new Document();
setupDoc(testDoc4, test4);
Directory dir = new RAMDirectory();
try {
IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
assertTrue(writer != null);
writer.addDocument(testDoc1);
writer.addDocument(testDoc2);
writer.addDocument(testDoc3);
writer.addDocument(testDoc4);
writer.close();
IndexSearcher knownSearcher = new IndexSearcher(dir);
TermEnum termEnum = knownSearcher.reader.terms();
TermDocs termDocs = knownSearcher.reader.termDocs();
//System.out.println("Terms: " + termEnum.size() + " Orig Len: " + termArray.length);
Similarity sim = knownSearcher.getSimilarity();
while (termEnum.next() == true)
{
Term term = termEnum.term();
//System.out.println("Term: " + term);
termDocs.seek(term);
while (termDocs.next())
{
int docId = termDocs.doc();
int freq = termDocs.freq();
//System.out.println("Doc Id: " + docId + " freq " + freq);
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(docId, "field");
float tf = sim.tf(freq);
float idf = sim.idf(term, knownSearcher);
//float qNorm = sim.queryNorm()
//This is fine since we don't have stop words
float lNorm = sim.lengthNorm("field", vector.getTerms().length);
//float coord = sim.coord()
//System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
assertTrue(vector != null);
String[] vTerms = vector.getTerms();
int [] freqs = vector.getTermFrequencies();
for (int i = 0; i < vTerms.length; i++)
{
if (term.text().equals(vTerms[i]) == true)
{
assertTrue(freqs[i] == freq);
}
}
}
//System.out.println("--------");
}
Query query = new TermQuery(new Term("field", "chocolate"));
Hits hits = knownSearcher.search(query);
//doc 3 should be the first hit b/c it is the shortest match
assertTrue(hits.length() == 3);
float score = hits.score(0);
/*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString());
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0)));
System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString());
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
assertTrue(testDoc3.toString().equals(hits.doc(0).toString()));
assertTrue(testDoc4.toString().equals(hits.doc(1).toString()));
assertTrue(testDoc1.toString().equals(hits.doc(2).toString()));
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
assertTrue(vector != null);
//System.out.println("Vector: " + vector);
String[] terms = vector.getTerms();
int [] freqs = vector.getTermFrequencies();
assertTrue(terms != null && terms.length == 10);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
int freq = freqs[i];
assertTrue(test4.indexOf(term) != -1);
Integer freqInt = (Integer)test4Map.get(term);
assertTrue(freqInt != null);
assertTrue(freqInt.intValue() == freq);
}
knownSearcher.close();
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
private void setupDoc(Document doc, String text)
{
doc.add(Field.Text("field", text, true));
//System.out.println("Document: " + doc);
}
}

View File

@ -0,0 +1,88 @@
package org.apache.lucene.util;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
public class StringHelperTest extends TestCase {
public StringHelperTest(String s) {
super(s);
}
protected void setUp() {
}
protected void tearDown() {
}
public void testStringDifference() {
String test1 = "test";
String test2 = "testing";
int result = StringHelper.stringDifference(test1, test2);
assertTrue(result == 4);
test2 = "foo";
result = StringHelper.stringDifference(test1, test2);
assertTrue(result == 0);
test2 = "test";
result = StringHelper.stringDifference(test1, test2);
assertTrue(result == 4);
}
}

View File

@ -14,7 +14,7 @@
<p>
This document defines the index file formats used
in Lucene version 1.3.
in Lucene version 1.4.
</p>
<p>
@ -224,7 +224,11 @@
multiplied into the score for hits on that field.
</p>
</li>
<li><p>Term Vectors. For each field in each document, the term vector
(sometimes called document vector) is stored. A term vector consists
of the term text, term frequency and term position.
</p>
</li>
<li><p>Deleted documents.
An optional file indicating which documents are deleted.
</p>
@ -804,9 +808,10 @@
</p>
<p>
Currently only the low-order bit is used of FieldBits is used. It is
one for
indexed fields, and zero for non-indexed fields.
The low-order bit is one for
indexed fields, and zero for non-indexed fields. The second lowest-order
bit is one for fields that have term vectors stored, and zero for fields
without term vectors.
</p>
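To make the bit layout concrete, here is a minimal sketch of decoding a FieldBits byte; the helper names are illustrative and this is not the Lucene source:
static boolean isIndexed(byte fieldBits) {
  return (fieldBits & 0x01) != 0;   // low-order bit: field is indexed
}
static boolean storesTermVector(byte fieldBits) {
  return (fieldBits & 0x02) != 0;   // second lowest-order bit: term vector stored
}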
<p>
@ -1113,6 +1118,52 @@
</ol>
</subsection>
<subsection name="Term Vectors">
Term Vector support is optional on a field-by-field basis. It consists of three
files; a sketch of how the files fit together follows the list below.
<ol>
<li>
<p>The Document Index or .tvx file.</p>
<p>This contains, for each document, a pointer to the document data in the Document
(.tvd) file.
</p>
<p>DocumentIndex (.tvx) --&gt; FormatVersion&lt;DocumentPosition&gt;<sup>NumDocs</sup></p>
<p>FormatVersion --&gt; Int</p>
<p>DocumentPosition --&gt; UInt64</p>
<p>This is used to find the position of the Document in the .tvd file.</p>
</li>
<li>
<p>The Document or .tvd file.</p>
<p>This contains, for each document, the number of fields, a list of the fields with
term vector info and finally a list of pointers to the field information in the .tvf
(Term Vector Fields) file.</p>
<p>
Document (.tvd) --&gt; FormatVersion&lt;NumFields, FieldNums, FieldPositions&gt;<sup>NumDocs</sup>
</p>
<p>FormatVersion --&gt; Int</p>
<p>NumFields --&gt; VInt</p>
<p>FieldNums --&gt; &lt;FieldNumDelta&gt;<sup>NumFields</sup></p>
<p>FieldNumDelta --&gt; VInt</p>
<p>FieldPositions --&gt; &lt;FieldPosition&gt;<sup>NumFields</sup></p>
<p>FieldPosition --&gt; VLong</p>
<p>The .tvd file is used to map out the fields that have term vectors stored and
where the field information is in the .tvf file.</p>
</li>
<li>
<p>The Field or .tvf file.</p>
<p>This file contains, for each field that has a term vector stored, a list of
the terms and their frequencies.</p>
<p>Field (.tvf) --&gt; FormatVersion&lt;NumTerms, NumDistinct, TermFreqs&gt;<sup>NumFields</sup></p>
<p>FormatVersion --&gt; Int</p>
<p>NumTerms --&gt; VInt</p>
<p>NumDistinct --&gt; VInt -- Future Use</p>
<p>TermFreqs --&gt; &lt;TermText, TermFreq&gt;<sup>NumTerms</sup></p>
<p>TermText --&gt; String</p>
<p>TermFreq --&gt; VInt</p>
</li>
</ol>
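As a rough illustration of how these files fit together, the sketch below resolves a document's entry in the .tvd file using plain java.io. It is not the Lucene implementation (which reads through its own Directory abstraction), and it omits the VInt/VLong decoding needed for the .tvd and .tvf payloads; Lucene writes Int and UInt64 values high byte first, which matches RandomAccessFile's big-endian reads.
import java.io.IOException;
import java.io.RandomAccessFile;
public class TvxLookup {
  //Follow the .tvx layout above: a 4-byte FormatVersion followed by one
  //8-byte DocumentPosition per document.
  static long tvdPosition(String segment, int docNum) throws IOException {
    RandomAccessFile tvx = new RandomAccessFile(segment + ".tvx", "r");
    try {
      tvx.seek(4L + 8L * docNum);   //skip FormatVersion, index into positions
      return tvx.readLong();        //pointer into this document's .tvd entry
    } finally {
      tvx.close();
    }
  }
}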
</subsection>
<subsection name="Deleted Documents">