mirror of https://github.com/apache/lucene.git
Grant's new termvector patch (Bug #18927) applied with some modifications.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150566 13f79535-47bb-0310-9956-ffa450edef68

commit f1667be0fc
parent c6d6a390df
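The patch extends Field with a TermVector option so callers choose whether positions and offsets are stored alongside each term vector. A minimal usage sketch, not part of the commit: field names and values are illustrative, and it assumes the Field(name, value, Store, Index, TermVector) constructor shown in the hunks below together with the Field.Index.TOKENIZED constant of that era.

    // Hedged sketch: index one document whose "contents" field stores
    // term vectors with both positions and offsets (names are illustrative).
    Document doc = new Document();
    doc.add(new Field("title", "Lucene in Action", Field.Store.YES,
                      Field.Index.TOKENIZED, Field.TermVector.NO));
    doc.add(new Field("contents", "the quick brown fox", Field.Store.NO,
                      Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);   // writer is an already-open IndexWriter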
@@ -38,6 +38,8 @@ public final class Field implements java.io.Serializable {
  private Object fieldsData = null;

  private boolean storeTermVector = false;
  private boolean storeOffsetWithTermVector = false;
  private boolean storePositionWithTermVector = false;
  private boolean isStored = false;
  private boolean isIndexed = true;
  private boolean isTokenized = true;
@@ -55,16 +57,19 @@ public final class Field implements java.io.Serializable {
    public String toString() {
      return name;
    }

    /** Store the original field value in the index in a compressed form. This is
     * useful for long documents and for binary valued fields.
     */
    public static final Store COMPRESS = new Store("COMPRESS");

    /** Store the original field value in the index. This is useful for short texts
     * like a document's title which should be displayed with the results. The
     * value is stored in its original form, i.e. no analyzer is used before it is
     * stored.
     */
    public static final Store YES = new Store("YES");

    /** Do not store the field value in the index. */
    public static final Store NO = new Store("NO");
  }

@@ -100,15 +105,41 @@ public final class Field implements java.io.Serializable {
    private TermVector(String name) {
      this.name = name;
    }

    public String toString() {
      return name;
    }

    /** Do not store term vectors.
     */
    public static final TermVector NO = new TermVector("NO");

    /** Store the term vectors of each document. A term vector is a list
     * of the document's terms and their number of occurences in that document. */
    public static final TermVector YES = new TermVector("YES");

    /**
     * Store the term vector + token position information
     *
     * @see #YES
     */
    public static final TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");

    /**
     * Store the term vector + Token offset information
     *
     * @see #YES
     */
    public static final TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");

    /**
     * Store the term vector + Token position and offset information
     *
     * @see #YES
     * @see #WITH_POSITIONS
     * @see #WITH_OFFSETS
     */
    public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
  }

  /** Sets the boost factor hits on this field. This value will be
@@ -290,14 +321,18 @@ public final class Field implements java.io.Serializable {
    this.name = name.intern();        // field names are interned
    this.fieldsData = value;

    if (store == Store.YES)
    if (store == Store.YES){
      this.isStored = true;
      this.isCompressed = false;
    }
    else if (store == Store.COMPRESS) {
      this.isStored = true;
      this.isCompressed = true;
    }
    else if (store == Store.NO)
    else if (store == Store.NO){
      this.isStored = false;
      this.isCompressed = false;
    }
    else
      throw new IllegalArgumentException("unknown store parameter " + store);

@@ -314,6 +349,8 @@ public final class Field implements java.io.Serializable {
      throw new IllegalArgumentException("unknown index parameter " + index);
    }

    this.isBinary = false;

    setStoreTermVector(termVector);
  }

@@ -343,11 +380,18 @@ public final class Field implements java.io.Serializable {
      throw new NullPointerException("name cannot be null");
    if (reader == null)
      throw new NullPointerException("reader cannot be null");

    this.name = name.intern();        // field names are interned
    this.fieldsData = reader;

    this.isStored = false;
    this.isCompressed = false;

    this.isIndexed = true;
    this.isTokenized = true;

    this.isBinary = false;

    setStoreTermVector(termVector);
  }

@@ -374,21 +418,29 @@ public final class Field implements java.io.Serializable {
      throw new IllegalArgumentException("name cannot be null");
    if (value == null)
      throw new IllegalArgumentException("value cannot be null");
    if (store == Store.NO)
      throw new IllegalArgumentException("binary values can't be unstored");
    if (store == Store.COMPRESS)
      this.isCompressed = true;

    this.name = name.intern();
    //wrap the byte[] to a ByteBuffer object
    this.fieldsData = value;

    this.isBinary = true;
    this.isStored = true;
    if (store == Store.YES){
      this.isStored = true;
      this.isCompressed = false;
    }
    else if (store == Store.COMPRESS) {
      this.isStored = true;
      this.isCompressed = true;
    }
    else if (store == Store.NO)
      throw new IllegalArgumentException("binary values can't be unstored");
    else
      throw new IllegalArgumentException("unknown store parameter " + store);

    this.isIndexed = false;
    this.isTokenized = false;
    this.storeTermVector = false;

    this.isBinary = true;

    setStoreTermVector(TermVector.NO);
  }

  /**
@@ -422,9 +474,30 @@ public final class Field implements java.io.Serializable {
  private void setStoreTermVector(TermVector termVector) {
    if (termVector == TermVector.NO) {
      this.storeTermVector = false;
    } else if (termVector == TermVector.YES) {
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == TermVector.YES) {
      this.storeTermVector = true;
    } else {
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == TermVector.WITH_POSITIONS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == TermVector.WITH_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = true;
    }
    else if (termVector == TermVector.WITH_POSITIONS_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = true;
    }
    else {
      throw new IllegalArgumentException("unknown termVector parameter " + termVector);
    }
  }
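On the reading side, vectors stored with positions or offsets come back from IndexReader as TermPositionVector instances (see the IndexReader, TermPositionVector and SegmentTermPositionVector hunks further down). A hedged sketch of reading them back, not part of the commit; it assumes an already-open IndexReader named reader, a document number docNumber, and an illustrative field name and term:

    // Hedged sketch: retrieve a stored term vector and, if positions/offsets
    // were stored, downcast to the new TermPositionVector interface.
    TermFreqVector tfv = reader.getTermFreqVector(docNumber, "contents");
    if (tfv instanceof TermPositionVector) {
      TermPositionVector tpv = (TermPositionVector) tfv;
      int idx = tpv.indexOf("fox");                           // term index, -1 if absent
      if (idx != -1) {
        int[] positions = tpv.getTermPositions(idx);          // may be null if positions were not stored
        TermVectorOffsetInfo[] offsets = tpv.getOffsets(idx); // may be null if offsets were not stored
      }
    }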
@@ -456,6 +529,23 @@ public final class Field implements java.io.Serializable {
   */
  public final boolean isTermVectorStored() { return storeTermVector; }

  /**
   * True iff terms are stored as term vector together with their offsets
   * (start and end positon in source text).
   * @return
   */
  public boolean isStoreOffsetWithTermVector(){
    return storeOffsetWithTermVector;
  }

  /**
   * True iff terms are stored as term vector together with their token positions.
   * @return
   */
  public boolean isStorePositionWithTermVector(){
    return storePositionWithTermVector;
  }

  /** True iff the value of the filed is stored as binary */
  public final boolean isBinary() { return isBinary; }

@@ -479,6 +569,16 @@ public final class Field implements java.io.Serializable {
      result.append(",");
      result.append("termVector");
    }
    if (storeOffsetWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorOffsets");
    }
    if (storePositionWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorPosition");
    }
    if (isBinary) {
      if (result.length() > 0)
        result.append(",");

@ -74,6 +74,7 @@ final class DocumentWriter {
|
|||
postingTable.clear(); // clear postingTable
|
||||
fieldLengths = new int[fieldInfos.size()]; // init fieldLengths
|
||||
fieldPositions = new int[fieldInfos.size()]; // init fieldPositions
|
||||
fieldOffsets = new int[fieldInfos.size()]; // init fieldOffsets
|
||||
|
||||
fieldBoosts = new float[fieldInfos.size()]; // init fieldBoosts
|
||||
Arrays.fill(fieldBoosts, doc.getBoost());
|
||||
|
@ -100,7 +101,7 @@ final class DocumentWriter {
|
|||
writePostings(postings, segment);
|
||||
|
||||
// write norms of indexed fields
|
||||
writeNorms(doc, segment);
|
||||
writeNorms(segment);
|
||||
|
||||
}
|
||||
|
||||
|
@ -109,6 +110,7 @@ final class DocumentWriter {
|
|||
private final Hashtable postingTable = new Hashtable();
|
||||
private int[] fieldLengths;
|
||||
private int[] fieldPositions;
|
||||
private int[] fieldOffsets;
|
||||
private float[] fieldBoosts;
|
||||
|
||||
// Tokenizes the fields of a document into Postings.
|
||||
|
@ -122,12 +124,19 @@ final class DocumentWriter {
|
|||
|
||||
int length = fieldLengths[fieldNumber]; // length of field
|
||||
int position = fieldPositions[fieldNumber]; // position in field
|
||||
int offset = fieldOffsets[fieldNumber]; // offset field
|
||||
|
||||
if (field.isIndexed()) {
|
||||
if (!field.isTokenized()) { // un-tokenized field
|
||||
addPosition(fieldName, field.stringValue(), position++);
|
||||
String stringValue = field.stringValue();
|
||||
if(field.isStoreOffsetWithTermVector())
|
||||
addPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.length()));
|
||||
else
|
||||
addPosition(fieldName, stringValue, position++, null);
|
||||
offset += stringValue.length();
|
||||
length++;
|
||||
} else {
|
||||
} else
|
||||
{
|
||||
Reader reader; // find or make Reader
|
||||
if (field.readerValue() != null)
|
||||
reader = field.readerValue();
|
||||
|
@ -140,11 +149,23 @@ final class DocumentWriter {
|
|||
// Tokenize field and add to postingTable
|
||||
TokenStream stream = analyzer.tokenStream(fieldName, reader);
|
||||
try {
|
||||
Token lastToken = null;
|
||||
for (Token t = stream.next(); t != null; t = stream.next()) {
|
||||
position += (t.getPositionIncrement() - 1);
|
||||
addPosition(fieldName, t.termText(), position++);
|
||||
if (++length > maxFieldLength) break;
|
||||
|
||||
if(field.isStoreOffsetWithTermVector())
|
||||
addPosition(fieldName, t.termText(), position++, new TermVectorOffsetInfo(offset + t.startOffset(), offset + t.endOffset()));
|
||||
else
|
||||
addPosition(fieldName, t.termText(), position++, null);
|
||||
|
||||
lastToken = t;
|
||||
if (++length > maxFieldLength)
|
||||
break;
|
||||
}
|
||||
|
||||
if(lastToken != null)
|
||||
offset += lastToken.endOffset() + 1;
|
||||
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
|
@ -153,14 +174,16 @@ final class DocumentWriter {
|
|||
fieldLengths[fieldNumber] = length; // save field length
|
||||
fieldPositions[fieldNumber] = position; // save field position
|
||||
fieldBoosts[fieldNumber] *= field.getBoost();
|
||||
fieldOffsets[fieldNumber] = offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final Term termBuffer = new Term("", ""); // avoid consing
|
||||
|
||||
private final void addPosition(String field, String text, int position) {
|
||||
private final void addPosition(String field, String text, int position, TermVectorOffsetInfo offset) {
|
||||
termBuffer.set(field, text);
|
||||
//System.out.println("Offset: " + offset);
|
||||
Posting ti = (Posting) postingTable.get(termBuffer);
|
||||
if (ti != null) { // word seen before
|
||||
int freq = ti.freq;
|
||||
|
@ -172,10 +195,23 @@ final class DocumentWriter {
|
|||
ti.positions = newPositions;
|
||||
}
|
||||
ti.positions[freq] = position; // add new position
|
||||
|
||||
if (offset != null) {
|
||||
if (ti.offsets.length == freq){
|
||||
TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[freq*2];
|
||||
TermVectorOffsetInfo [] offsets = ti.offsets;
|
||||
for (int i = 0; i < freq; i++)
|
||||
{
|
||||
newOffsets[i] = offsets[i];
|
||||
}
|
||||
ti.offsets = newOffsets;
|
||||
}
|
||||
ti.offsets[freq] = offset;
|
||||
}
|
||||
ti.freq = freq + 1; // update frequency
|
||||
} else { // word not seen before
|
||||
Term term = new Term(field, text, false);
|
||||
postingTable.put(term, new Posting(term, position));
|
||||
postingTable.put(term, new Posting(term, position, offset));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -294,12 +330,13 @@ final class DocumentWriter {
|
|||
termVectorWriter.openDocument();
|
||||
}
|
||||
termVectorWriter.openField(currentField);
|
||||
|
||||
} else if (termVectorWriter != null) {
|
||||
termVectorWriter.closeField();
|
||||
}
|
||||
}
|
||||
if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
|
||||
termVectorWriter.addTerm(posting.term.text(), postingFreq);
|
||||
termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
|
||||
}
|
||||
}
|
||||
if (termVectorWriter != null)
|
||||
|
@ -316,7 +353,7 @@ final class DocumentWriter {
|
|||
}
|
||||
}
|
||||
|
||||
private final void writeNorms(Document doc, String segment) throws IOException {
|
||||
private final void writeNorms(String segment) throws IOException {
|
||||
for(int n = 0; n < fieldInfos.size(); n++){
|
||||
FieldInfo fi = fieldInfos.fieldInfo(n);
|
||||
if(fi.isIndexed){
|
||||
|
@ -336,11 +373,18 @@ final class Posting { // info about a Term in a doc
|
|||
Term term; // the Term
|
||||
int freq; // its frequency in doc
|
||||
int[] positions; // positions it occurs at
|
||||
TermVectorOffsetInfo [] offsets;
|
||||
|
||||
Posting(Term t, int position) {
|
||||
Posting(Term t, int position, TermVectorOffsetInfo offset) {
|
||||
term = t;
|
||||
freq = 1;
|
||||
positions = new int[1];
|
||||
positions[0] = position;
|
||||
if(offset != null){
|
||||
offsets = new TermVectorOffsetInfo[1];
|
||||
offsets[0] = offset;
|
||||
}
|
||||
else
|
||||
offsets = null;
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -23,11 +23,16 @@ final class FieldInfo {

  // true if term vector for this field should be stored
  boolean storeTermVector;
  boolean storeOffsetWithTermVector;
  boolean storePositionWithTermVector;

  FieldInfo(String na, boolean tk, int nu, boolean storeTermVector) {
  FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
            boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
    name = na;
    isIndexed = tk;
    number = nu;
    this.storeTermVector = storeTermVector;
    this.storeOffsetWithTermVector = storeOffsetWithTermVector;
    this.storePositionWithTermVector = storePositionWithTermVector;
  }
}

@@ -33,6 +33,12 @@ import org.apache.lucene.store.IndexInput;
 * accessing this object.
 */
final class FieldInfos {

  static final byte IS_INDEXED = 0x1;
  static final byte STORE_TERMVECTOR = 0x2;
  static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
  static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;

  private ArrayList byNumber = new ArrayList();
  private HashMap byName = new HashMap();

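These per-field flags are packed into a single byte in the .fnm file; the write()/read() hunks of FieldInfos further down do exactly this. A hedged, self-contained sketch of the packing and unpacking, written as it would look inside the org.apache.lucene.index package, with fi standing for an arbitrary FieldInfo:

    // Hedged sketch of the .fnm flag byte; mirrors the write()/read() changes below.
    byte bits = 0x0;
    if (fi.isIndexed)                   bits |= FieldInfos.IS_INDEXED;                      // 0x1
    if (fi.storeTermVector)             bits |= FieldInfos.STORE_TERMVECTOR;                // 0x2
    if (fi.storePositionWithTermVector) bits |= FieldInfos.STORE_POSITIONS_WITH_TERMVECTOR; // 0x4
    if (fi.storeOffsetWithTermVector)   bits |= FieldInfos.STORE_OFFSET_WITH_TERMVECTOR;    // 0x8

    // Reading the byte back:
    boolean isIndexed                   = (bits & FieldInfos.IS_INDEXED) != 0;
    boolean storeTermVector             = (bits & FieldInfos.STORE_TERMVECTOR) != 0;
    boolean storePositionWithTermVector = (bits & FieldInfos.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
    boolean storeOffsetWithTermVector   = (bits & FieldInfos.STORE_OFFSET_WITH_TERMVECTOR) != 0;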
@ -61,23 +67,30 @@ final class FieldInfos {
|
|||
Enumeration fields = doc.fields();
|
||||
while (fields.hasMoreElements()) {
|
||||
Field field = (Field) fields.nextElement();
|
||||
add(field.name(), field.isIndexed(), field.isTermVectorStored());
|
||||
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
|
||||
field.isStoreOffsetWithTermVector());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add fields that are indexed. Whether they have termvectors has to be specified.
|
||||
*
|
||||
* @param names The names of the fields
|
||||
* @param storeTermVectors Whether the fields store term vectors or not
|
||||
* @param storePositionWithTermVector treu if positions should be stored.
|
||||
* @param storeOffsetWithTermVector true if offsets should be stored
|
||||
*/
|
||||
public void addIndexed(Collection names, boolean storeTermVectors) {
|
||||
public void addIndexed(Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
|
||||
boolean storeOffsetWithTermVector) {
|
||||
Iterator i = names.iterator();
|
||||
while (i.hasNext()) {
|
||||
add((String)i.next(), true, storeTermVectors);
|
||||
add((String)i.next(), true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Assumes the field is not storing term vectors
|
||||
* Assumes the fields are not storing term vectors.
|
||||
*
|
||||
* @param names The names of the fields
|
||||
* @param isIndexed Whether the fields are indexed or not
|
||||
*
|
||||
|
@ -91,28 +104,43 @@ final class FieldInfos {
|
|||
}
|
||||
|
||||
/**
|
||||
* Calls three parameter add with false for the storeTermVector parameter
|
||||
* Calls 5 parameter add with false for all TermVector parameters.
|
||||
*
|
||||
* @param name The name of the Field
|
||||
* @param isIndexed true if the field is indexed
|
||||
* @see #add(String, boolean, boolean)
|
||||
* @see #add(String, boolean, boolean, boolean, boolean)
|
||||
*/
|
||||
public void add(String name, boolean isIndexed) {
|
||||
add(name, isIndexed, false);
|
||||
add(name, isIndexed, false, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls 5 parameter add with false for term vector positions and offsets.
|
||||
*
|
||||
* @param name The name of the field
|
||||
* @param isIndexed true if the field is indexed
|
||||
* @param storeTermVector true if the term vector should be stored
|
||||
*/
|
||||
public void add(String name, boolean isIndexed, boolean storeTermVector){
|
||||
add(name, isIndexed, storeTermVector, false, false);
|
||||
}
|
||||
|
||||
/** If the field is not yet known, adds it. If it is known, checks to make
|
||||
* sure that the isIndexed flag is the same as was given previously for this
|
||||
* field. If not - marks it as being indexed. Same goes for storeTermVector
|
||||
* field. If not - marks it as being indexed. Same goes for the TermVector
|
||||
* parameters.
|
||||
*
|
||||
* @param name The name of the field
|
||||
* @param isIndexed true if the field is indexed
|
||||
* @param storeTermVector true if the term vector should be stored
|
||||
* @param storePositionWithTermVector true if the term vector with positions should be stored
|
||||
* @param storeOffsetWithTermVector true if the term vector with offsets should be stored
|
||||
*/
|
||||
public void add(String name, boolean isIndexed, boolean storeTermVector) {
|
||||
public void add(String name, boolean isIndexed, boolean storeTermVector,
|
||||
boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
|
||||
FieldInfo fi = fieldInfo(name);
|
||||
if (fi == null) {
|
||||
addInternal(name, isIndexed, storeTermVector);
|
||||
addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector);
|
||||
} else {
|
||||
if (fi.isIndexed != isIndexed) {
|
||||
fi.isIndexed = true; // once indexed, always index
|
||||
|
@ -120,13 +148,21 @@ final class FieldInfos {
|
|||
if (fi.storeTermVector != storeTermVector) {
|
||||
fi.storeTermVector = true; // once vector, always vector
|
||||
}
|
||||
if (fi.storePositionWithTermVector != storePositionWithTermVector) {
|
||||
fi.storePositionWithTermVector = true; // once vector, always vector
|
||||
}
|
||||
if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
|
||||
fi.storeOffsetWithTermVector = true; // once vector, always vector
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addInternal(String name, boolean isIndexed,
|
||||
boolean storeTermVector) {
|
||||
boolean storeTermVector, boolean storePositionWithTermVector,
|
||||
boolean storeOffsetWithTermVector) {
|
||||
FieldInfo fi =
|
||||
new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector);
|
||||
new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
|
||||
storeOffsetWithTermVector);
|
||||
byNumber.add(fi);
|
||||
byName.put(name, fi);
|
||||
}
|
||||
|
@@ -180,11 +216,11 @@ final class FieldInfos {
    for (int i = 0; i < size(); i++) {
      FieldInfo fi = fieldInfo(i);
      byte bits = 0x0;
      if (fi.isIndexed) bits |= 0x1;
      if (fi.storeTermVector) bits |= 0x2;
      if (fi.isIndexed) bits |= IS_INDEXED;
      if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
      if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
      if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
      output.writeString(fi.name);
      //Was REMOVE
      //output.writeByte((byte)(fi.isIndexed ? 1 : 0));
      output.writeByte(bits);
    }
  }

@@ -194,9 +230,11 @@ final class FieldInfos {
    for (int i = 0; i < size; i++) {
      String name = input.readString().intern();
      byte bits = input.readByte();
      boolean isIndexed = (bits & 0x1) != 0;
      boolean storeTermVector = (bits & 0x2) != 0;
      addInternal(name, isIndexed, storeTermVector);
      boolean isIndexed = (bits & IS_INDEXED) != 0;
      boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
      boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
      boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector);
    }
  }

@ -16,11 +16,12 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
||||
/** A <code>FilterIndexReader</code> contains another IndexReader, which it
|
||||
* uses as its basic source of data, possibly transforming the data along the
|
||||
* way or providing additional functionality. The class
|
||||
|
@ -146,4 +147,8 @@ public class FilterIndexReader extends IndexReader {
|
|||
public Collection getIndexedFieldNames(boolean storedTermVector) {
|
||||
return in.getIndexedFieldNames(storedTermVector);
|
||||
}
|
||||
|
||||
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
|
||||
return in.getIndexedFieldNames(tvSpec);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,16 +16,16 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field; // for javadoc
|
||||
import org.apache.lucene.search.Similarity;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
/** IndexReader is an abstract class, providing an interface for accessing an
|
||||
index. Search of an index is done entirely through this abstract interface,
|
||||
|
@ -209,23 +209,37 @@ public abstract class IndexReader {
|
|||
return SegmentInfos.readCurrentVersion(directory);
|
||||
}
|
||||
|
||||
/** Return an array of term frequency vectors for the specified document.
|
||||
/**
|
||||
* Return an array of term frequency vectors for the specified document.
|
||||
* The array contains a vector for each vectorized field in the document.
|
||||
* Each vector contains terms and frequencies for all terms
|
||||
* in a given vectorized field.
|
||||
* If no such fields existed, the method returns null.
|
||||
* Each vector contains terms and frequencies for all terms in a given vectorized field.
|
||||
* If no such fields existed, the method returns null. The term vectors that are
|
||||
* returned my either be of type TermFreqVector or of type TermPositionsVector if
|
||||
* positions or offsets have been stored.
|
||||
*
|
||||
* @see Field#isTermVectorStored()
|
||||
* @param docNumber document for which term frequency vectors are returned
|
||||
* @return array of term frequency vectors. May be null if no term vectors have been
|
||||
* stored for the specified document.
|
||||
* @throws IOException if index cannot be accessed
|
||||
* @see Field#TermVector
|
||||
*/
|
||||
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
|
||||
throws IOException;
|
||||
|
||||
/** Return a term frequency vector for the specified document and field. The
|
||||
* vector returned contains terms and frequencies for those terms in
|
||||
* the specified field of this document, if the field had storeTermVector
|
||||
* flag set. If the flag was not set, the method returns null.
|
||||
|
||||
/**
|
||||
* Return a term frequency vector for the specified document and field. The
|
||||
* returned vector contains terms and frequencies for the terms in
|
||||
* the specified field of this document, if the field had the storeTermVector
|
||||
* flag set. If termvectors had been stored with positions or offsets, a
|
||||
* TermPositionsVector is returned.
|
||||
*
|
||||
* @see Field#isTermVectorStored()
|
||||
* @param docNumber document for which the term frequency vector is returned
|
||||
* @param field field for which the term frequency vector is returned.
|
||||
* @return term frequency vector May be null if field does not exist in the specified
|
||||
* document or term vector was not stored.
|
||||
* @throws IOException if index cannot be accessed
|
||||
* @see Field#TermVector
|
||||
*/
|
||||
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
|
||||
throws IOException;
|
||||
|
@ -547,9 +561,20 @@ public abstract class IndexReader {
|
|||
* @param storedTermVector if true, returns only Indexed fields that have term vector info,
|
||||
* else only indexed fields without term vector info
|
||||
* @return Collection of Strings indicating the names of the fields
|
||||
*
|
||||
* @deprecated Replaced by {@link #getIndexedFieldNames (Field.TermVector tvSpec)}
|
||||
*/
|
||||
public abstract Collection getIndexedFieldNames(boolean storedTermVector);
|
||||
|
||||
/**
|
||||
* Get a list of unique field names that exist in this index, are indexed, and have
|
||||
* the specified term vector information.
|
||||
*
|
||||
* @param tvSpec specifies which term vector information shoul dbe available for the fields
|
||||
* @return Collection of Strings indicating the names of the fields
|
||||
*/
|
||||
public abstract Collection getIndexedFieldNames(Field.TermVector tvSpec);
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff the index in the named directory is
|
||||
* currently locked.
|
||||
|
@ -560,7 +585,6 @@ public abstract class IndexReader {
|
|||
return
|
||||
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
|
||||
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -16,16 +16,13 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/** An IndexReader which reads multiple indexes, appending their content.
|
||||
*
|
||||
* @version $Id$
|
||||
|
@ -219,11 +216,7 @@ public class MultiReader extends IndexReader {
|
|||
for (int i = 0; i < subReaders.length; i++) {
|
||||
IndexReader reader = subReaders[i];
|
||||
Collection names = reader.getFieldNames();
|
||||
// iterate through the field names and add them to the set
|
||||
for (Iterator iterator = names.iterator(); iterator.hasNext();) {
|
||||
String s = (String) iterator.next();
|
||||
fieldSet.add(s);
|
||||
}
|
||||
fieldSet.addAll(names);
|
||||
}
|
||||
return fieldSet;
|
||||
}
|
||||
|
@ -253,6 +246,17 @@ public class MultiReader extends IndexReader {
|
|||
return fieldSet;
|
||||
}
|
||||
|
||||
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
|
||||
// maintain a unique set of field names
|
||||
Set fieldSet = new HashSet();
|
||||
for (int i = 0; i < subReaders.length; i++) {
|
||||
IndexReader reader = subReaders[i];
|
||||
Collection names = reader.getIndexedFieldNames(tvSpec);
|
||||
fieldSet.addAll(names);
|
||||
}
|
||||
return fieldSet;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class MultiTermEnum extends TermEnum {
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.util.Vector;
|
|||
import java.util.Iterator;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMOutputStream;
|
||||
|
@ -157,8 +158,11 @@ final class SegmentMerger {
|
|||
int docCount = 0;
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
IndexReader reader = (IndexReader) readers.elementAt(i);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(true), true);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS), true, true, true);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.WITH_POSITIONS), true, true, false);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.WITH_OFFSETS), true, false, true);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.YES), true, false, false);
|
||||
fieldInfos.addIndexed(reader.getIndexedFieldNames(Field.TermVector.NO), false, false, false);
|
||||
fieldInfos.add(reader.getFieldNames(false), false);
|
||||
}
|
||||
fieldInfos.write(directory, segment + ".fnm");
|
||||
|
@ -195,29 +199,9 @@ final class SegmentMerger {
|
|||
int maxDoc = reader.maxDoc();
|
||||
for (int docNum = 0; docNum < maxDoc; docNum++) {
|
||||
// skip deleted docs
|
||||
if (reader.isDeleted(docNum)) {
|
||||
if (reader.isDeleted(docNum))
|
||||
continue;
|
||||
}
|
||||
termVectorsWriter.openDocument();
|
||||
|
||||
// get all term vectors
|
||||
TermFreqVector[] sourceTermVector =
|
||||
reader.getTermFreqVectors(docNum);
|
||||
|
||||
if (sourceTermVector != null) {
|
||||
for (int f = 0; f < sourceTermVector.length; f++) {
|
||||
// translate field numbers
|
||||
TermFreqVector termVector = sourceTermVector[f];
|
||||
termVectorsWriter.openField(termVector.getField());
|
||||
String [] terms = termVector.getTerms();
|
||||
int [] freqs = termVector.getTermFrequencies();
|
||||
|
||||
for (int t = 0; t < terms.length; t++) {
|
||||
termVectorsWriter.addTerm(terms[t], freqs[t]);
|
||||
}
|
||||
}
|
||||
termVectorsWriter.closeDocument();
|
||||
}
|
||||
termVectorsWriter.addAllDocVectors(reader.getTermFreqVectors(docNum));
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Set;
|
|||
import java.util.Vector;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -191,7 +192,9 @@ class SegmentReader extends IndexReader {
|
|||
proxStream.close();
|
||||
|
||||
closeNorms();
|
||||
if (termVectorsReader != null) termVectorsReader.close();
|
||||
|
||||
if (termVectorsReader != null)
|
||||
termVectorsReader.close();
|
||||
|
||||
if (cfsReader != null)
|
||||
cfsReader.close();
|
||||
|
@ -342,16 +345,63 @@ class SegmentReader extends IndexReader {
|
|||
* @return Collection of Strings indicating the names of the fields
|
||||
*/
|
||||
public Collection getIndexedFieldNames(boolean storedTermVector) {
|
||||
if(storedTermVector){
|
||||
Set fieldSet = new HashSet();
|
||||
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.YES));
|
||||
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS));
|
||||
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_OFFSETS));
|
||||
fieldSet.addAll(getIndexedFieldNames(Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||
return fieldSet;
|
||||
}
|
||||
else
|
||||
return getIndexedFieldNames(Field.TermVector.NO);
|
||||
}
|
||||
|
||||
public Collection getIndexedFieldNames (Field.TermVector tvSpec){
|
||||
boolean storedTermVector;
|
||||
boolean storePositionWithTermVector;
|
||||
boolean storeOffsetWithTermVector;
|
||||
|
||||
if(tvSpec == Field.TermVector.NO){
|
||||
storedTermVector = false;
|
||||
storePositionWithTermVector = false;
|
||||
storeOffsetWithTermVector = false;
|
||||
}
|
||||
else if(tvSpec == Field.TermVector.YES){
|
||||
storedTermVector = true;
|
||||
storePositionWithTermVector = false;
|
||||
storeOffsetWithTermVector = false;
|
||||
}
|
||||
else if(tvSpec == Field.TermVector.WITH_POSITIONS){
|
||||
storedTermVector = true;
|
||||
storePositionWithTermVector = true;
|
||||
storeOffsetWithTermVector = false;
|
||||
}
|
||||
else if(tvSpec == Field.TermVector.WITH_OFFSETS){
|
||||
storedTermVector = true;
|
||||
storePositionWithTermVector = false;
|
||||
storeOffsetWithTermVector = true;
|
||||
}
|
||||
else if(tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS){
|
||||
storedTermVector = true;
|
||||
storePositionWithTermVector = true;
|
||||
storeOffsetWithTermVector = true;
|
||||
}
|
||||
else{
|
||||
throw new IllegalArgumentException("unknown termVector parameter " + tvSpec);
|
||||
}
|
||||
|
||||
// maintain a unique set of field names
|
||||
Set fieldSet = new HashSet();
|
||||
for (int i = 0; i < fieldInfos.size(); i++) {
|
||||
FieldInfo fi = fieldInfos.fieldInfo(i);
|
||||
if (fi.isIndexed == true && fi.storeTermVector == storedTermVector){
|
||||
if (fi.isIndexed && fi.storeTermVector == storedTermVector &&
|
||||
fi.storePositionWithTermVector == storePositionWithTermVector &&
|
||||
fi.storeOffsetWithTermVector == storeOffsetWithTermVector){
|
||||
fieldSet.add(fi.name);
|
||||
}
|
||||
}
|
||||
return fieldSet;
|
||||
|
||||
}
|
||||
|
||||
public synchronized byte[] norms(String field) throws IOException {
|
||||
|
@ -429,11 +479,13 @@ class SegmentReader extends IndexReader {
|
|||
* vector returned contains term numbers and frequencies for all terms in
|
||||
* the specified field of this document, if the field had storeTermVector
|
||||
* flag set. If the flag was not set, the method returns null.
|
||||
* @throws IOException
|
||||
*/
|
||||
public TermFreqVector getTermFreqVector(int docNumber, String field) {
|
||||
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
|
||||
// Check if this field is invalid or has no stored term vector
|
||||
FieldInfo fi = fieldInfos.fieldInfo(field);
|
||||
if (fi == null || !fi.storeTermVector) return null;
|
||||
if (fi == null || !fi.storeTermVector || termVectorsReader == null)
|
||||
return null;
|
||||
|
||||
return termVectorsReader.get(docNumber, field);
|
||||
}
|
||||
|
@ -444,8 +496,9 @@ class SegmentReader extends IndexReader {
|
|||
* Each vector vector contains term numbers and frequencies for all terms
|
||||
* in a given vectorized field.
|
||||
* If no such fields existed, the method returns null.
|
||||
* @throws IOException
|
||||
*/
|
||||
public TermFreqVector[] getTermFreqVectors(int docNumber) {
|
||||
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
|
||||
if (termVectorsReader == null)
|
||||
return null;
|
||||
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class SegmentTermPositionVector extends SegmentTermVector implements TermPositionVector {
|
||||
protected int[][] positions;
|
||||
protected TermVectorOffsetInfo[][] offsets;
|
||||
public static final int[] EMPTY_TERM_POS = new int[0];
|
||||
|
||||
public SegmentTermPositionVector(String field, String terms[], int termFreqs[], int[][] positions, TermVectorOffsetInfo[][] offsets) {
|
||||
super(field, terms, termFreqs);
|
||||
this.offsets = offsets;
|
||||
this.positions = positions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of TermVectorOffsetInfo in which the term is found.
|
||||
*
|
||||
* @param index The position in the array to get the offsets from
|
||||
* @return An array of TermVectorOffsetInfo objects or the empty list
|
||||
* @see org.apache.lucene.analysis.Token
|
||||
*/
|
||||
public TermVectorOffsetInfo[] getOffsets(int index) {
|
||||
TermVectorOffsetInfo[] result = TermVectorOffsetInfo.EMPTY_OFFSET_INFO;
|
||||
if(offsets == null)
|
||||
return null;
|
||||
if (index >=0 && index < offsets.length)
|
||||
{
|
||||
result = offsets[index];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of positions in which the term is found.
|
||||
* Terms are identified by the index at which its number appears in the
|
||||
* term String array obtained from the <code>indexOf</code> method.
|
||||
*/
|
||||
public int[] getTermPositions(int index) {
|
||||
int[] result = EMPTY_TERM_POS;
|
||||
if(positions == null)
|
||||
return null;
|
||||
if (index >=0 && index < positions.length)
|
||||
{
|
||||
result = positions[index];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -1,4 +1,21 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
@ -26,11 +43,14 @@ class SegmentTermVector implements TermFreqVector {
|
|||
StringBuffer sb = new StringBuffer();
|
||||
sb.append('{');
|
||||
sb.append(field).append(": ");
|
||||
for (int i=0; i<terms.length; i++) {
|
||||
if (i>0) sb.append(", ");
|
||||
sb.append(terms[i]).append('/').append(termFreqs[i]);
|
||||
if(terms != null){
|
||||
for (int i=0; i<terms.length; i++) {
|
||||
if (i>0) sb.append(", ");
|
||||
sb.append(terms[i]).append('/').append(termFreqs[i]);
|
||||
}
|
||||
}
|
||||
sb.append('}');
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
@ -47,6 +67,8 @@ class SegmentTermVector implements TermFreqVector {
|
|||
}
|
||||
|
||||
public int indexOf(String termText) {
|
||||
if(terms == null)
|
||||
return -1;
|
||||
int res = Arrays.binarySearch(terms, termText);
|
||||
return res >= 0 ? res : -1;
|
||||
}
|
||||
|
@ -60,7 +82,7 @@ class SegmentTermVector implements TermFreqVector {
|
|||
int res[] = new int[len];
|
||||
|
||||
for (int i=0; i < len; i++) {
|
||||
res[i] = indexOf(termNumbers[i]);
|
||||
res[i] = indexOf(termNumbers[start+ i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,21 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Provides access to stored term vector of
|
||||
* a document field.
|
||||
*/
|
||||
|
|
|
@ -1,13 +1,42 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Extends <code>TermFreqVector</code> to provide additional information about
|
||||
* positions in which each of the terms is found.
|
||||
* positions in which each of the terms is found. A TermPositionVector not necessarily
|
||||
* contains both positions and offsets, but at least one of these arrays exists.
|
||||
*/
|
||||
public interface TermPositionVector extends TermFreqVector {
|
||||
|
||||
/** Returns an array of positions in which the term is found.
|
||||
* Terms are identified by the index at which its number appears in the
|
||||
* term number array obtained from <code>getTermNumbers</code> method.
|
||||
* term String array obtained from the <code>indexOf</code> method.
|
||||
* May return null if positions have not been stored.
|
||||
*/
|
||||
public int[] getTermPositions(int index);
|
||||
|
||||
/**
|
||||
* Returns an array of TermVectorOffsetInfo in which the term is found.
|
||||
* May return null if offsets have not been stored.
|
||||
*
|
||||
* @see org.apache.lucene.analysis.Token
|
||||
*
|
||||
* @param index The position in the array to get the offsets from
|
||||
* @return An array of TermVectorOffsetInfo objects or the empty list
|
||||
*/
|
||||
public TermVectorOffsetInfo [] getOffsets(int index);
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TermVectorOffsetInfo {
|
||||
public static final TermVectorOffsetInfo [] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0];
|
||||
private int startOffset;
|
||||
private int endOffset;
|
||||
|
||||
public TermVectorOffsetInfo() {
|
||||
}
|
||||
|
||||
public TermVectorOffsetInfo(int startOffset, int endOffset) {
|
||||
this.endOffset = endOffset;
|
||||
this.startOffset = startOffset;
|
||||
}
|
||||
|
||||
public int getEndOffset() {
|
||||
return endOffset;
|
||||
}
|
||||
|
||||
public void setEndOffset(int endOffset) {
|
||||
this.endOffset = endOffset;
|
||||
}
|
||||
|
||||
public int getStartOffset() {
|
||||
return startOffset;
|
||||
}
|
||||
|
||||
public void setStartOffset(int startOffset) {
|
||||
this.startOffset = startOffset;
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof TermVectorOffsetInfo)) return false;
|
||||
|
||||
final TermVectorOffsetInfo termVectorOffsetInfo = (TermVectorOffsetInfo) o;
|
||||
|
||||
if (endOffset != termVectorOffsetInfo.endOffset) return false;
|
||||
if (startOffset != termVectorOffsetInfo.startOffset) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
int result;
|
||||
result = startOffset;
|
||||
result = 29 * result + endOffset;
|
||||
return result;
|
||||
}
|
||||
}
|
|
@ -34,22 +34,25 @@ class TermVectorsReader {
|
|||
private IndexInput tvf;
|
||||
private int size;
|
||||
|
||||
private int tvdFormat;
|
||||
private int tvfFormat;
|
||||
|
||||
TermVectorsReader(Directory d, String segment, FieldInfos fieldInfos)
|
||||
throws IOException {
|
||||
if (d.fileExists(segment + TermVectorsWriter.TVX_EXTENSION)) {
|
||||
tvx = d.openInput(segment + TermVectorsWriter.TVX_EXTENSION);
|
||||
checkValidFormat(tvx);
|
||||
tvd = d.openInput(segment + TermVectorsWriter.TVD_EXTENSION);
|
||||
checkValidFormat(tvd);
|
||||
tvdFormat = checkValidFormat(tvd);
|
||||
tvf = d.openInput(segment + TermVectorsWriter.TVF_EXTENSION);
|
||||
checkValidFormat(tvf);
|
||||
tvfFormat = checkValidFormat(tvf);
|
||||
size = (int) tvx.length() / 8;
|
||||
}
|
||||
|
||||
this.fieldInfos = fieldInfos;
|
||||
}
|
||||
|
||||
private void checkValidFormat(IndexInput in) throws IOException
|
||||
private int checkValidFormat(IndexInput in) throws IOException
|
||||
{
|
||||
int format = in.readInt();
|
||||
if (format > TermVectorsWriter.FORMAT_VERSION)
|
||||
|
@ -57,7 +60,7 @@ class TermVectorsReader {
|
|||
throw new IOException("Incompatible format version: " + format + " expected "
|
||||
+ TermVectorsWriter.FORMAT_VERSION + " or less");
|
||||
}
|
||||
|
||||
return format;
|
||||
}
|
||||
|
||||
void close() throws IOException {
|
||||
|
@ -82,100 +85,101 @@ class TermVectorsReader {
|
|||
* Retrieve the term vector for the given document and field
|
||||
* @param docNum The document number to retrieve the vector for
|
||||
* @param field The field within the document to retrieve
|
||||
* @return The TermFreqVector for the document and field or null
|
||||
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
|
||||
* @throws IOException
|
||||
*/
|
||||
synchronized TermFreqVector get(int docNum, String field) {
|
||||
synchronized TermFreqVector get(int docNum, String field) throws IOException {
|
||||
// Check if no term vectors are available for this segment at all
|
||||
int fieldNumber = fieldInfos.fieldNumber(field);
|
||||
TermFreqVector result = null;
|
||||
if (tvx != null) {
|
||||
try {
|
||||
//We need to account for the FORMAT_SIZE at when seeking in the tvx
|
||||
//We don't need to do this in other seeks because we already have the file pointer
|
||||
//that was written in another file
|
||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
//System.out.println("TVX Pointer: " + tvx.getFilePointer());
|
||||
long position = tvx.readLong();
|
||||
//We need to account for the FORMAT_SIZE at when seeking in the tvx
|
||||
//We don't need to do this in other seeks because we already have the
|
||||
// file pointer
|
||||
//that was written in another file
|
||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
//System.out.println("TVX Pointer: " + tvx.getFilePointer());
|
||||
long position = tvx.readLong();
|
||||
|
||||
tvd.seek(position);
|
||||
int fieldCount = tvd.readVInt();
|
||||
//System.out.println("Num Fields: " + fieldCount);
|
||||
// There are only a few fields per document. We opt for a full scan
|
||||
// rather then requiring that they be ordered. We need to read through
|
||||
// all of the fields anyway to get to the tvf pointers.
|
||||
int number = 0;
|
||||
int found = -1;
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
tvd.seek(position);
|
||||
int fieldCount = tvd.readVInt();
|
||||
//System.out.println("Num Fields: " + fieldCount);
|
||||
// There are only a few fields per document. We opt for a full scan
|
||||
// rather then requiring that they be ordered. We need to read through
|
||||
// all of the fields anyway to get to the tvf pointers.
|
||||
int number = 0;
|
||||
int found = -1;
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
if(tvdFormat == TermVectorsWriter.FORMAT_VERSION)
|
||||
number = tvd.readVInt();
|
||||
else
|
||||
number += tvd.readVInt();
|
||||
if (number == fieldNumber) found = i;
|
||||
}
|
||||
|
||||
// This field, although valid in the segment, was not found in this document
|
||||
if (found != -1) {
|
||||
// Compute position in the tvf file
|
||||
position = 0;
|
||||
for (int i = 0; i <= found; i++)
|
||||
{
|
||||
position += tvd.readVLong();
|
||||
}
|
||||
result = readTermVector(field, position);
|
||||
}
|
||||
else {
|
||||
//System.out.println("Field not found");
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
//e.printStackTrace();
|
||||
if (number == fieldNumber)
|
||||
found = i;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
System.out.println("No tvx file");
|
||||
|
||||
// This field, although valid in the segment, was not found in this
|
||||
// document
|
||||
if (found != -1) {
|
||||
// Compute position in the tvf file
|
||||
position = 0;
|
||||
for (int i = 0; i <= found; i++)
|
||||
position += tvd.readVLong();
|
||||
|
||||
result = readTermVector(field, position);
|
||||
} else {
|
||||
//System.out.println("Field not found");
|
||||
}
|
||||
} else {
|
||||
//System.out.println("No tvx file");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/** Return all term vectors stored for this document or null if the could not be read in. */
|
||||
synchronized TermFreqVector[] get(int docNum) {
|
||||
/**
|
||||
* Return all term vectors stored for this document or null if there are no term vectors
|
||||
* for the document.
|
||||
* @throws IOException
|
||||
*/
|
||||
synchronized TermFreqVector[] get(int docNum) throws IOException {
|
||||
TermFreqVector[] result = null;
|
||||
// Check if no term vectors are available for this segment at all
|
||||
if (tvx != null) {
|
||||
try {
|
||||
//We need to offset by
|
||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
long position = tvx.readLong();
|
||||
//We need to offset by
|
||||
tvx.seek((docNum * 8L) + TermVectorsWriter.FORMAT_SIZE);
|
||||
long position = tvx.readLong();
|
||||
|
||||
tvd.seek(position);
|
||||
int fieldCount = tvd.readVInt();
|
||||
tvd.seek(position);
|
||||
int fieldCount = tvd.readVInt();
|
||||
|
||||
// No fields are vectorized for this document
|
||||
if (fieldCount != 0) {
|
||||
int number = 0;
|
||||
String[] fields = new String[fieldCount];
|
||||
// No fields are vectorized for this document
|
||||
if (fieldCount != 0) {
|
||||
int number = 0;
|
||||
String[] fields = new String[fieldCount];
|
||||
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
if(tvdFormat == TermVectorsWriter.FORMAT_VERSION)
|
||||
number = tvd.readVInt();
|
||||
else
|
||||
number += tvd.readVInt();
|
||||
fields[i] = fieldInfos.fieldName(number);
|
||||
}
|
||||
|
||||
// Compute position in the tvf file
|
||||
position = 0;
|
||||
long[] tvfPointers = new long[fieldCount];
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
position += tvd.readVLong();
|
||||
tvfPointers[i] = position;
|
||||
}
|
||||
|
||||
result = readTermVectors(fields, tvfPointers);
|
||||
fields[i] = fieldInfos.fieldName(number);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
|
||||
// Compute position in the tvf file
|
||||
position = 0;
|
||||
long[] tvfPointers = new long[fieldCount];
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
position += tvd.readVLong();
|
||||
tvfPointers[i] = position;
|
||||
}
|
||||
|
||||
result = readTermVectors(fields, tvfPointers);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
System.out.println("No tvx file");
|
||||
} else {
|
||||
//System.out.println("No tvx file");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -206,20 +210,41 @@ class TermVectorsReader {
|
|||
|
||||
int numTerms = tvf.readVInt();
|
||||
//System.out.println("Num Terms: " + numTerms);
|
||||
// If no terms - return a constant empty termvector
|
||||
if (numTerms == 0) return new SegmentTermVector(field, null, null);
|
||||
// If no terms - return a constant empty termvector. However, this should never occur!
|
||||
if (numTerms == 0)
|
||||
return new SegmentTermVector(field, null, null);
|
||||
|
||||
tvf.readVInt();
|
||||
boolean storePositions;
|
||||
boolean storeOffsets;
|
||||
|
||||
if(tvfFormat == TermVectorsWriter.FORMAT_VERSION){
|
||||
byte bits = tvf.readByte();
|
||||
storePositions = (bits & TermVectorsWriter.STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
||||
storeOffsets = (bits & TermVectorsWriter.STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||
}
|
||||
else{
|
||||
tvf.readVInt();
|
||||
storePositions = false;
|
||||
storeOffsets = false;
|
||||
}
|
||||
|
||||
String terms[] = new String[numTerms];
|
||||
|
||||
int termFreqs[] = new int[numTerms];
|
||||
|
||||
// we may not need these, but declare them
|
||||
int positions[][] = null;
|
||||
TermVectorOffsetInfo offsets[][] = null;
|
||||
if(storePositions)
|
||||
positions = new int[numTerms][];
|
||||
if(storeOffsets)
|
||||
offsets = new TermVectorOffsetInfo[numTerms][];
|
||||
|
||||
int start = 0;
|
||||
int deltaLength = 0;
|
||||
int totalLength = 0;
|
||||
char [] buffer = {};
|
||||
String previousString = "";
|
||||
|
||||
for (int i = 0; i < numTerms; i++) {
|
||||
start = tvf.readVInt();
|
||||
deltaLength = tvf.readVInt();
|
||||
|
@ -233,9 +258,40 @@ class TermVectorsReader {
|
|||
tvf.readChars(buffer, start, deltaLength);
|
||||
terms[i] = new String(buffer, 0, totalLength);
|
||||
previousString = terms[i];
|
||||
termFreqs[i] = tvf.readVInt();
|
||||
int freq = tvf.readVInt();
|
||||
termFreqs[i] = freq;
|
||||
|
||||
if (storePositions) { //read in the positions
|
||||
int [] pos = new int[freq];
|
||||
positions[i] = pos;
|
||||
int prevPosition = 0;
|
||||
for (int j = 0; j < freq; j++)
|
||||
{
|
||||
pos[j] = prevPosition + tvf.readVInt();
|
||||
prevPosition = pos[j];
|
||||
}
|
||||
}
|
||||
|
||||
if (storeOffsets) {
|
||||
TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
|
||||
offsets[i] = offs;
|
||||
int prevOffset = 0;
|
||||
for (int j = 0; j < freq; j++) {
|
||||
int startOffset = prevOffset + tvf.readVInt();
|
||||
int endOffset = startOffset + tvf.readVInt();
|
||||
offs[j] = new TermVectorOffsetInfo(startOffset, endOffset);
|
||||
prevOffset = endOffset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SegmentTermVector tv;
|
||||
if (storePositions || storeOffsets){
|
||||
tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
|
||||
}
|
||||
else {
|
||||
tv = new SegmentTermVector(field, terms, termFreqs);
|
||||
}
|
||||
SegmentTermVector tv = new SegmentTermVector(field, terms, termFreqs);
|
||||
return tv;
|
||||
}
|
||||
|
||||
|
|
|
@@ -50,14 +50,17 @@ import java.util.Vector;
*
*/
final class TermVectorsWriter {
public static final int FORMAT_VERSION = 1;
public static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
public static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;

public static final int FORMAT_VERSION = 2;
//The size in bytes that the FORMAT_VERSION will take up at the beginning of each file
public static final int FORMAT_SIZE = 4;

//TODO: Figure out how to write with or w/o position information and read back in
public static final String TVX_EXTENSION = ".tvx";
public static final String TVD_EXTENSION = ".tvd";
public static final String TVF_EXTENSION = ".tvf";

private IndexOutput tvx = null, tvd = null, tvf = null;
private Vector fields = null;
private Vector terms = null;
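Bumping FORMAT_VERSION to 2 is what lets readTermVector decide whether a per-field flag byte follows the term count; the two flag constants are OR-ed into that byte and tested with bit masks. A small sketch of the flag handling (the constants are copied from the patch, the helper class itself is hypothetical):

// Hypothetical helper mirroring how the flag byte is built and interpreted.
public class TermVectorFlagsSketch {
  static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x1;
  static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;

  static byte encode(boolean storePositions, boolean storeOffsets) {
    byte bits = 0x0;
    if (storePositions) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
    if (storeOffsets) bits |= STORE_OFFSET_WITH_TERMVECTOR;
    return bits;
  }

  public static void main(String[] args) {
    byte bits = encode(true, false);
    System.out.println("positions: " + ((bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0)); // true
    System.out.println("offsets:   " + ((bits & STORE_OFFSET_WITH_TERMVECTOR) != 0));    // false
  }
}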
@@ -66,13 +69,6 @@ final class TermVectorsWriter {
private TVField currentField = null;
private long currentDocPointer = -1;

/** Create term vectors writer for the specified segment in specified
* directory. A new TermVectorsWriter should be created for each
* segment. The parameter <code>maxFields</code> indicates how many total
* fields are found in this document. Not all of these fields may require
* termvectors to be stored, so the number of calls to
* <code>openField</code> is less or equal to this number.
*/
public TermVectorsWriter(Directory directory, String segment,
FieldInfos fieldInfos)
throws IOException {

@@ -93,7 +89,6 @@ final class TermVectorsWriter {
public final void openDocument()
throws IOException {
closeDocument();

currentDocPointer = tvd.getFilePointer();
}

@@ -119,12 +114,17 @@ final class TermVectorsWriter {
* processing of this field. If a field was previously open, it is
* closed automatically.
*/
public final void openField(String field)
throws IOException {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot open field when no document is open.");
public final void openField(String field) throws IOException {
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
openField(fieldInfo.number, fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector);
}

private void openField(int fieldNumber, boolean storePositionWithTermVector,
boolean storeOffsetWithTermVector) throws IOException{
if (!isDocumentOpen())
throw new IllegalStateException("Cannot open field when no document is open.");
closeField();
currentField = new TVField(fieldInfos.fieldNumber(field));
currentField = new TVField(fieldNumber, storePositionWithTermVector, storeOffsetWithTermVector);
}

/** Finished processing current field. This should be followed by a call to
@@ -157,58 +157,81 @@ final class TermVectorsWriter {
* times this term appears in this field, in this document.
*/
public final void addTerm(String termText, int freq) {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add terms when document is not open");
if (!isFieldOpen()) throw new IllegalStateException("Cannot add terms when field is not open");

addTermInternal(termText, freq);
addTerm(termText, freq, null, null);
}

private final void addTermInternal(String termText, int freq) {
currentField.length += freq;
public final void addTerm(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets)
{
if (!isDocumentOpen())
throw new IllegalStateException("Cannot add terms when document is not open");
if (!isFieldOpen())
throw new IllegalStateException("Cannot add terms when field is not open");

addTermInternal(termText, freq, positions, offsets);
}

private final void addTermInternal(String termText, int freq, int [] positions, TermVectorOffsetInfo [] offsets) {
TVTerm term = new TVTerm();
term.termText = termText;
term.freq = freq;
term.positions = positions;
term.offsets = offsets;
terms.add(term);
}


/** Add specified vectors to the document.
/**
* Add a complete document specified by all its term vectors. If document has no
* term vectors, add value for tvx.
*
* @param vectors
* @throws IOException
*/
public final void addVectors(TermFreqVector[] vectors)
throws IOException {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vectors when document is not open");
if (isFieldOpen()) throw new IllegalStateException("Cannot add term vectors when field is open");
public final void addAllDocVectors(TermFreqVector[] vectors)
throws IOException {
openDocument();

for (int i = 0; i < vectors.length; i++) {
addTermFreqVector(vectors[i]);
if (vectors != null) {
for (int i = 0; i < vectors.length; i++) {
boolean storePositionWithTermVector = false;
boolean storeOffsetWithTermVector = false;

try {

TermPositionVector tpVector = (TermPositionVector) vectors[i];

if (tpVector.size() > 0 && tpVector.getTermPositions(0) != null)
storePositionWithTermVector = true;
if (tpVector.size() > 0 && tpVector.getOffsets(0) != null)
storeOffsetWithTermVector = true;

FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField());
openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

for (int j = 0; j < tpVector.size(); j++)
addTermInternal(tpVector.getTerms()[j], tpVector.getTermFrequencies()[j], tpVector.getTermPositions(j),
tpVector.getOffsets(j));

closeField();

} catch (ClassCastException ignore) {

TermFreqVector tfVector = vectors[i];

FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField());
openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

for (int j = 0; j < tfVector.size(); j++)
addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null);

closeField();

}
}
}

closeDocument();
}
/** Add specified vector to the document. Document must be open but no field
* should be open or exception is thrown. The same document can have <code>addTerm</code>
* and <code>addVectors</code> calls mixed, however a given field must either be
* populated with <code>addTerm</code> or with <code>addVector</code>. *
*/
public final void addTermFreqVector(TermFreqVector vector)
throws IOException {
if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vector when document is not open");
if (isFieldOpen()) throw new IllegalStateException("Cannot add term vector when field is open");
addTermFreqVectorInternal(vector);
}

private final void addTermFreqVectorInternal(TermFreqVector vector)
throws IOException {
openField(vector.getField());
for (int i = 0; i < vector.size(); i++) {
addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i]);
}
closeField();
}


/** Close all streams. */
final void close() throws IOException {
try {
@@ -245,47 +268,74 @@ final class TermVectorsWriter {
// remember where this field is written
currentField.tvfPointer = tvf.getFilePointer();
//System.out.println("Field Pointer: " + currentField.tvfPointer);
final int size;

tvf.writeVInt(size = terms.size());
tvf.writeVInt(currentField.length - size);
final int size = terms.size();
tvf.writeVInt(size);

boolean storePositions = currentField.storePositions;
boolean storeOffsets = currentField.storeOffsets;
byte bits = 0x0;
if (storePositions)
bits |= STORE_POSITIONS_WITH_TERMVECTOR;
if (storeOffsets)
bits |= STORE_OFFSET_WITH_TERMVECTOR;
tvf.writeByte(bits);

String lastTermText = "";
// write term ids and positions
for (int i = 0; i < size; i++) {
TVTerm term = (TVTerm) terms.elementAt(i);
//tvf.writeString(term.termText);
int start = StringHelper.stringDifference(lastTermText, term.termText);
int length = term.termText.length() - start;
tvf.writeVInt(start); // write shared prefix length
tvf.writeVInt(length); // write delta length
tvf.writeVInt(start); // write shared prefix length
tvf.writeVInt(length); // write delta length
tvf.writeChars(term.termText, start, length); // write delta chars
tvf.writeVInt(term.freq);
lastTermText = term.termText;

if(storePositions){
if(term.positions == null)
throw new IllegalStateException("Trying to write positions that are null!");

// use delta encoding for positions
int position = 0;
for (int j = 0; j < term.freq; j++){
tvf.writeVInt(term.positions[j] - position);
position = term.positions[j];
}
}

if(storeOffsets){
if(term.offsets == null)
throw new IllegalStateException("Trying to write offsets that are null!");

// use delta encoding for offsets
int position = 0;
for (int j = 0; j < term.freq; j++) {
tvf.writeVInt(term.offsets[j].getStartOffset() - position);
tvf.writeVInt(term.offsets[j].getEndOffset() - term.offsets[j].getStartOffset()); //Save the diff between the two.
position = term.offsets[j].getEndOffset();
}
}
}
}
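The write side above stores each position as the difference from the previous position, and each offset as a start delta followed by the end-minus-start length. A hedged, self-contained sketch of that encoding, using a plain int list instead of IndexOutput VInts (names and sample values are illustrative):

import java.util.ArrayList;
import java.util.List;

// Illustrative delta encoding matching the position and offset loops above.
public class DeltaEncodeSketch {
  public static void main(String[] args) {
    int[] positions = {3, 7, 12};
    int[][] offsets = {{0, 5}, {10, 15}, {20, 26}}; // {startOffset, endOffset}

    List<Integer> out = new ArrayList<Integer>();
    int position = 0;
    for (int j = 0; j < positions.length; j++) {
      out.add(positions[j] - position);        // tvf.writeVInt(term.positions[j] - position)
      position = positions[j];
    }

    int prev = 0;
    for (int j = 0; j < offsets.length; j++) {
      out.add(offsets[j][0] - prev);           // start delta from the previous end offset
      out.add(offsets[j][1] - offsets[j][0]);  // length, i.e. the diff between end and start
      prev = offsets[j][1];
    }
    System.out.println(out); // [3, 4, 5, 0, 5, 5, 5, 5, 6]
  }
}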
private void writeDoc() throws IOException {
if (isFieldOpen()) throw new IllegalStateException("Field is still open while writing document");
if (isFieldOpen())
throw new IllegalStateException("Field is still open while writing document");
//System.out.println("Writing doc pointer: " + currentDocPointer);
// write document index record
tvx.writeLong(currentDocPointer);

// write document data record
final int size;
final int size = fields.size();

// write the number of fields
tvd.writeVInt(size = fields.size());
tvd.writeVInt(size);

// write field numbers
int lastFieldNumber = 0;
for (int i = 0; i < size; i++) {
TVField field = (TVField) fields.elementAt(i);
tvd.writeVInt(field.number - lastFieldNumber);

lastFieldNumber = field.number;
tvd.writeVInt(field.number);
}

// write field pointers

@@ -293,7 +343,6 @@ final class TermVectorsWriter {
for (int i = 0; i < size; i++) {
TVField field = (TVField) fields.elementAt(i);
tvd.writeVLong(field.tvfPointer - lastFieldPointer);

lastFieldPointer = field.tvfPointer;
}
//System.out.println("After writing doc pointer: " + tvx.getFilePointer());
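writeDoc stores one long per document in .tvx pointing at the document's .tvd record, and the .tvd record carries the field count, the field numbers, and the per-field .tvf pointers as delta-encoded VLongs. A small sketch of how those absolute pointers are recovered by accumulating the deltas, mirroring the tvd.readVLong() loop at the top of this diff (values are illustrative):

// Illustrative reconstruction of absolute .tvf pointers from delta-encoded VLongs.
public class TvfPointerSketch {
  public static void main(String[] args) {
    long[] deltas = {128, 64, 256};      // field.tvfPointer - lastFieldPointer, per field
    long[] tvfPointers = new long[deltas.length];
    long position = 0;
    for (int i = 0; i < deltas.length; i++) {
      position += deltas[i];             // same accumulation as position += tvd.readVLong()
      tvfPointers[i] = position;
    }
    System.out.println(java.util.Arrays.toString(tvfPointers)); // [128, 192, 448]
  }
}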
@@ -303,17 +352,20 @@ final class TermVectorsWriter {
private static class TVField {
int number;
long tvfPointer = 0;
int length = 0; // number of distinct term positions

TVField(int number) {
boolean storePositions = false;
boolean storeOffsets = false;
TVField(int number, boolean storePos, boolean storeOff) {
this.number = number;
storePositions = storePos;
storeOffsets = storeOff;
}
}

private static class TVTerm {
String termText;
int freq = 0;
//int positions[] = null;
int positions[] = null;
TermVectorOffsetInfo [] offsets = null;
}
@@ -34,7 +34,7 @@ class DocHelper {
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static final int [] FIELD_2_FREQS = {3, 1, 1};
public static final String TEXT_FIELD_2_KEY = "textField2";
public static Field textField2 = Field.Text(TEXT_FIELD_2_KEY, FIELD_2_TEXT, true);
public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

public static final String KEYWORD_TEXT = "Keyword";
public static final String KEYWORD_FIELD_KEY = "keyField";
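DocHelper now builds its test field with the five-argument Field constructor so the stored term vector carries both positions and offsets. A hedged usage sketch of that constructor in an indexing loop (the analyzer and directory choices here are illustrative only, not part of the patch):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;

// Illustrative indexing loop using the new Field.TermVector options.
public class TermVectorFieldSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true);
    Document doc = new Document();
    doc.add(new Field("contents", "some test text",
        Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    writer.addDocument(doc);
    writer.close();
  }
}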
@@ -135,7 +135,7 @@ class DocHelper {
Enumeration fields = doc.fields();
int result = 0;
while (fields.hasMoreElements()) {
fields.nextElement();
String name = fields.nextElement().toString();
result++;
}
return result;
@@ -109,6 +109,7 @@ public class TestSegmentMerger extends TestCase {
int [] freqs = vector.getTermFrequencies();
assertTrue(freqs != null);
//System.out.println("Freqs size: " + freqs.length);
assertTrue(vector instanceof TermPositionVector == true);

for (int i = 0; i < terms.length; i++) {
String term = terms[i];
@@ -178,7 +178,7 @@ public class TestSegmentReader extends TestCase {

}

public void testTermVectors() {
public void testTermVectors() throws IOException {
TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(result != null);
String [] terms = result.getTerms();
@@ -11,7 +11,11 @@ public class TestTermVectorsReader extends TestCase {
private TermVectorsWriter writer = null;
//Must be lexicographically sorted, will do in setup, versus trying to maintain here
private String [] testFields = {"f1", "f2", "f3"};
private boolean [] testFieldsStorePos = {true, false, true, false};
private boolean [] testFieldsStoreOff = {true, false, false, true};
private String [] testTerms = {"this", "is", "a", "test"};
private int [][] positions = new int[testTerms.length][];
private TermVectorOffsetInfo [][] offsets = new TermVectorOffsetInfo[testTerms.length][];
private RAMDirectory dir = new RAMDirectory();
private String seg = "testSegment";
private FieldInfos fieldInfos = new FieldInfos();
@@ -22,9 +26,22 @@ public class TestTermVectorsReader extends TestCase {

protected void setUp() {
for (int i = 0; i < testFields.length; i++) {
fieldInfos.add(testFields[i], true, true);
fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
}

for (int i = 0; i < testTerms.length; i++)
{
positions[i] = new int[3];
for (int j = 0; j < positions[i].length; j++) {
// positions are always sorted in increasing order
positions[i][j] = (int)(j * 10 + Math.random() * 10);
}
offsets[i] = new TermVectorOffsetInfo[3];
for (int j = 0; j < offsets[i].length; j++){
// offsets are always sorted in increasing order
offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].length());
}
}
try {
Arrays.sort(testTerms);
for (int j = 0; j < 5; j++) {
@@ -34,7 +51,7 @@ public class TestTermVectorsReader extends TestCase {
for (int k = 0; k < testFields.length; k++) {
writer.openField(testFields[k]);
for (int i = 0; i < testTerms.length; i++) {
writer.addTerm(testTerms[i], i);
writer.addTerm(testTerms[i], 3, positions[i], offsets[i]);
}
writer.closeField();
}
@@ -80,6 +97,103 @@ public class TestTermVectorsReader extends TestCase {
}
}

public void testPositionReader() {
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
TermPositionVector vector;
String [] terms;
vector = (TermPositionVector)reader.get(0, testFields[0]);
assertTrue(vector != null);
terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
int [] positions = vector.getTermPositions(i);
assertTrue(positions != null);
assertTrue(positions.length == this.positions[i].length);
for (int j = 0; j < positions.length; j++) {
int position = positions[j];
assertTrue(position == this.positions[i][j]);
}
TermVectorOffsetInfo [] offset = vector.getOffsets(i);
assertTrue(offset != null);
assertTrue(offset.length == this.offsets[i].length);
for (int j = 0; j < offset.length; j++) {
TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
}
}

TermFreqVector freqVector = (TermFreqVector)reader.get(0, testFields[1]); //no pos, no offset
assertTrue(freqVector != null);
assertTrue(freqVector instanceof TermPositionVector == false);
terms = freqVector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
}

} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
catch (ClassCastException cce)
{
cce.printStackTrace();
assertTrue(false);
}
}
public void testOffsetReader() {
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
TermPositionVector vector = (TermPositionVector)reader.get(0, testFields[0]);
assertTrue(vector != null);
String [] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
int [] positions = vector.getTermPositions(i);
assertTrue(positions != null);
assertTrue(positions.length == this.positions[i].length);
for (int j = 0; j < positions.length; j++) {
int position = positions[j];
assertTrue(position == this.positions[i][j]);
}
TermVectorOffsetInfo [] offset = vector.getOffsets(i);
assertTrue(offset != null);
assertTrue(offset.length == this.offsets[i].length);
for (int j = 0; j < offset.length; j++) {
TermVectorOffsetInfo termVectorOffsetInfo = offset[j];
assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
}
}

} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
catch (ClassCastException cce)
{
cce.printStackTrace();
assertTrue(false);
}
}


/**
* Make sure exceptions and bad params are handled appropriately
*/
@@ -89,9 +203,9 @@ public class TestTermVectorsReader extends TestCase {
assertTrue(reader != null);
//Bad document number, good field number
TermFreqVector vector = reader.get(50, testFields[0]);
assertTrue(vector == null);
} catch (Exception e) {
assertTrue(false);
} catch (Exception e) {
assertTrue(true);
}
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
@@ -44,7 +44,7 @@ public class TestTermVectorsWriter extends TestCase {
for (int i = 0; i < testTerms.length; i++) {
positions[i] = new int[5];
for (int j = 0; j < positions[i].length; j++) {
positions[i][j] = i * 100;
positions[i][j] = j * 10;
}
}
}
@@ -107,7 +107,7 @@ public class TestTermVectorsWriter extends TestCase {
}
}

private void checkTermVector(TermVectorsReader reader, int docNum, String field) {
private void checkTermVector(TermVectorsReader reader, int docNum, String field) throws IOException {
TermFreqVector vector = reader.get(docNum, field);
assertTrue(vector != null);
String[] terms = vector.getTerms();
@@ -43,8 +43,23 @@ public class TestTermVectors extends TestCase {
//writer.infoStream = System.out;
for (int i = 0; i < 1000; i++) {
Document doc = new Document();
Field.TermVector termVector;
int mod3 = i % 3;
int mod2 = i % 2;
if (mod2 == 0 && mod3 == 0){
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
}
else if (mod2 == 0){
termVector = Field.TermVector.WITH_POSITIONS;
}
else if (mod3 == 0){
termVector = Field.TermVector.WITH_OFFSETS;
}
else {
termVector = Field.TermVector.YES;
}
doc.add(new Field("field", English.intToEnglish(i),
Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
Field.Store.YES, Field.Index.TOKENIZED, termVector));
writer.addDocument(doc);
}
writer.close();
@@ -70,17 +85,74 @@ public class TestTermVectors extends TestCase {
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
assertTrue(vector != null);
assertTrue(vector.length == 1);
//assertTrue();
}
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(50));
//System.out.println("Explain: " + searcher.explain(query, hits.id(50)));
//System.out.println("Vector: " + vector[0].toString());
} catch (IOException e) {
assertTrue(false);
}
}

public void testTermPositionVectors() {
Query query = new TermQuery(new Term("field", "zero"));
try {
Hits hits = searcher.search(query);
assertEquals(1, hits.length());

for (int i = 0; i < hits.length(); i++)
{
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
assertTrue(vector != null);
assertTrue(vector.length == 1);

boolean shouldBePosVector = (hits.id(i) % 2 == 0) ? true : false;
assertTrue((shouldBePosVector == false) || (shouldBePosVector == true && (vector[0] instanceof TermPositionVector == true)));

boolean shouldBeOffVector = (hits.id(i) % 3 == 0) ? true : false;
assertTrue((shouldBeOffVector == false) || (shouldBeOffVector == true && (vector[0] instanceof TermPositionVector == true)));

if(shouldBePosVector || shouldBeOffVector){
TermPositionVector posVec = (TermPositionVector)vector[0];
String [] terms = posVec.getTerms();
assertTrue(terms != null && terms.length > 0);

for (int j = 0; j < terms.length; j++) {
int [] positions = posVec.getTermPositions(j);
TermVectorOffsetInfo [] offsets = posVec.getOffsets(j);

if(shouldBePosVector){
assertTrue(positions != null);
assertTrue(positions.length > 0);
}
else
assertTrue(positions == null);

if(shouldBeOffVector){
assertTrue(offsets != null);
assertTrue(offsets.length > 0);
}
else
assertTrue(offsets == null);
}
}
else{
try{
TermPositionVector posVec = (TermPositionVector)vector[0];
assertTrue(false);
}
catch(ClassCastException ignore){
TermFreqVector freqVec = vector[0];
String [] terms = freqVec.getTerms();
assertTrue(terms != null && terms.length > 0);
}

}

}
} catch (IOException e) {
assertTrue(false);
}
}

public void testTermOffsetVectors() {
Query query = new TermQuery(new Term("field", "fifty"));
try {
Hits hits = searcher.search(query);
@@ -91,6 +163,7 @@ public class TestTermVectors extends TestCase {
TermFreqVector [] vector = searcher.reader.getTermFreqVectors(hits.id(i));
assertTrue(vector != null);
assertTrue(vector.length == 1);

//assertTrue();
}
} catch (IOException e) {
@@ -164,7 +237,7 @@ public class TestTermVectors extends TestCase {
int [] freqs = vector.getTermFrequencies();
for (int i = 0; i < vTerms.length; i++)
{
if (term.text().equals(vTerms[i]) == true)
if (term.text().equals(vTerms[i]))
{
assertTrue(freqs[i] == freq);
}
@@ -184,9 +257,9 @@ public class TestTermVectors extends TestCase {
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1)));
System.out.println("Hit 2: " + hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString());
System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/
assertTrue(testDoc3.toString().equals(hits.doc(0).toString()));
assertTrue(testDoc4.toString().equals(hits.doc(1).toString()));
assertTrue(testDoc1.toString().equals(hits.doc(2).toString()));
assertTrue(hits.id(0) == 2);
assertTrue(hits.id(1) == 3);
assertTrue(hits.id(2) == 0);
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits.id(1), "field");
assertTrue(vector != null);
//System.out.println("Vector: " + vector);