LUCENE-868: New Term Vector access mechanism. Allows applications to define how they access term vector information instead of having to unpack the parallel-array representation returned by the old API.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@558592 13f79535-47bb-0310-9956-ffa450edef68
Grant Ingersoll 2007-07-23 03:17:25 +00:00
parent 86432275f6
commit e97d5830ce
16 changed files with 986 additions and 149 deletions

View File: CHANGES.txt

@ -54,6 +54,10 @@ New features
2. LUCENE-960: Added a SpanQueryFilter and related classes to allow for not only filtering, but knowing where in a Document a Filter matches (Grant Ingersoll)
3. LUCENE-868: Added new Term Vector access features. A new callback mechanism allows an application to define how and where to read Term Vectors from disk.
This implementation contains several extensions of the new abstract TermVectorMapper class. The new API should be back-compatible. No changes in the
actual storage of Term Vectors have taken place.
Optimizations
1. LUCENE-937: CachingTokenFilter now uses an iterator to access the

View File: contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

@ -17,6 +17,16 @@ package org.apache.lucene.index.memory;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
@ -30,22 +40,13 @@ import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.search.HitCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
/**
* High-performance single-document main memory Apache Lucene fulltext search index.
*
@ -935,8 +936,47 @@ public class MemoryIndex {
}
return vectors;
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException
{
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
// if (vectors.length == 0) return null;
for (Iterator iterator = fields.keySet().iterator(); iterator.hasNext();)
{
String fieldName = (String) iterator.next();
getTermFreqVector(docNumber, fieldName, mapper);
}
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException
{
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector");
final Info info = getInfo(field);
if (info == null){
return;
}
info.sortTerms();
mapper.setExpectations(field, info.sortedTerms.length, stride != 1, true);
for (int i = info.sortedTerms.length; --i >=0;){
ArrayIntList positions = (ArrayIntList) info.sortedTerms[i].getValue();
int size = positions.size();
org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];
for (int k=0, j=1; j < size; k++, j += stride) {
int start = positions.get(j);
int end = positions.get(j+1);
offsets[k] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end);
}
mapper.map((String)info.sortedTerms[i].getKey(),
numPositions((ArrayIntList) info.sortedTerms[i].getValue()),
offsets, ((ArrayIntList) info.sortedTerms[i].getValue()).toArray(stride));
}
}
public TermFreqVector getTermFreqVector(int docNumber, final String fieldName) {
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector");
final Info info = getInfo(fieldName);
if (info == null) return null; // TODO: or return empty vector impl???

View File: src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java

@ -0,0 +1,70 @@
package org.apache.lucene.index;
import java.util.*;
/**
* Copyright 2007 The Apache Software Foundation
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* For each Field, store a sorted collection of {@link TermVectorEntry}s
* <p/>
* This is not thread-safe.
*/
public class FieldSortedTermVectorMapper extends TermVectorMapper{
private Map fieldToTerms = new HashMap();
private SortedSet currentSet;
private String currentField;
private Comparator comparator;
/**
*
* @param comparator A Comparator for sorting {@link TermVectorEntry}s
*/
public FieldSortedTermVectorMapper(Comparator comparator) {
this(false, false, comparator);
}
public FieldSortedTermVectorMapper(boolean ignoringPositions, boolean ignoringOffsets, Comparator comparator) {
super(ignoringPositions, ignoringOffsets);
this.comparator = comparator;
}
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
TermVectorEntry entry = new TermVectorEntry(currentField, term, frequency, offsets, positions);
currentSet.add(entry);
}
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
currentSet = new TreeSet(comparator);
currentField = field;
fieldToTerms.put(field, currentSet);
}
/**
* Get the mapping between fields and terms, sorted by the comparator
*
* @return A map between field names and {@link java.util.SortedSet}s per field. SortedSet entries are {@link TermVectorEntry}
*/
public Map getFieldToTerms() {
return fieldToTerms;
}
public Comparator getComparator() {
return comparator;
}
}

View File: src/java/org/apache/lucene/index/FilterIndexReader.java

@ -115,6 +115,18 @@ public class FilterIndexReader extends IndexReader {
return in.getTermFreqVector(docNumber, field);
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
ensureOpen();
in.getTermFreqVector(docNumber, field, mapper);
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
ensureOpen();
in.getTermFreqVector(docNumber, mapper);
}
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
return in.numDocs();

View File: src/java/org/apache/lucene/index/IndexReader.java

@ -20,12 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.*;
import java.io.File;
import java.io.FileOutputStream;
@ -385,6 +380,25 @@ public abstract class IndexReader {
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException;
/**
* Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of
* the {@link TermFreqVector}.
* @param docNumber The number of the document to load the vector for
* @param field The name of the field to load
* @param mapper The {@link TermVectorMapper} to process the vector. Must not be null
* @throws IOException if term vectors cannot be accessed or if they do not exist for the specified field and document.
*
*/
abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException;
/**
* Map all the term vectors for all fields in a Document
* @param docNumber The number of the document to load the vector for
* @param mapper The {@link TermVectorMapper} to process the vector. Must not be null
* @throws IOException if term vectors cannot be accessed or if they do not exist for the specified field and document.
*/
abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException;
/**
* Returns <code>true</code> if an index exists at the specified directory.
* If the directory does not exist or if there is no index in it.

View File: src/java/org/apache/lucene/index/MultiReader.java

@ -85,6 +85,19 @@ public class MultiReader extends IndexReader {
return subReaders[i].getTermFreqVector(n - starts[i], field);
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
ensureOpen();
int i = readerIndex(docNumber); // find segment num
subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper);
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
ensureOpen();
int i = readerIndex(docNumber); // find segment num
subReaders[i].getTermFreqVector(docNumber - starts[i], mapper);
}
public synchronized int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
if (numDocs == -1) { // check cache

View File: src/java/org/apache/lucene/index/ParallelReader.java

@ -194,6 +194,29 @@ public class ParallelReader extends IndexReader {
return reader==null ? null : reader.getTermFreqVector(n, field);
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
ensureOpen();
IndexReader reader = ((IndexReader)fieldToReader.get(field));
if (reader != null) {
reader.getTermFreqVector(docNumber, field, mapper);
}
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
ensureOpen();
Iterator i = fieldToReader.entrySet().iterator();
while (i.hasNext()) {
Map.Entry e = (Map.Entry)i.next();
String field = (String)e.getKey();
IndexReader reader = (IndexReader)e.getValue();
reader.getTermFreqVector(docNumber, field, mapper);
}
}
public boolean hasNorms(String field) throws IOException {
ensureOpen();
IndexReader reader = ((IndexReader)fieldToReader.get(field));

View File: src/java/org/apache/lucene/index/SegmentReader.java

@ -20,10 +20,10 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;
import java.io.IOException;
@ -643,6 +643,35 @@ class SegmentReader extends IndexReader {
}
public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
ensureOpen();
FieldInfo fi = fieldInfos.fieldInfo(field);
if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
throw new IOException("field does not contain term vectors");
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null)
{
throw new IOException("Cannot open a reader for the term vectors");
}
termVectorsReader.get(docNumber, field, mapper);
}
public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
ensureOpen();
if (termVectorsReaderOrig == null)
return;
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null)
return;
termVectorsReader.get(docNumber, mapper);
}
/** Return an array of term frequency vectors for the specified document.
* The array contains a vector for each vectorized field in the document.
* Each vector contains term numbers and frequencies for all terms

View File: src/java/org/apache/lucene/index/SortedTermVectorMapper.java

@ -0,0 +1,129 @@
package org.apache.lucene.index;
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.*;
/**
* Store a sorted collection of {@link org.apache.lucene.index.TermVectorEntry}s. Collects all term information
* into a single SortedSet.
* <br/>
* NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not
* know what Fields they correlate with.
* <br/>
* This is not thread-safe
*/
public class SortedTermVectorMapper extends TermVectorMapper{
private SortedSet currentSet;
private Map termToTVE = new HashMap();
private boolean storeOffsets;
private boolean storePositions;
/**
* Stand-in name for the field in {@link TermVectorEntry}.
*/
public static final String ALL = "_ALL_";
/**
*
* @param comparator A Comparator for sorting {@link TermVectorEntry}s
*/
public SortedTermVectorMapper(Comparator comparator) {
this(false, false, comparator);
}
public SortedTermVectorMapper(boolean ignoringPositions, boolean ignoringOffsets, Comparator comparator) {
super(ignoringPositions, ignoringOffsets);
currentSet = new TreeSet(comparator);
}
/**
*
* @param term The term to map
* @param frequency The frequency of the term
* @param offsets Offset information, may be null
* @param positions Position information, may be null
*/
//We need to combine any previous mentions of the term
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
TermVectorEntry entry = (TermVectorEntry) termToTVE.get(term);
if (entry == null) {
entry = new TermVectorEntry(ALL, term, frequency,
storeOffsets == true ? offsets : null,
storePositions == true ? positions : null);
termToTVE.put(term, entry);
currentSet.add(entry);
} else {
entry.setFrequency(entry.getFrequency() + frequency);
if (storeOffsets)
{
TermVectorOffsetInfo [] existingOffsets = entry.getOffsets();
//A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions
if (existingOffsets != null && offsets != null && offsets.length > 0)
{
//copy over the existing offsets
TermVectorOffsetInfo [] newOffsets = new TermVectorOffsetInfo[existingOffsets.length + offsets.length];
System.arraycopy(existingOffsets, 0, newOffsets, 0, existingOffsets.length);
System.arraycopy(offsets, 0, newOffsets, existingOffsets.length, offsets.length);
entry.setOffsets(newOffsets);
}
else if (existingOffsets == null && offsets != null && offsets.length > 0)
{
entry.setOffsets(offsets);
}
//else leave it alone
}
if (storePositions)
{
int [] existingPositions = entry.getPositions();
if (existingPositions != null && positions != null && positions.length > 0)
{
int [] newPositions = new int[existingPositions.length + positions.length];
System.arraycopy(existingPositions, 0, newPositions, 0, existingPositions.length);
System.arraycopy(positions, 0, newPositions, existingPositions.length, positions.length);
entry.setPositions(newPositions);
}
else if (existingPositions == null && positions != null && positions.length > 0)
{
entry.setPositions(positions);
}
}
}
}
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
this.storeOffsets = storeOffsets;
this.storePositions = storePositions;
}
/**
* The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed into the constructor.
*<br/>
* This set will be empty until after the mapping process takes place.
*
* @return The SortedSet of {@link TermVectorEntry}.
*/
public SortedSet getTermVectorEntrySet()
{
return currentSet;
}
}

View File: src/java/org/apache/lucene/index/TermVectorEntry.java

@ -0,0 +1,98 @@
package org.apache.lucene.index;
/**
* Copyright 2007 The Apache Software Foundation
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Convenience class for holding TermVector information.
*/
public class TermVectorEntry {
private String field;
private String term;
private int frequency;
private TermVectorOffsetInfo [] offsets;
int [] positions;
public TermVectorEntry() {
}
public TermVectorEntry(String field, String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
this.field = field;
this.term = term;
this.frequency = frequency;
this.offsets = offsets;
this.positions = positions;
}
public String getField() {
return field;
}
public int getFrequency() {
return frequency;
}
public TermVectorOffsetInfo[] getOffsets() {
return offsets;
}
public int[] getPositions() {
return positions;
}
public String getTerm() {
return term;
}
//Keep package local
void setFrequency(int frequency) {
this.frequency = frequency;
}
void setOffsets(TermVectorOffsetInfo[] offsets) {
this.offsets = offsets;
}
void setPositions(int[] positions) {
this.positions = positions;
}
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
TermVectorEntry that = (TermVectorEntry) o;
if (term != null ? !term.equals(that.term) : that.term != null) return false;
return true;
}
public int hashCode() {
return (term != null ? term.hashCode() : 0);
}
public String toString() {
return "TermVectorEntry{" +
"field='" + field + '\'' +
", term='" + term + '\'' +
", frequency=" + frequency +
'}';
}
}

View File: src/java/org/apache/lucene/index/TermVectorEntryFreqSortedComparator.java

@ -0,0 +1,42 @@
package org.apache.lucene.index;
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Comparator;
/**
* Compares {@link org.apache.lucene.index.TermVectorEntry}s first by frequency (highest first), then by
* the term (case-sensitive), and finally by the field
*
**/
public class TermVectorEntryFreqSortedComparator implements Comparator {
public int compare(Object object, Object object1) {
int result = 0;
TermVectorEntry entry = (TermVectorEntry) object;
TermVectorEntry entry1 = (TermVectorEntry) object1;
result = entry1.getFrequency() - entry.getFrequency();
if (result == 0)
{
result = entry.getTerm().compareTo(entry1.getTerm());
if (result == 0)
{
result = entry.getField().compareTo(entry1.getField());
}
}
return result;
}
}

View File: src/java/org/apache/lucene/index/TermVectorMapper.java

@ -0,0 +1,88 @@
package org.apache.lucene.index;
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* The TermVectorMapper can be used to map Term Vectors into your own
* structure instead of the parallel array structure used by
* {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* <p/>
* It is up to the implementation to make sure it is thread-safe.
*
*
**/
public abstract class TermVectorMapper {
private boolean ignoringPositions;
private boolean ignoringOffsets;
protected TermVectorMapper() {
}
/**
*
* @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if they are stored
* @param ignoringOffsets true if this mapper should tell Lucene to ignore offsets even if they are stored
*/
protected TermVectorMapper(boolean ignoringPositions, boolean ignoringOffsets) {
this.ignoringPositions = ignoringPositions;
this.ignoringOffsets = ignoringOffsets;
}
/**
* Tell the mapper what to expect with regard to field, number of terms, offset and position storage.
* This method will be called once before retrieving the vector for a field.
*
* This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}.
* @param field The field the vector is for
* @param numTerms The number of terms that need to be mapped
* @param storeOffsets true if the mapper should expect offset information
* @param storePositions true if the mapper should expect positions info
*/
public abstract void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions);
/**
* Map the Term Vector information into your own structure
* @param term The term to add to the vector
* @param frequency The frequency of the term in the document
* @param offsets null if the offset is not specified, otherwise the offset into the field of the term
* @param positions null if the position is not specified, otherwise the position in the field of the term
*/
public abstract void map(String term, int frequency, TermVectorOffsetInfo [] offsets, int [] positions);
/**
* Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they
* can be skipped over. Derived classes should set this to true if they want to ignore positions. The default
* is false, meaning positions will be loaded if they are stored.
* @return true if positions should be ignored; false by default
*/
public boolean isIgnoringPositions()
{
return ignoringPositions;
}
/**
*
* @see #isIgnoringPositions() Same principle as {@link #isIgnoringPositions()}, but applied to offsets. false by default.
* @return true if offsets should be ignored; false by default
*/
public boolean isIgnoringOffsets()
{
return ignoringOffsets;
}
}

View File: src/java/org/apache/lucene/index/TermVectorsReader.java

@ -17,9 +17,9 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import java.io.IOException;
@ -104,18 +104,9 @@ class TermVectorsReader implements Cloneable {
return size;
}
/**
* Retrieve the term vector for the given document and field
* @param docNum The document number to retrieve the vector for
* @param field The field within the document to retrieve
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
* @throws IOException if there is an error reading the term vector files
*/
TermFreqVector get(int docNum, String field) throws IOException {
// Check if no term vectors are available for this segment at all
int fieldNumber = fieldInfos.fieldNumber(field);
TermFreqVector result = null;
public void get(int docNum, String field, TermVectorMapper mapper) throws IOException {
if (tvx != null) {
int fieldNumber = fieldInfos.fieldNumber(field);
//We need to account for the FORMAT_SIZE when seeking in the tvx
//We don't need to do this in other seeks because we already have the
// file pointer
@ -137,7 +128,7 @@ class TermVectorsReader implements Cloneable {
number = tvd.readVInt();
else
number += tvd.readVInt();
if (number == fieldNumber)
found = i;
}
@ -150,14 +141,30 @@ class TermVectorsReader implements Cloneable {
for (int i = 0; i <= found; i++)
position += tvd.readVLong();
result = readTermVector(field, position);
readTermVector(field, position, mapper);
} else {
//System.out.println("Fieldable not found");
}
} else {
//System.out.println("No tvx file");
}
return result;
}
/**
* Retrieve the term vector for the given document and field
* @param docNum The document number to retrieve the vector for
* @param field The field within the document to retrieve
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
* @throws IOException if there is an error reading the term vector files
*/
TermFreqVector get(int docNum, String field) throws IOException {
// Check if no term vectors are available for this segment at all
ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
get(docNum, field, mapper);
return mapper.materializeVector();
}
/**
@ -169,7 +176,6 @@ class TermVectorsReader implements Cloneable {
*/
TermFreqVector[] get(int docNum) throws IOException {
TermFreqVector[] result = null;
// Check if no term vectors are available for this segment at all
if (tvx != null) {
//We need to offset by the FORMAT_SIZE header at the start of the tvx file
tvx.seek(((docNum + docStoreOffset) * 8L) + TermVectorsWriter.FORMAT_SIZE);
@ -182,7 +188,7 @@ class TermVectorsReader implements Cloneable {
if (fieldCount != 0) {
int number = 0;
String[] fields = new String[fieldCount];
for (int i = 0; i < fieldCount; i++) {
if(tvdFormat == TermVectorsWriter.FORMAT_VERSION)
number = tvd.readVInt();
@ -208,24 +214,76 @@ class TermVectorsReader implements Cloneable {
return result;
}
public void get(int docNumber, TermVectorMapper mapper) throws IOException {
// Check if no term vectors are available for this segment at all
if (tvx != null) {
//We need to offset by the FORMAT_SIZE header at the start of the tvx file
tvx.seek(((docNumber + docStoreOffset) * 8L) + TermVectorsWriter.FORMAT_SIZE);
long position = tvx.readLong();
tvd.seek(position);
int fieldCount = tvd.readVInt();
// No fields are vectorized for this document
if (fieldCount != 0) {
int number = 0;
String[] fields = new String[fieldCount];
for (int i = 0; i < fieldCount; i++) {
if(tvdFormat == TermVectorsWriter.FORMAT_VERSION)
number = tvd.readVInt();
else
number += tvd.readVInt();
fields[i] = fieldInfos.fieldName(number);
}
// Compute position in the tvf file
position = 0;
long[] tvfPointers = new long[fieldCount];
for (int i = 0; i < fieldCount; i++) {
position += tvd.readVLong();
tvfPointers[i] = position;
}
readTermVectors(fields, tvfPointers, mapper);
}
} else {
//System.out.println("No tvx file");
}
}
private SegmentTermVector[] readTermVectors(String fields[], long tvfPointers[])
throws IOException {
SegmentTermVector res[] = new SegmentTermVector[fields.length];
for (int i = 0; i < fields.length; i++) {
res[i] = readTermVector(fields[i], tvfPointers[i]);
ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper();
readTermVector(fields[i], tvfPointers[i], mapper);
res[i] = (SegmentTermVector) mapper.materializeVector();
}
return res;
}
private void readTermVectors(String fields[], long tvfPointers[], TermVectorMapper mapper)
throws IOException {
for (int i = 0; i < fields.length; i++) {
readTermVector(fields[i], tvfPointers[i], mapper);
}
}
/**
*
* @param field The field to read in
* @param tvfPointer The pointer within the tvf file where we should start reading
* @param mapper The mapper used to map the TermVector
* @throws IOException if there is an error reading the term vector files
*/
private SegmentTermVector readTermVector(String field, long tvfPointer)
private void readTermVector(String field, long tvfPointer, TermVectorMapper mapper)
throws IOException {
// Now read the data from specified position
@ -236,7 +294,7 @@ class TermVectorsReader implements Cloneable {
//System.out.println("Num Terms: " + numTerms);
// If no terms - return a constant empty termvector. However, this should never occur!
if (numTerms == 0)
return new SegmentTermVector(field, null, null);
return;
boolean storePositions;
boolean storeOffsets;
@ -251,18 +309,7 @@ class TermVectorsReader implements Cloneable {
storePositions = false;
storeOffsets = false;
}
String terms[] = new String[numTerms];
int termFreqs[] = new int[numTerms];
// we may not need these, but declare them
int positions[][] = null;
TermVectorOffsetInfo offsets[][] = null;
if(storePositions)
positions = new int[numTerms][];
if(storeOffsets)
offsets = new TermVectorOffsetInfo[numTerms][];
mapper.setExpectations(field, numTerms, storeOffsets, storePositions);
int start = 0;
int deltaLength = 0;
int totalLength = 0;
@ -282,45 +329,54 @@ class TermVectorsReader implements Cloneable {
}
tvf.readChars(buffer, start, deltaLength);
terms[i] = new String(buffer, 0, totalLength);
String term = new String(buffer, 0, totalLength);
previousBuffer = buffer;
int freq = tvf.readVInt();
termFreqs[i] = freq;
int [] positions = null;
if (storePositions) { //read in the positions
int [] pos = new int[freq];
positions[i] = pos;
int prevPosition = 0;
for (int j = 0; j < freq; j++)
{
pos[j] = prevPosition + tvf.readVInt();
prevPosition = pos[j];
//does the mapper even care about positions?
if (mapper.isIgnoringPositions() == false) {
positions = new int[freq];
int prevPosition = 0;
for (int j = 0; j < freq; j++)
{
positions[j] = prevPosition + tvf.readVInt();
prevPosition = positions[j];
}
} else {
//we need to skip over the positions. Since these are VInts, there is no way
//to know how far to skip without reading them one by one.
for (int j = 0; j < freq; j++)
{
tvf.readVInt();
}
}
}
TermVectorOffsetInfo[] offsets = null;
if (storeOffsets) {
TermVectorOffsetInfo[] offs = new TermVectorOffsetInfo[freq];
offsets[i] = offs;
int prevOffset = 0;
for (int j = 0; j < freq; j++) {
int startOffset = prevOffset + tvf.readVInt();
int endOffset = startOffset + tvf.readVInt();
offs[j] = new TermVectorOffsetInfo(startOffset, endOffset);
prevOffset = endOffset;
//does the mapper even care about offsets?
if (mapper.isIgnoringOffsets() == false) {
offsets = new TermVectorOffsetInfo[freq];
int prevOffset = 0;
for (int j = 0; j < freq; j++) {
int startOffset = prevOffset + tvf.readVInt();
int endOffset = startOffset + tvf.readVInt();
offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset);
prevOffset = endOffset;
}
} else {
for (int j = 0; j < freq; j++){
tvf.readVInt();
tvf.readVInt();
}
}
}
mapper.map(term, freq, offsets, positions);
}
SegmentTermVector tv;
if (storePositions || storeOffsets){
tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
}
else {
tv = new SegmentTermVector(field, terms, termFreqs);
}
return tv;
}
protected Object clone() {
if (tvx == null || tvd == null || tvf == null)
@ -337,4 +393,67 @@ class TermVectorsReader implements Cloneable {
return clone;
}
}
/**
* Models the existing parallel array structure
*/
class ParallelArrayTermVectorMapper extends TermVectorMapper
{
private int numTerms;
private String[] terms;
private int[] termFreqs;
private int positions[][] = null;
private TermVectorOffsetInfo offsets[][] = null;
private int currentPosition;
private boolean storingOffsets;
private boolean storingPositions;
private String field;
public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
this.numTerms = numTerms;
this.field = field;
terms = new String[numTerms];
termFreqs = new int[numTerms];
this.storingOffsets = storeOffsets;
this.storingPositions = storePositions;
if(storePositions)
this.positions = new int[numTerms][];
if(storeOffsets)
this.offsets = new TermVectorOffsetInfo[numTerms][];
}
public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
terms[currentPosition] = term;
termFreqs[currentPosition] = frequency;
if (storingOffsets)
{
this.offsets[currentPosition] = offsets;
}
if (storingPositions)
{
this.positions[currentPosition] = positions;
}
currentPosition++;
}
/**
* Construct the vector
* @return The {@link TermFreqVector} built from the mapped data, or null if no field was mapped
*/
public TermFreqVector materializeVector() {
SegmentTermVector tv = null;
if (field != null && terms != null) {
if (storingPositions || storingOffsets) {
tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets);
} else {
tv = new SegmentTermVector(field, terms, termFreqs);
}
}
return tv;
}
}

View File: src/test/org/apache/lucene/index/TestIndexReader.java

@ -21,29 +21,20 @@ package org.apache.lucene.index;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.*;
import org.apache.lucene.util._TestUtil;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
public class TestIndexReader extends TestCase
{
@ -180,8 +171,43 @@ public class TestIndexReader extends TestCase
d.close();
}
public void testTermVectors() throws Exception {
RAMDirectory d = new MockRAMDirectory();
// set up writer
IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
// want to get some more segments here
// new termvector fields
for (int i = 0; i < 5 * writer.getMergeFactor(); i++) {
Document doc = new Document();
doc.add(new Field("tvnot","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field("termvector","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
doc.add(new Field("tvoffset","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
doc.add(new Field("tvposition","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
doc.add(new Field("tvpositionoffset","one two two three three three", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
}
writer.close();
IndexReader reader = IndexReader.open(d);
FieldSortedTermVectorMapper mapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
reader.getTermFreqVector(0, mapper);
Map map = mapper.getFieldToTerms();
assertTrue("map is null and it shouldn't be", map != null);
assertTrue("map Size: " + map.size() + " is not: " + 4, map.size() == 4);
Set set = (Set) map.get("termvector");
for (Iterator iterator = set.iterator(); iterator.hasNext();) {
TermVectorEntry entry = (TermVectorEntry) iterator.next();
assertTrue("entry is null and it shouldn't be", entry != null);
System.out.println("Entry: " + entry);
}
}
private void assertTermDocsCount(String msg,
IndexReader reader,
Term term,
int expected)

View File: src/test/org/apache/lucene/index/TestTermVectorsReader.java

@ -22,16 +22,19 @@ import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedSet;
public class TestTermVectorsReader extends TestCase {
private TermVectorsWriter writer = null;
//Must be lexicographically sorted, will do in setup, versus trying to maintain here
private String [] testFields = {"f1", "f2", "f3"};
private boolean [] testFieldsStorePos = {true, false, true, false};
private boolean [] testFieldsStoreOff = {true, false, false, true};
private String [] testTerms = {"this", "is", "a", "test"};
private int [][] positions = new int[testTerms.length][];
private TermVectorOffsetInfo [][] offsets = new TermVectorOffsetInfo[testTerms.length][];
private String[] testFields = {"f1", "f2", "f3", "f4"};
private boolean[] testFieldsStorePos = {true, false, true, false};
private boolean[] testFieldsStoreOff = {true, false, false, true};
private String[] testTerms = {"this", "is", "a", "test"};
private int[][] positions = new int[testTerms.length][];
private TermVectorOffsetInfo[][] offsets = new TermVectorOffsetInfo[testTerms.length][];
private RAMDirectory dir = new RAMDirectory();
private String seg = "testSegment";
private FieldInfos fieldInfos = new FieldInfos();
@ -44,35 +47,37 @@ public class TestTermVectorsReader extends TestCase {
for (int i = 0; i < testFields.length; i++) {
fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
}
for (int i = 0; i < testTerms.length; i++) {
positions[i] = new int[3];
for (int j = 0; j < positions[i].length; j++) {
// positions are always sorted in increasing order
positions[i][j] = (int) (j * 10 + Math.random() * 10);
}
offsets[i] = new TermVectorOffsetInfo[3];
for (int j = 0; j < offsets[i].length; j++) {
// offsets are always sorted in increasing order
offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].length());
}
}
Arrays.sort(testTerms);
//Create 5 documents for testing, they all have the same terms
writer = new TermVectorsWriter(dir, seg, fieldInfos);
for (int j = 0; j < 5; j++) {
writer = new TermVectorsWriter(dir, seg, fieldInfos);
writer.openDocument();
for (int k = 0; k < testFields.length; k++) {
writer.openField(testFields[k]);
for (int i = 0; i < testTerms.length; i++) {
writer.addTerm(testTerms[i], 3, positions[i], offsets[i]);
}
writer.closeField();
}
writer.closeDocument();
writer.close();
}
writer.close();
}
protected void tearDown() {
@ -80,34 +85,38 @@ public class TestTermVectorsReader extends TestCase {
}
public void test() {
//Check to see the files were created properly in setup
assertTrue(writer.isDocumentOpen() == false);
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVD_EXTENSION));
assertTrue(dir.fileExists(seg + TermVectorsWriter.TVX_EXTENSION));
}
public void testReader() throws IOException {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
TermFreqVector vector = reader.get(0, testFields[0]);
assertTrue(vector != null);
String [] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
for (int j = 0; j < 5; j++) {
TermFreqVector vector = reader.get(j, testFields[0]);
assertTrue(vector != null);
String[] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
}
}
}
}
public void testPositionReader() throws IOException {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
TermPositionVector vector;
String[] terms;
vector = (TermPositionVector) reader.get(0, testFields[0]);
assertTrue(vector != null);
terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
@ -115,14 +124,14 @@ public class TestTermVectorsReader extends TestCase {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
int [] positions = vector.getTermPositions(i);
int[] positions = vector.getTermPositions(i);
assertTrue(positions != null);
assertTrue(positions.length == this.positions[i].length);
for (int j = 0; j < positions.length; j++) {
int position = positions[j];
assertTrue(position == this.positions[i][j]);
}
TermVectorOffsetInfo [] offset = vector.getOffsets(i);
TermVectorOffsetInfo[] offset = vector.getOffsets(i);
assertTrue(offset != null);
assertTrue(offset.length == this.offsets[i].length);
for (int j = 0; j < offset.length; j++) {
@ -130,9 +139,9 @@ public class TestTermVectorsReader extends TestCase {
assertTrue(termVectorOffsetInfo.equals(offsets[i][j]));
}
}
TermFreqVector freqVector = reader.get(0, testFields[1]); //no pos, no offset
assertTrue(freqVector != null);
assertTrue(freqVector instanceof TermPositionVector == false);
terms = freqVector.getTerms();
assertTrue(terms != null);
@ -140,30 +149,30 @@ public class TestTermVectorsReader extends TestCase {
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
}
}
public void testOffsetReader() throws IOException {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
TermPositionVector vector = (TermPositionVector)reader.get(0, testFields[0]);
TermPositionVector vector = (TermPositionVector) reader.get(0, testFields[0]);
assertTrue(vector != null);
String [] terms = vector.getTerms();
String[] terms = vector.getTerms();
assertTrue(terms != null);
assertTrue(terms.length == testTerms.length);
for (int i = 0; i < terms.length; i++) {
String term = terms[i];
//System.out.println("Term: " + term);
assertTrue(term.equals(testTerms[i]));
int [] positions = vector.getTermPositions(i);
int[] positions = vector.getTermPositions(i);
assertTrue(positions != null);
assertTrue(positions.length == this.positions[i].length);
for (int j = 0; j < positions.length; j++) {
int position = positions[j];
assertTrue(position == this.positions[i][j]);
}
TermVectorOffsetInfo [] offset = vector.getOffsets(i);
TermVectorOffsetInfo[] offset = vector.getOffsets(i);
assertTrue(offset != null);
assertTrue(offset.length == this.offsets[i].length);
for (int j = 0; j < offset.length; j++) {
@ -172,18 +181,112 @@ public class TestTermVectorsReader extends TestCase {
}
}
}
public void testMapper() throws IOException {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
reader.get(0, mapper);
SortedSet set = mapper.getTermVectorEntrySet();
assertTrue("set is null and it shouldn't be", set != null);
//four fields, 4 unique terms; all terms are the same in every field
assertTrue("set Size: " + set.size() + " is not: " + 4, set.size() == 4);
//Check offsets and positions
for (Iterator iterator = set.iterator(); iterator.hasNext();) {
TermVectorEntry tve = (TermVectorEntry) iterator.next();
assertTrue("tve is null and it shouldn't be", tve != null);
assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() != null);
assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() != null);
}
mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
reader.get(1, mapper);
set = mapper.getTermVectorEntrySet();
assertTrue("set is null and it shouldn't be", set != null);
//four fields, 4 unique terms; all terms are the same in every field
assertTrue("set Size: " + set.size() + " is not: " + 4, set.size() == 4);
//Should have offsets and positions b/c we are munging all the fields together
for (Iterator iterator = set.iterator(); iterator.hasNext();) {
TermVectorEntry tve = (TermVectorEntry) iterator.next();
assertTrue("tve is null and it shouldn't be", tve != null);
assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() != null);
assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() != null);
}
FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
reader.get(0, fsMapper);
Map map = fsMapper.getFieldToTerms();
assertTrue("map Size: " + map.size() + " is not: " + testFields.length, map.size() == testFields.length);
for (Iterator iterator = map.entrySet().iterator(); iterator.hasNext();) {
Map.Entry entry = (Map.Entry) iterator.next();
SortedSet sortedSet = (SortedSet) entry.getValue();
assertTrue("sortedSet Size: " + sortedSet.size() + " is not: " + 4, sortedSet.size() == 4);
for (Iterator inner = sortedSet.iterator(); inner.hasNext();) {
TermVectorEntry tve = (TermVectorEntry) inner.next();
assertTrue("tve is null and it shouldn't be", tve != null);
//Check offsets and positions.
assertTrue("tve is null and it shouldn't be", tve != null);
String field = tve.getField();
if (field.equals(testFields[0])) {
//should have offsets
assertTrue("tve.getOffsets() is null and it shouldn't be", tve.getOffsets() != null);
assertTrue("tve.getPositions() is null and it shouldn't be", tve.getPositions() != null);
}
else if (field.equals(testFields[1])) {
//should not have offsets
assertTrue("tve.getOffsets() is not null and it shouldn't be", tve.getOffsets() == null);
assertTrue("tve.getPositions() is not null and it shouldn't be", tve.getPositions() == null);
}
}
}
//Try mapper that ignores offs and positions
fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator());
reader.get(0, fsMapper);
map = fsMapper.getFieldToTerms();
assertTrue("map Size: " + map.size() + " is not: " + testFields.length, map.size() == testFields.length);
for (Iterator iterator = map.entrySet().iterator(); iterator.hasNext();) {
Map.Entry entry = (Map.Entry) iterator.next();
SortedSet sortedSet = (SortedSet) entry.getValue();
assertTrue("sortedSet Size: " + sortedSet.size() + " is not: " + 4, sortedSet.size() == 4);
for (Iterator inner = sortedSet.iterator(); inner.hasNext();) {
TermVectorEntry tve = (TermVectorEntry) inner.next();
assertTrue("tve is null and it shouldn't be", tve != null);
//Check offsets and positions.
assertTrue("tve is null and it shouldn't be", tve != null);
String field = tve.getField();
if (field.equals(testFields[0])) {
//offsets and positions should have been ignored
assertTrue("tve.getOffsets() is not null and it should be", tve.getOffsets() == null);
assertTrue("tve.getPositions() is not null and it should be", tve.getPositions() == null);
}
else if (field.equals(testFields[1])) {
//should not have offsets
assertTrue("tve.getOffsets() is not null and it shouldn't be", tve.getOffsets() == null);
assertTrue("tve.getPositions() is not null and it shouldn't be", tve.getPositions() == null);
}
}
}
}
/**
* Make sure exceptions and bad params are handled appropriately
*/
public void testBadParams() {
try {
TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
assertTrue(reader != null);
//Bad document number, good field number
reader.get(50, testFields[0]);
fail();
} catch (IOException e) {
// expected exception
}
@ -192,7 +295,7 @@ public class TestTermVectorsReader extends TestCase {
assertTrue(reader != null);
//Bad document number, no field
reader.get(50);
fail();
} catch (IOException e) {
// expected exception
}
@ -201,9 +304,9 @@ public class TestTermVectorsReader extends TestCase {
assertTrue(reader != null);
//good document number, bad field number
TermFreqVector vector = reader.get(0, "f50");
assertTrue(vector == null);
} catch (IOException e) {
fail();
}
}
}
}

View File: src/test/org/apache/lucene/index/TestTermVectors.java

@ -28,7 +28,9 @@ import org.apache.lucene.util.English;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedSet;
public class TestTermVectors extends TestCase {
private IndexSearcher searcher;
@ -171,7 +173,7 @@ public class TestTermVectors extends TestCase {
assertTrue(false);
}
}
public void testKnownSetOfDocuments() {
String test1 = "eating chocolate in a computer lab"; //6 terms
String test2 = "computer in a computer lab"; //5 terms
@ -275,20 +277,45 @@ public class TestTermVectors extends TestCase {
Integer freqInt = (Integer)test4Map.get(term);
assertTrue(freqInt != null);
assertTrue(freqInt.intValue() == freq);
}
}
SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
knownSearcher.reader.getTermFreqVector(hits.id(1), mapper);
SortedSet vectorEntrySet = mapper.getTermVectorEntrySet();
assertTrue("mapper.getTermVectorEntrySet() Size: " + vectorEntrySet.size() + " is not: " + 10, vectorEntrySet.size() == 10);
TermVectorEntry last = null;
for (Iterator iterator = vectorEntrySet.iterator(); iterator.hasNext();) {
TermVectorEntry tve = (TermVectorEntry) iterator.next();
if (tve != null && last != null)
{
assertTrue("terms are not properly sorted", last.getFrequency() >= tve.getFrequency());
Integer expectedFreq = (Integer) test4Map.get(tve.getTerm());
//we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
assertTrue("Frequency is not correct:", tve.getFrequency() == 2*expectedFreq.intValue());
}
last = tve;
}
FieldSortedTermVectorMapper fieldMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
knownSearcher.reader.getTermFreqVector(hits.id(1), fieldMapper);
Map map = fieldMapper.getFieldToTerms();
assertTrue("map Size: " + map.size() + " is not: " + 2, map.size() == 2);
vectorEntrySet = (SortedSet) map.get("field");
assertTrue("vectorEntrySet is null and it shouldn't be", vectorEntrySet != null);
assertTrue("vectorEntrySet Size: " + vectorEntrySet.size() + " is not: " + 10, vectorEntrySet.size() == 10);
knownSearcher.close();
} catch (IOException e) {
e.printStackTrace();
assertTrue(false);
}
}
private void setupDoc(Document doc, String text)
{
doc.add(new Field("field", text, Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.YES));
doc.add(new Field("field2", text, Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
//System.out.println("Document: " + doc);
}