From a7c49eeb8cbd2d76559c0430d991200a48e7b3ff Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 1 Feb 2013 10:06:53 +0000 Subject: [PATCH 1/2] LUCENE-4733: Refactor term vectors formats tests around a BaseTermVectorsFormatTestCase. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441367 13f79535-47bb-0310-9956-ffa450edef68 --- .../TestSimpleTextTermVectorsFormat.java | 30 + .../TestCompressingTermVectorsFormat.java | 30 + .../TestLucene40TermVectorsFormat.java | 30 + .../lucene/index/TestPayloadsOnVectors.java | 326 --------- .../apache/lucene/search/TestTermVectors.java | 309 +-------- .../index/BaseTermVectorsFormatTestCase.java | 632 ++++++++++++++++++ 6 files changed, 731 insertions(+), 626 deletions(-) create mode 100644 lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextTermVectorsFormat.java create mode 100644 lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java create mode 100644 lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextTermVectorsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextTermVectorsFormat.java new file mode 100644 index 00000000000..b0683c66d30 --- /dev/null +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextTermVectorsFormat.java @@ -0,0 +1,30 @@ +package org.apache.lucene.codecs.simpletext; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseTermVectorsFormatTestCase; + +public class TestSimpleTextTermVectorsFormat extends BaseTermVectorsFormatTestCase { + + @Override + protected Codec getCodec() { + return new SimpleTextCodec(); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java new file mode 100644 index 00000000000..7a9536210a9 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestCompressingTermVectorsFormat.java @@ -0,0 +1,30 @@ +package org.apache.lucene.codecs.compressing; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseTermVectorsFormatTestCase; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class TestCompressingTermVectorsFormat extends BaseTermVectorsFormatTestCase { + + @Override + protected Codec getCodec() { + return CompressingCodec.randomInstance(random()); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java new file mode 100644 index 00000000000..5a00b1cce0b --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestLucene40TermVectorsFormat.java @@ -0,0 +1,30 @@ +package org.apache.lucene.codecs.lucene40; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseTermVectorsFormatTestCase; + +public class TestLucene40TermVectorsFormat extends BaseTermVectorsFormatTestCase { + + @Override + protected Codec getCodec() { + return new Lucene40Codec(); + } + +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java b/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java index d497c117814..d1bbcca4dae 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloadsOnVectors.java @@ -17,38 +17,21 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import java.io.IOException; import java.io.StringReader; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.IntField; import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; - -import com.carrotsearch.randomizedtesting.generators.RandomInts; -import com.carrotsearch.randomizedtesting.generators.RandomPicks; public class TestPayloadsOnVectors extends LuceneTestCase { @@ -158,314 +141,5 @@ public class TestPayloadsOnVectors extends LuceneTestCase { writer.close(); dir.close(); } - - // custom impl to test cases that are forbidden by the default OffsetAttribute impl - static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute { - int start, end; - - @Override - public int startOffset() { - return start; - } - - @Override - public int endOffset() { - return end; - } - - @Override - public void setOffset(int startOffset, int endOffset) { - // no check! 
- start = startOffset; - end = endOffset; - } - - @Override - public void clear() { - start = end = 0; - } - - @Override - public boolean equals(Object other) { - if (other == this) { - return true; - } - - if (other instanceof PermissiveOffsetAttributeImpl) { - PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other; - return o.start == start && o.end == end; - } - - return false; - } - - @Override - public int hashCode() { - return start + 31 * end; - } - - @Override - public void copyTo(AttributeImpl target) { - OffsetAttribute t = (OffsetAttribute) target; - t.setOffset(start, end); - } - - } - - static BytesRef randomPayload() { - final int len = random().nextInt(5); - if (len == 0) { - return null; - } - final BytesRef payload = new BytesRef(len); - random().nextBytes(payload.bytes); - payload.length = len; - return payload; - } - - class RandomTokenStream extends TokenStream { - - final String[] terms; - final int[] positionsIncrements; - final int[] positions; - final int[] startOffsets, endOffsets; - final BytesRef[] payloads; - - final Map> positionToTerms; - final Map> startOffsetToTerms; - - final CharTermAttribute termAtt; - final PositionIncrementAttribute piAtt; - final OffsetAttribute oAtt; - final PayloadAttribute pAtt; - int i = 0; - - RandomTokenStream(int len, String[] sampleTerms, boolean weird) { - terms = new String[len]; - positionsIncrements = new int[len]; - positions = new int[len]; - startOffsets = new int[len]; - endOffsets = new int[len]; - payloads = new BytesRef[len]; - for (int i = 0; i < len; ++i) { - terms[i] = RandomPicks.randomFrom(random(), sampleTerms); - if (weird) { - positionsIncrements[i] = _TestUtil.nextInt(random(), 1, 1 << 18); - startOffsets[i] = random().nextInt(); - endOffsets[i] = random().nextInt(); - } else if (i == 0) { - positionsIncrements[i] = _TestUtil.nextInt(random(), 1, 1 << 5); - startOffsets[i] = _TestUtil.nextInt(random(), 0, 1 << 16); - endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20); - } else { - positionsIncrements[i] = _TestUtil.nextInt(random(), 0, 1 << 5); - startOffsets[i] = startOffsets[i-1] + _TestUtil.nextInt(random(), 0, 1 << 16); - endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 
1 << 10 : 20); - } - } - for (int i = 0; i < len; ++i) { - if (i == 0) { - positions[i] = positionsIncrements[i] - 1; - } else { - positions[i] = positions[i - 1] + positionsIncrements[i]; - } - } - if (rarely()) { - Arrays.fill(payloads, randomPayload()); - } else { - for (int i = 0; i < len; ++i) { - payloads[i] = randomPayload(); - } - } - - positionToTerms = new HashMap>(); - startOffsetToTerms = new HashMap>(); - for (int i = 0; i < len; ++i) { - if (!positionToTerms.containsKey(positions[i])) { - positionToTerms.put(positions[i], new HashSet(1)); - } - positionToTerms.get(positions[i]).add(i); - if (!startOffsetToTerms.containsKey(startOffsets[i])) { - startOffsetToTerms.put(startOffsets[i], new HashSet(1)); - } - startOffsetToTerms.get(startOffsets[i]).add(i); - } - - addAttributeImpl(new PermissiveOffsetAttributeImpl()); - - termAtt = addAttribute(CharTermAttribute.class); - piAtt = addAttribute(PositionIncrementAttribute.class); - oAtt = addAttribute(OffsetAttribute.class); - pAtt = addAttribute(PayloadAttribute.class); - } - - @Override - public final boolean incrementToken() throws IOException { - if (i < terms.length) { - termAtt.setLength(0).append(terms[i]); - piAtt.setPositionIncrement(positionsIncrements[i]); - oAtt.setOffset(startOffsets[i], endOffsets[i]); - pAtt.setPayload(payloads[i]); - ++i; - return true; - } else { - return false; - } - } - - } - - static FieldType randomFieldType() { - FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); - ft.setStoreTermVectors(true); - ft.setStoreTermVectorPositions(random().nextBoolean()); - ft.setStoreTermVectorOffsets(random().nextBoolean()); - if (random().nextBoolean()) { - ft.setStoreTermVectorPositions(true); - ft.setStoreTermVectorPayloads(true); - } - ft.freeze(); - return ft; - } - - public void testRandomVectors() throws IOException { - Directory dir = newDirectory(); - IndexWriterConfig iwConf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - iwConf.setMaxBufferedDocs(RandomInts.randomIntBetween(random(), 2, 30)); - RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); - String[] sampleTerms = new String[RandomInts.randomIntBetween(random(), 20, 50)]; - for (int i = 0; i < sampleTerms.length; ++i) { - sampleTerms[i] = _TestUtil.randomUnicodeString(random()); - } - FieldType ft = randomFieldType(); - // generate random documents and index them - final String[] fieldNames = new String[_TestUtil.nextInt(random(), 1, 200)]; - for (int i = 0; i < fieldNames.length; ++i) { - String fieldName; - do { - fieldName = _TestUtil.randomSimpleString(random()); - } while ("id".equals(fieldName)); - fieldNames[i] = fieldName; - } - final int numDocs = _TestUtil.nextInt(random(), 10, 100); - @SuppressWarnings("unchecked") - final Map[] fieldValues = new Map[numDocs]; - for (int i = 0; i < numDocs; ++i) { - fieldValues[i] = new HashMap(); - final int numFields = _TestUtil.nextInt(random(), 0, rarely() ? fieldNames.length : 5); - for (int j = 0; j < numFields; ++j) { - final String fieldName = fieldNames[(i+j*31) % fieldNames.length]; - final int tokenStreamLen = _TestUtil.nextInt(random(), 1, rarely() ? 
300 : 5); - fieldValues[i].put(fieldName, new RandomTokenStream(tokenStreamLen, sampleTerms, rarely())); - } - } - - // index them - for (int i = 0; i < numDocs; ++i) { - Document doc = new Document(); - doc.add(new IntField("id", i, Store.YES)); - for (Map.Entry entry : fieldValues[i].entrySet()) { - doc.add(new Field(entry.getKey(), entry.getValue(), ft)); - } - iw.addDocument(doc); - } - - iw.commit(); - // make sure the format can merge - iw.forceMerge(2); - - // read term vectors - final DirectoryReader reader = DirectoryReader.open(dir); - for (int i = 0; i < 100; ++i) { - final int docID = random().nextInt(numDocs); - final Map fvs = fieldValues[reader.document(docID).getField("id").numericValue().intValue()]; - final Fields fields = reader.getTermVectors(docID); - if (fvs.isEmpty()) { - assertNull(fields); - } else { - Set fns = new HashSet(); - for (String field : fields) { - fns.add(field); - } - assertEquals(fields.size(), fns.size()); - assertEquals(fvs.keySet(), fns); - for (String field : fields) { - final RandomTokenStream tk = fvs.get(field); - assert tk != null; - final Terms terms = fields.terms(field); - assertEquals(ft.storeTermVectorPositions(), terms.hasPositions()); - assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets()); - assertEquals(1, terms.getDocCount()); - final TermsEnum termsEnum = terms.iterator(null); - while (termsEnum.next() != null) { - assertEquals(1, termsEnum.docFreq()); - final DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.docsAndPositions(null, null); - final DocsEnum docsEnum = docsAndPositionsEnum == null ? termsEnum.docs(null, null) : docsAndPositionsEnum; - if (ft.storeTermVectorOffsets() || ft.storeTermVectorPositions()) { - assertNotNull(docsAndPositionsEnum); - } - assertEquals(0, docsEnum.nextDoc()); - if (terms.hasPositions() || terms.hasOffsets()) { - final int freq = docsEnum.freq(); - assertTrue(freq >= 1); - if (docsAndPositionsEnum != null) { - for (int k = 0; k < freq; ++k) { - final int position = docsAndPositionsEnum.nextPosition(); - final Set indexes; - if (terms.hasPositions()) { - indexes = tk.positionToTerms.get(position); - assertNotNull(tk.positionToTerms.keySet().toString() + " does not contain " + position, indexes); - } else { - indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset()); - assertNotNull(indexes); - } - if (terms.hasPositions()) { - boolean foundPosition = false; - for (int index : indexes) { - if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.positions[index] == position) { - foundPosition = true; - break; - } - } - assertTrue(foundPosition); - } - if (terms.hasOffsets()) { - boolean foundOffset = false; - for (int index : indexes) { - if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) { - foundOffset = true; - break; - } - } - assertTrue(foundOffset); - } - if (terms.hasPayloads()) { - boolean foundPayload = false; - for (int index : indexes) { - if (new BytesRef(tk.terms[index]).equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) { - foundPayload = true; - break; - } - } - assertTrue(foundPayload); - } - } - } - } - assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc()); - } - } - } - } - IOUtils.close(reader, iw, dir); - } - - private static boolean equals(Object o1, Object o2) { - if (o1 == null) { - return o2 == null; - } else { - return o1.equals(o2); - } - } } diff --git 
a/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java index 4d60d9025c0..ebef3bd1335 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java @@ -18,9 +18,6 @@ package org.apache.lucene.search; */ import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; @@ -28,19 +25,24 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.*; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.search.similarities.DefaultSimilarity; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.util.English; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; public class TestTermVectors extends LuceneTestCase { - private static IndexSearcher searcher; private static IndexReader reader; private static Directory directory; @@ -75,7 +77,6 @@ public class TestTermVectors extends LuceneTestCase { } reader = writer.getReader(); writer.close(); - searcher = newSearcher(reader); } @AfterClass @@ -84,300 +85,8 @@ public class TestTermVectors extends LuceneTestCase { directory.close(); reader = null; directory = null; - searcher = null; } - public void test() { - assertTrue(searcher != null); - } - - public void testTermVectors() throws IOException { - Query query = new TermQuery(new Term("field", "seventy")); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals(100, hits.length); - - for (int i = 0; i < hits.length; i++) { - Fields vectors = searcher.reader.getTermVectors(hits[i].doc); - assertNotNull(vectors); - assertEquals("doc=" + hits[i].doc + " tv=" + vectors, 1, vectors.size()); - } - Terms vector; - vector = searcher.reader.getTermVectors(hits[0].doc).terms("noTV"); - assertNull(vector); - } - - public void testTermVectorsFieldOrder() throws IOException { - Directory dir = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)); - Document doc = new Document(); - FieldType ft = new FieldType(TextField.TYPE_STORED); - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - ft.setStoreTermVectorPositions(true); - doc.add(newField("c", "some content here", ft)); - doc.add(newField("a", "some content here", ft)); - doc.add(newField("b", "some content here", ft)); - doc.add(newField("x", "some content here", ft)); - writer.addDocument(doc); - IndexReader reader = writer.getReader(); - writer.close(); - Fields v = reader.getTermVectors(0); - assertEquals(4, v.size()); - String[] expectedFields = new String[]{"a", "b", "c", "x"}; - int[] expectedPositions 
= new int[]{1, 2, 0}; - Iterator fieldsEnum = v.iterator(); - for(int i=0;i test4Map = new HashMap(); - test4Map.put("chocolate", Integer.valueOf(3)); - test4Map.put("lab", Integer.valueOf(2)); - test4Map.put("eating", Integer.valueOf(1)); - test4Map.put("computer", Integer.valueOf(1)); - test4Map.put("with", Integer.valueOf(1)); - test4Map.put("a", Integer.valueOf(1)); - test4Map.put("colored", Integer.valueOf(1)); - test4Map.put("in", Integer.valueOf(1)); - test4Map.put("an", Integer.valueOf(1)); - test4Map.put("computer", Integer.valueOf(1)); - test4Map.put("old", Integer.valueOf(1)); - - Document testDoc1 = new Document(); - setupDoc(testDoc1, test1); - Document testDoc2 = new Document(); - setupDoc(testDoc2, test2); - Document testDoc3 = new Document(); - setupDoc(testDoc3, test3); - Document testDoc4 = new Document(); - setupDoc(testDoc4, test4); - - Directory dir = newDirectory(); - - RandomIndexWriter writer = new RandomIndexWriter(random(), dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)) - .setOpenMode(OpenMode.CREATE) - .setMergePolicy(newLogMergePolicy()) - .setSimilarity(new DefaultSimilarity())); - writer.addDocument(testDoc1); - writer.addDocument(testDoc2); - writer.addDocument(testDoc3); - writer.addDocument(testDoc4); - IndexReader reader = writer.getReader(); - writer.close(); - IndexSearcher knownSearcher = newSearcher(reader); - knownSearcher.setSimilarity(new DefaultSimilarity()); - Fields fields = MultiFields.getFields(knownSearcher.reader); - - DocsEnum docs = null; - for (String fieldName : fields) { - Terms terms = fields.terms(fieldName); - assertNotNull(terms); // NOTE: kinda sketchy assumptions, but ideally we would fix fieldsenum api... - TermsEnum termsEnum = terms.iterator(null); - - while (termsEnum.next() != null) { - String text = termsEnum.term().utf8ToString(); - docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs, DocsEnum.FLAG_FREQS); - - while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - int docId = docs.docID(); - int freq = docs.freq(); - //System.out.println("Doc Id: " + docId + " freq " + freq); - Terms vector = knownSearcher.reader.getTermVectors(docId).terms("field"); - //float tf = sim.tf(freq); - //float idf = sim.idf(knownSearcher.docFreq(term), knownSearcher.maxDoc()); - //float qNorm = sim.queryNorm() - //This is fine since we don't have stop words - //float lNorm = sim.lengthNorm("field", vector.getTerms().length); - //float coord = sim.coord() - //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm); - assertNotNull(vector); - TermsEnum termsEnum2 = vector.iterator(null); - - while(termsEnum2.next() != null) { - if (text.equals(termsEnum2.term().utf8ToString())) { - assertEquals(freq, termsEnum2.totalTermFreq()); - } - } - } - } - //System.out.println("--------"); - } - Query query = new TermQuery(new Term("field", "chocolate")); - ScoreDoc[] hits = knownSearcher.search(query, null, 1000).scoreDocs; - //doc 3 should be the first hit b/c it is the shortest match - assertTrue(hits.length == 3); - /*System.out.println("Hit 0: " + hits.id(0) + " Score: " + hits.score(0) + " String: " + hits.doc(0).toString()); - System.out.println("Explain: " + knownSearcher.explain(query, hits.id(0))); - System.out.println("Hit 1: " + hits.id(1) + " Score: " + hits.score(1) + " String: " + hits.doc(1).toString()); - System.out.println("Explain: " + knownSearcher.explain(query, hits.id(1))); - System.out.println("Hit 2: " + 
hits.id(2) + " Score: " + hits.score(2) + " String: " + hits.doc(2).toString()); - System.out.println("Explain: " + knownSearcher.explain(query, hits.id(2)));*/ - assertTrue(hits[0].doc == 2); - assertTrue(hits[1].doc == 3); - assertTrue(hits[2].doc == 0); - Terms vector = knownSearcher.reader.getTermVectors(hits[1].doc).terms("field"); - assertNotNull(vector); - //System.out.println("Vector: " + vector); - assertEquals(10, vector.size()); - TermsEnum termsEnum = vector.iterator(null); - while(termsEnum.next() != null) { - String term = termsEnum.term().utf8ToString(); - //System.out.println("Term: " + term); - int freq = (int) termsEnum.totalTermFreq(); - assertTrue(test4.indexOf(term) != -1); - Integer freqInt = test4Map.get(term); - assertTrue(freqInt != null); - assertEquals(freqInt.intValue(), freq); - } - reader.close(); - dir.close(); - } - - private void setupDoc(Document doc, String text) - { - FieldType ft = new FieldType(TextField.TYPE_STORED); - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - ft.setStoreTermVectorPositions(true); - FieldType ft2 = new FieldType(TextField.TYPE_STORED); - ft2.setStoreTermVectors(true); - doc.add(newField("field2", text, ft)); - doc.add(newField("field", text, ft2)); - //System.out.println("Document: " + doc); - } - - // Test only a few docs having vectors - public void testRareVectors() throws IOException { - RandomIndexWriter writer = new RandomIndexWriter(random(), directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)) - .setOpenMode(OpenMode.CREATE)); - if (VERBOSE) { - System.out.println("TEST: now add non-vectors"); - } - for (int i = 0; i < 100; i++) { - Document doc = new Document(); - doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES)); - writer.addDocument(doc); - } - if (VERBOSE) { - System.out.println("TEST: now add vectors"); - } - FieldType ft = new FieldType(TextField.TYPE_STORED); - ft.setStoreTermVectors(true); - ft.setStoreTermVectorOffsets(true); - ft.setStoreTermVectorPositions(true); - for(int i=0;i<10;i++) { - Document doc = new Document(); - doc.add(newField("field", English.intToEnglish(100+i), ft)); - writer.addDocument(doc); - } - - if (VERBOSE) { - System.out.println("TEST: now getReader"); - } - IndexReader reader = writer.getReader(); - writer.close(); - IndexSearcher searcher = newSearcher(reader); - - Query query = new TermQuery(new Term("field", "hundred")); - ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; - assertEquals(10, hits.length); - for (int i = 0; i < hits.length; i++) { - - Fields vectors = searcher.reader.getTermVectors(hits[i].doc); - assertNotNull(vectors); - assertEquals(1, vectors.size()); - } - reader.close(); - } - - // In a single doc, for the same field, mix the term // vectors up public void testMixedVectrosVectors() throws IOException { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java new file mode 100644 index 00000000000..af93522eb76 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java @@ -0,0 +1,632 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +import com.carrotsearch.randomizedtesting.generators.RandomPicks; + +/** + * Base class aiming at testing {@link TermVectorsFormat term vectors formats}. + * To test a new format, all you need is to register a new {@link Codec} which + * uses it and extend this class and override {@link #getCodec()}. + * @lucene.experimental + */ +public abstract class BaseTermVectorsFormatTestCase extends LuceneTestCase { + + private Codec savedCodec; + + /** + * Returns the Codec to run tests against + */ + protected abstract Codec getCodec(); + + public void setUp() throws Exception { + super.setUp(); + // set the default codec, so adding test cases to this isn't fragile + savedCodec = Codec.getDefault(); + Codec.setDefault(getCodec()); + } + + public void tearDown() throws Exception { + Codec.setDefault(savedCodec); // restore + super.tearDown(); + } + + /** + * A combination of term vectors options. 
+ */ + protected enum Options { + NONE(false, false, false), + POSITIONS(true, false, false), + OFFSETS(false, true, false), + POSITIONS_AND_OFFSETS(true, true, false), + POSITIONS_AND_PAYLOADS(true, false, true), + POSITIONS_AND_OFFSETS_AND_PAYLOADS(true, true, true); + final boolean positions, offsets, payloads; + private Options(boolean positions, boolean offsets, boolean payloads) { + this.positions = positions; + this.offsets = offsets; + this.payloads = payloads; + } + } + + protected Set validOptions() { + return EnumSet.allOf(Options.class); + } + + protected Options randomOptions() { + return RandomPicks.randomFrom(random(), new ArrayList(validOptions())); + } + + protected FieldType fieldType(Options options) { + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.setStoreTermVectors(true); + ft.setStoreTermVectorPositions(options.positions); + ft.setStoreTermVectorOffsets(options.offsets); + ft.setStoreTermVectorPayloads(options.payloads); + ft.freeze(); + return ft; + } + + protected BytesRef randomPayload() { + final int len = random().nextInt(5); + if (len == 0) { + return null; + } + final BytesRef payload = new BytesRef(len); + random().nextBytes(payload.bytes); + payload.length = len; + return payload; + } + + // custom impl to test cases that are forbidden by the default OffsetAttribute impl + private static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute { + + int start, end; + + @Override + public int startOffset() { + return start; + } + + @Override + public int endOffset() { + return end; + } + + @Override + public void setOffset(int startOffset, int endOffset) { + // no check! + start = startOffset; + end = endOffset; + } + + @Override + public void clear() { + start = end = 0; + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + + if (other instanceof PermissiveOffsetAttributeImpl) { + PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other; + return o.start == start && o.end == end; + } + + return false; + } + + @Override + public int hashCode() { + return start + 31 * end; + } + + @Override + public void copyTo(AttributeImpl target) { + OffsetAttribute t = (OffsetAttribute) target; + t.setOffset(start, end); + } + + } + + // TODO: use CannedTokenStream? + protected class RandomTokenStream extends TokenStream { + + final String[] terms; + final BytesRef[] termBytes; + final int[] positionsIncrements; + final int[] positions; + final int[] startOffsets, endOffsets; + final BytesRef[] payloads; + + final Map freqs; + final Map> positionToTerms; + final Map> startOffsetToTerms; + + final CharTermAttribute termAtt; + final PositionIncrementAttribute piAtt; + final OffsetAttribute oAtt; + final PayloadAttribute pAtt; + int i = 0; + + protected RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) { + this(len, sampleTerms, sampleTermBytes, rarely()); + } + + protected RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes, boolean offsetsGoBackwards) { + terms = new String[len]; + termBytes = new BytesRef[len]; + positionsIncrements = new int[len]; + positions = new int[len]; + startOffsets = new int[len]; + endOffsets = new int[len]; + payloads = new BytesRef[len]; + for (int i = 0; i < len; ++i) { + final int o = random().nextInt(sampleTerms.length); + terms[i] = sampleTerms[o]; + termBytes[i] = sampleTermBytes[o]; + positionsIncrements[i] = _TestUtil.nextInt(random(), i == 0 ? 
1 : 0, 10); + if (offsetsGoBackwards) { + startOffsets[i] = random().nextInt(); + endOffsets[i] = random().nextInt(); + } else { + if (i == 0) { + startOffsets[i] = _TestUtil.nextInt(random(), 0, 1 << 16); + } else { + startOffsets[i] = startOffsets[i-1] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20); + } + endOffsets[i] = startOffsets[i] + _TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20); + } + } + + for (int i = 0; i < len; ++i) { + if (i == 0) { + positions[i] = positionsIncrements[i] - 1; + } else { + positions[i] = positions[i - 1] + positionsIncrements[i]; + } + } + if (rarely()) { + Arrays.fill(payloads, randomPayload()); + } else { + for (int i = 0; i < len; ++i) { + payloads[i] = randomPayload(); + } + } + + positionToTerms = new HashMap>(len); + startOffsetToTerms = new HashMap>(len); + for (int i = 0; i < len; ++i) { + if (!positionToTerms.containsKey(positions[i])) { + positionToTerms.put(positions[i], new HashSet(1)); + } + positionToTerms.get(positions[i]).add(i); + if (!startOffsetToTerms.containsKey(startOffsets[i])) { + startOffsetToTerms.put(startOffsets[i], new HashSet(1)); + } + startOffsetToTerms.get(startOffsets[i]).add(i); + } + + freqs = new HashMap(); + for (String term : terms) { + if (freqs.containsKey(term)) { + freqs.put(term, freqs.get(term) + 1); + } else { + freqs.put(term, 1); + } + } + + addAttributeImpl(new PermissiveOffsetAttributeImpl()); + + termAtt = addAttribute(CharTermAttribute.class); + piAtt = addAttribute(PositionIncrementAttribute.class); + oAtt = addAttribute(OffsetAttribute.class); + pAtt = addAttribute(PayloadAttribute.class); + } + + public boolean hasPayloads() { + for (BytesRef payload : payloads) { + if (payload != null && payload.length > 0) { + return true; + } + } + return false; + } + + @Override + public final boolean incrementToken() throws IOException { + if (i < terms.length) { + termAtt.setLength(0).append(terms[i]); + piAtt.setPositionIncrement(positionsIncrements[i]); + oAtt.setOffset(startOffsets[i], endOffsets[i]); + pAtt.setPayload(payloads[i]); + ++i; + return true; + } else { + return false; + } + } + + } + + protected class RandomDocument { + + private final String[] fieldNames; + private final FieldType[] fieldTypes; + private final RandomTokenStream[] tokenStreams; + + protected RandomDocument(int fieldCount, int maxTermCount, Options options, String[] fieldNames, String[] sampleTerms, BytesRef[] sampleTermBytes) { + if (fieldCount > fieldNames.length) { + throw new IllegalArgumentException(); + } + this.fieldNames = new String[fieldCount]; + fieldTypes = new FieldType[fieldCount]; + tokenStreams = new RandomTokenStream[fieldCount]; + Arrays.fill(fieldTypes, fieldType(options)); + final Set usedFileNames = new HashSet(); + for (int i = 0; i < fieldCount; ++i) { + do { + this.fieldNames[i] = RandomPicks.randomFrom(random(), fieldNames); + } while (usedFileNames.contains(this.fieldNames[i])); + usedFileNames.add(this.fieldNames[i]); + tokenStreams[i] = new RandomTokenStream(_TestUtil.nextInt(random(), 1, maxTermCount), sampleTerms, sampleTermBytes); + } + } + + public Document toDocument() { + final Document doc = new Document(); + for (int i = 0; i < fieldNames.length; ++i) { + doc.add(new Field(fieldNames[i], tokenStreams[i], fieldTypes[i])); + } + return doc; + } + + } + + protected class RandomDocumentFactory { + + private final String[] fieldNames; + private final String[] terms; + private final BytesRef[] termBytes; + + protected RandomDocumentFactory(int distinctFieldNames, int disctinctTerms) { 
+ final Set fieldNames = new HashSet(); + while (fieldNames.size() < distinctFieldNames) { + fieldNames.add(_TestUtil.randomSimpleString(random())); + fieldNames.remove("id"); + } + this.fieldNames = fieldNames.toArray(new String[0]); + terms = new String[disctinctTerms]; + termBytes = new BytesRef[disctinctTerms]; + for (int i = 0; i < disctinctTerms; ++i) { + terms[i] = _TestUtil.randomRealisticUnicodeString(random()); + termBytes[i] = new BytesRef(terms[i]); + } + } + + public RandomDocument newDocument(int fieldCount, int maxTermCount, Options options) { + return new RandomDocument(fieldCount, maxTermCount, options, fieldNames, terms, termBytes); + } + + } + + protected void assertEquals(RandomDocument doc, Fields fields) throws IOException { + // compare field names + assertEquals(doc == null, fields == null); + assertEquals(doc.fieldNames.length, fields.size()); + final Set fields1 = new HashSet(); + final Set fields2 = new HashSet(); + for (int i = 0; i < doc.fieldNames.length; ++i) { + fields1.add(doc.fieldNames[i]); + } + for (String field : fields) { + fields2.add(field); + } + assertEquals(fields1, fields2); + + for (int i = 0; i < doc.fieldNames.length; ++i) { + assertEquals(doc.tokenStreams[i], doc.fieldTypes[i], fields.terms(doc.fieldNames[i])); + } + } + + protected static boolean equals(Object o1, Object o2) { + if (o1 == null) { + return o2 == null; + } else { + return o1.equals(o2); + } + } + + // to test reuse + private TermsEnum termsEnum = null; + private DocsEnum docsEnum = null; + private DocsAndPositionsEnum docsAndPositionsEnum = null; + + protected void assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) throws IOException { + assertEquals(1, terms.getDocCount()); + final int termCount = new HashSet(Arrays.asList(tk.terms)).size(); + assertEquals(termCount, terms.size()); + assertEquals(termCount, terms.getSumDocFreq()); + assertEquals(ft.storeTermVectorPositions(), terms.hasPositions()); + assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets()); + assertEquals(ft.storeTermVectorPayloads() && tk.hasPayloads(), terms.hasPayloads()); + final Set uniqueTerms = new HashSet(); + for (String term : tk.freqs.keySet()) { + uniqueTerms.add(new BytesRef(term)); + } + final BytesRef[] sortedTerms = uniqueTerms.toArray(new BytesRef[0]); + Arrays.sort(sortedTerms, terms.getComparator()); + termsEnum = terms.iterator(random().nextBoolean() ? null : termsEnum); + for (int i = 0; i < sortedTerms.length; ++i) { + final BytesRef nextTerm = termsEnum.next(); + assertEquals(sortedTerms[i], nextTerm); + assertEquals(sortedTerms[i], termsEnum.term()); + assertEquals(1, termsEnum.docFreq()); + + final FixedBitSet bits = new FixedBitSet(1); + docsEnum = termsEnum.docs(bits, random().nextBoolean() ? null : docsEnum); + assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc()); + bits.set(0); + + docsEnum = termsEnum.docs(random().nextBoolean() ? bits : null, random().nextBoolean() ? null : docsEnum); + assertNotNull(docsEnum); + assertEquals(0, docsEnum.nextDoc()); + assertEquals(0, docsEnum.docID()); + assertEquals(tk.freqs.get(termsEnum.term().utf8ToString()), (Integer) docsEnum.freq()); + assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc()); + + bits.clear(0); + docsAndPositionsEnum = termsEnum.docsAndPositions(bits, random().nextBoolean() ? 
null : docsAndPositionsEnum); + assertEquals(ft.storeTermVectorOffsets() || ft.storeTermVectorPositions(), docsAndPositionsEnum != null); + if (docsAndPositionsEnum != null) { + assertEquals(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); + } + bits.set(0); + + docsAndPositionsEnum = termsEnum.docsAndPositions(random().nextBoolean() ? bits : null, random().nextBoolean() ? null : docsAndPositionsEnum); + assertEquals(ft.storeTermVectorOffsets() || ft.storeTermVectorPositions(), docsAndPositionsEnum != null); + if (terms.hasPositions() || terms.hasOffsets()) { + assertEquals(0, docsAndPositionsEnum.nextDoc()); + final int freq = docsAndPositionsEnum.freq(); + assertEquals(tk.freqs.get(termsEnum.term().utf8ToString()), (Integer) freq); + if (docsAndPositionsEnum != null) { + for (int k = 0; k < freq; ++k) { + final int position = docsAndPositionsEnum.nextPosition(); + final Set indexes; + if (terms.hasPositions()) { + indexes = tk.positionToTerms.get(position); + assertNotNull(indexes); + } else { + indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset()); + assertNotNull(indexes); + } + if (terms.hasPositions()) { + boolean foundPosition = false; + for (int index : indexes) { + if (tk.termBytes[index].equals(termsEnum.term()) && tk.positions[index] == position) { + foundPosition = true; + break; + } + } + assertTrue(foundPosition); + } + if (terms.hasOffsets()) { + boolean foundOffset = false; + for (int index : indexes) { + if (tk.termBytes[index].equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) { + foundOffset = true; + break; + } + } + assertTrue(foundOffset); + } + if (terms.hasPayloads()) { + boolean foundPayload = false; + for (int index : indexes) { + if (tk.termBytes[index].equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) { + foundPayload = true; + break; + } + } + assertTrue(foundPayload); + } + } + try { + docsAndPositionsEnum.nextPosition(); + fail(); + } catch (Exception e) { + // ok + } catch (AssertionError e) { + // ok + } + } + assertEquals(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); + } + } + assertNull(termsEnum.next()); + for (int i = 0; i < 5; ++i) { + if (random().nextBoolean()) { + assertTrue(termsEnum.seekExact(RandomPicks.randomFrom(random(), tk.termBytes), random().nextBoolean())); + } else { + assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(RandomPicks.randomFrom(random(), tk.termBytes), random().nextBoolean())); + } + } + } + + protected Document addId(Document doc, String id) { + doc.add(new StringField("id", id, Store.NO)); + return doc; + } + + protected int docID(IndexReader reader, String id) throws IOException { + return new IndexSearcher(reader).search(new TermQuery(new Term("id", id)), 1).scoreDocs[0].doc; + } + + // only one doc with vectors + public void testRareVectors() throws IOException { + final RandomDocumentFactory docFactory = new RandomDocumentFactory(10, 20); + for (Options options : validOptions()) { + final int numDocs = _TestUtil.nextInt(random(), 10, 10000); + final int docWithVectors = random().nextInt(numDocs); + final Document emptyDoc = new Document(); + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), 20, options); + for (int i = 0; i < numDocs; ++i) { + if (i == docWithVectors) { + 
writer.addDocument(addId(doc.toDocument(), "42")); + } else { + writer.addDocument(emptyDoc); + } + } + final IndexReader reader = writer.getReader(); + final int docWithVectorsID = docID(reader, "42"); + for (int i = 0; i < 10; ++i) { + final int docID = random().nextInt(numDocs); + final Fields fields = reader.getTermVectors(docID); + if (docID == docWithVectorsID) { + assertEquals(doc, fields); + } else { + assertNull(fields); + } + } + final Fields fields = reader.getTermVectors(docWithVectorsID); + assertEquals(doc, fields); + reader.close(); + writer.close(); + dir.close(); + } + } + + public void testHighFreqs() throws IOException { + final RandomDocumentFactory docFactory = new RandomDocumentFactory(3, 5); + for (Options options : validOptions()) { + if (options == Options.NONE) { + continue; + } + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 2), _TestUtil.nextInt(random(), 50000, 100000), options); + writer.addDocument(doc.toDocument()); + final IndexReader reader = writer.getReader(); + assertEquals(doc, reader.getTermVectors(0)); + reader.close(); + writer.close(); + dir.close(); + } + } + + public void testLotsOfFields() throws IOException { + final RandomDocumentFactory docFactory = new RandomDocumentFactory(5000, 10); + for (Options options : validOptions()) { + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 500, 1000), 5, options); + writer.addDocument(doc.toDocument()); + final IndexReader reader = writer.getReader(); + assertEquals(doc, reader.getTermVectors(0)); + reader.close(); + writer.close(); + dir.close(); + } + } + + // different options for the same field + public void testMixedOptions() throws IOException { + final int numFields = _TestUtil.nextInt(random(), 1, 3); + final RandomDocumentFactory docFactory = new RandomDocumentFactory(numFields, 10); + for (Options options1 : validOptions()) { + for (Options options2 : validOptions()) { + if (options1 == options2) { + continue; + } + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + final RandomDocument doc1 = docFactory.newDocument(numFields, 20, options1); + final RandomDocument doc2 = docFactory.newDocument(numFields, 20, options2); + writer.addDocument(addId(doc1.toDocument(), "1")); + writer.addDocument(addId(doc2.toDocument(), "2")); + final IndexReader reader = writer.getReader(); + final int doc1ID = docID(reader, "1"); + assertEquals(doc1, reader.getTermVectors(doc1ID)); + final int doc2ID = docID(reader, "2"); + assertEquals(doc2, reader.getTermVectors(doc2ID)); + reader.close(); + writer.close(); + dir.close(); + } + } + } + + public void testRandom() throws IOException { + final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20); + final int numDocs = _TestUtil.nextInt(random(), 100, 1000); + final RandomDocument[] docs = new RandomDocument[numDocs]; + for (int i = 0; i < numDocs; ++i) { + docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), randomOptions()); + } + final Directory dir = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + for (int i = 0; i < numDocs; ++i) { + writer.addDocument(docs[i].toDocument()); + } + final IndexReader reader = 
writer.getReader(); + for (int i = 0; i < numDocs; ++i) { + assertEquals(docs[i], reader.getTermVectors(i)); + } + reader.close(); + writer.close(); + dir.close(); + } + +} From 6ae5a4e52d0c46923573643481fe79685aca9967 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 1 Feb 2013 11:39:58 +0000 Subject: [PATCH 2/2] Fix test bug (used to rely on internal doc IDs). git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1441395 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/BaseTermVectorsFormatTestCase.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java index af93522eb76..d4c4a6857ce 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java @@ -618,11 +618,12 @@ public abstract class BaseTermVectorsFormatTestCase extends LuceneTestCase { final Directory dir = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); for (int i = 0; i < numDocs; ++i) { - writer.addDocument(docs[i].toDocument()); + writer.addDocument(addId(docs[i].toDocument(), ""+i)); } final IndexReader reader = writer.getReader(); for (int i = 0; i < numDocs; ++i) { - assertEquals(docs[i], reader.getTermVectors(i)); + final int docID = docID(reader, ""+i); + assertEquals(docs[i], reader.getTermVectors(docID)); } reader.close(); writer.close();
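
To wire another codec's term vectors format into this suite, the pattern is the same as the three test classes added above: extend BaseTermVectorsFormatTestCase and return the codec from getCodec(); formats that only support a subset of positions/offsets/payloads can additionally override the protected validOptions() hook. A minimal sketch, assuming a hypothetical MyCodec and test class name (illustration only, not part of this patch):

    package org.apache.lucene.codecs.mycodec;

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.index.BaseTermVectorsFormatTestCase;

    public class TestMyCodecTermVectorsFormat extends BaseTermVectorsFormatTestCase {

      @Override
      protected Codec getCodec() {
        // Any Codec whose TermVectorsFormat should be exercised by the base
        // class's tests (testRareVectors, testHighFreqs, testLotsOfFields,
        // testMixedOptions, testRandom). MyCodec is a placeholder name.
        return new MyCodec();
      }

      // If the format does not support every combination of positions,
      // offsets and payloads, restrict the randomized options here:
      // @Override
      // protected Set<Options> validOptions() {
      //   return EnumSet.of(Options.NONE, Options.POSITIONS, Options.POSITIONS_AND_OFFSETS);
      // }
    }

Because setUp() installs the codec returned by getCodec() as the default and tearDown() restores the previous default, such a subclass needs no further plumbing to run against its own format.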