LUCENE-3431: Removed deprecated addStopwords methods in QueryAutoStopWordAnalyzer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1170424 13f79535-47bb-0310-9956-ffa450edef68
2011-09-14 03:33:50 +00:00 · 2011-09-14 03:33:50 +00:00 · 94028fe11a
parent d6ab323d4e
commit 94028fe11a
3 changed files with 164 additions and 183 deletions
--- a/modules/analysis/CHANGES.txt
+++ b/modules/analysis/CHANGES.txt
@ -34,6 +34,10 @@ API Changes

 * LUCENE-3400: Removed DutchAnalyzer.setStemDictionary (Chris Male)

+ * LUCENE-3431: Removed QueryAutoStopWordAnalyzer.addStopWords* deprecated methods
+   since they prevented reuse.  Stopwords are now generated at instantiation through
+   the Analyzer's constructors. (Chris Male)
+
 New Features

 * LUCENE-2341: A new analyzer/ filter: Morfologik - a dictionary-driven lemmatizer 
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@ -41,131 +41,130 @@ import java.util.*;
 * a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for 
 * this term to take 2 seconds.
 * </p>
- * <p>
- * Use the various "addStopWords" methods in this class to automate the identification and addition of 
- * stop words found in an already existing index.
- * </p>
 */
 public final class QueryAutoStopWordAnalyzer extends Analyzer {
-  Analyzer delegate;
-  HashMap<String,HashSet<String>> stopWordsPerField = new HashMap<String,HashSet<String>>();
+
+  private final Analyzer delegate;
+  private final Map<String, Set<String>> stopWordsPerField = new HashMap<String, Set<String>>();
  //The default maximum percentage (40%) of index documents which
  //can contain a term, after which the term is considered to be a stop word.
  public static final float defaultMaxDocFreqPercent = 0.4f;
  private final Version matchVersion;

  /**
-   * Initializes this analyzer with the Analyzer object that actually produces the tokens
+   * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+   * indexed fields from terms with a document frequency percentage greater than
+   * {@link #defaultMaxDocFreqPercent}
   *
-   * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering
+   * @param matchVersion Version to be used in {@link StopFilter}
+   * @param delegate Analyzer whose TokenStream will be filtered
+   * @param indexReader IndexReader to identify the stopwords from
+   * @throws IOException Can be thrown while reading from the IndexReader
   */
-  public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer delegate) {
-    this.delegate = delegate;
+  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
+      Analyzer delegate,
+      IndexReader indexReader) throws IOException {
+    this(matchVersion, delegate, indexReader, defaultMaxDocFreqPercent);
+  }
+
+  /**
+   * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+   * indexed fields from terms with a document frequency greater than the given
+   * maxDocFreq
+   *
+   * @param matchVersion Version to be used in {@link StopFilter}
+   * @param delegate Analyzer whose TokenStream will be filtered
+   * @param indexReader IndexReader to identify the stopwords from
+   * @param maxDocFreq Document frequency terms should be above in order to be stopwords
+   * @throws IOException Can be thrown while reading from the IndexReader
+   */
+  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
+      Analyzer delegate,
+      IndexReader indexReader,
+      int maxDocFreq) throws IOException {
+    this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxDocFreq);
+  }
+
+  /**
+   * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+   * indexed fields from terms with a document frequency percentage greater than
+   * the given maxPercentDocs
+   *
+   * @param matchVersion Version to be used in {@link StopFilter}
+   * @param delegate Analyzer whose TokenStream will be filtered
+   * @param indexReader IndexReader to identify the stopwords from
+   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
+   *                      contain a term, after which the word is considered to be a stop word
+   * @throws IOException Can be thrown while reading from the IndexReader
+   */
+  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
+      Analyzer delegate, 
+      IndexReader indexReader,
+      float maxPercentDocs) throws IOException {
+    this(matchVersion, delegate, indexReader, indexReader.getFieldNames(IndexReader.FieldOption.INDEXED), maxPercentDocs);
+  }
+
+  /**
+   * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+   * given selection of fields from terms with a document frequency percentage
+   * greater than the given maxPercentDocs
+   *
+   * @param matchVersion Version to be used in {@link StopFilter}
+   * @param delegate Analyzer whose TokenStream will be filtered
+   * @param indexReader IndexReader to identify the stopwords from
+   * @param fields Selection of fields to calculate stopwords for
+   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
+   *                      contain a term, after which the word is considered to be a stop word
+   * @throws IOException Can be thrown while reading from the IndexReader
+   */
+  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
+      Analyzer delegate,
+      IndexReader indexReader,
+      Collection<String> fields,
+      float maxPercentDocs) throws IOException {
+    this(matchVersion, delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
+  }
+
+  /**
+   * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+   * given selection of fields from terms with a document frequency greater than
+   * the given maxDocFreq
+   *
+   * @param matchVersion Version to be used in {@link StopFilter}
+   * @param delegate Analyzer whose TokenStream will be filtered
+   * @param indexReader IndexReader to identify the stopwords from
+   * @param fields Selection of fields to calculate stopwords for
+   * @param maxDocFreq Document frequency terms should be above in order to be stopwords
+   * @throws IOException Can be thrown while reading from the IndexReader
+   */
+  public QueryAutoStopWordAnalyzer(
+      Version matchVersion,
+      Analyzer delegate,
+      IndexReader indexReader,
+      Collection<String> fields,
+      int maxDocFreq) throws IOException {
    this.matchVersion = matchVersion;
-  }
-
-  /**
-   * Automatically adds stop words for all fields with terms exceeding the defaultMaxDocFreqPercent
-   *
-   * @param reader The {@link IndexReader} which will be consulted to identify potential stop words that
-   *               exceed the required document frequency
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int addStopWords(IndexReader reader) throws IOException {
-    return addStopWords(reader, defaultMaxDocFreqPercent);
-  }
-
-  /**
-   * Automatically adds stop words for all fields with terms exceeding the maxDocFreqPercent
-   *
-   * @param reader     The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                   exceed the required document frequency
-   * @param maxDocFreq The maximum number of index documents which can contain a term, after which
-   *                   the term is considered to be a stop word
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int addStopWords(IndexReader reader, int maxDocFreq) throws IOException {
-    int numStopWords = 0;
-    Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
-    for (Iterator<String> iter = fieldNames.iterator(); iter.hasNext();) {
-      String fieldName = iter.next();
-      numStopWords += addStopWords(reader, fieldName, maxDocFreq);
-    }
-    return numStopWords;
-  }
-
-  /**
-   * Automatically adds stop words for all fields with terms exceeding the maxDocFreqPercent
-   *
-   * @param reader        The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                      exceed the required document frequency
-   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
-   *                      contain a term, after which the word is considered to be a stop word.
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int addStopWords(IndexReader reader, float maxPercentDocs) throws IOException {
-    int numStopWords = 0;
-    Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
-    for (Iterator<String> iter = fieldNames.iterator(); iter.hasNext();) {
-      String fieldName = iter.next();
-      numStopWords += addStopWords(reader, fieldName, maxPercentDocs);
-    }
-    return numStopWords;
-  }
-
-  /**
-   * Automatically adds stop words for the given field with terms exceeding the maxPercentDocs
-   *
-   * @param reader         The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                       exceed the required document frequency
-   * @param fieldName      The field for which stopwords will be added
-   * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
-   *                       contain a term, after which the word is considered to be a stop word.
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int addStopWords(IndexReader reader, String fieldName, float maxPercentDocs) throws IOException {
-    return addStopWords(reader, fieldName, (int) (reader.numDocs() * maxPercentDocs));
-  }
-
-  /**
-   * Automatically adds stop words for the given field with terms exceeding the maxPercentDocs
-   *
-   * @param reader     The {@link IndexReader} which will be consulted to identify potential stop words that
-   *                   exceed the required document frequency
-   * @param fieldName  The field for which stopwords will be added
-   * @param maxDocFreq The maximum number of index documents which
-   *                   can contain a term, after which the term is considered to be a stop word.
-   * @return The number of stop words identified.
-   * @throws IOException
-   */
-  public int addStopWords(IndexReader reader, String fieldName, int maxDocFreq) throws IOException {
-    HashSet<String> stopWords = new HashSet<String>();
-    final Terms terms = MultiFields.getTerms(reader, fieldName);
-    final CharsRef spare = new CharsRef();
-    if (terms != null) {
-      final TermsEnum te = terms.iterator();
-      BytesRef text;
-      while ((text = te.next()) != null) {
-        if (te.docFreq() > maxDocFreq) {
-          stopWords.add(text.utf8ToChars(spare).toString());
+    this.delegate = delegate;
+    
+    for (String field : fields) {
+      Set<String> stopWords = new HashSet<String>();
+      Terms terms = MultiFields.getTerms(indexReader, field);
+      CharsRef spare = new CharsRef();
+      if (terms != null) {
+        TermsEnum te = terms.iterator();
+        BytesRef text;
+        while ((text = te.next()) != null) {
+          if (te.docFreq() > maxDocFreq) {
+            stopWords.add(text.utf8ToChars(spare).toString());
+          }
        }
      }
+      stopWordsPerField.put(field, stopWords);
    }
-    stopWordsPerField.put(fieldName, stopWords);
-    
-    /* if the stopwords for a field are changed,
-     * then saved streams for that field are erased.
-     */
-    @SuppressWarnings("unchecked")
-    Map<String,SavedStreams> streamMap = (Map<String,SavedStreams>) getPreviousTokenStream();
-    if (streamMap != null)
-      streamMap.remove(fieldName);
-    
-    return stopWords.size();
  }

  @Override
@ -176,7 +175,7 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
    } catch (IOException e) {
      result = delegate.tokenStream(fieldName, reader);
    }
-    HashSet<String> stopWords = stopWordsPerField.get(fieldName);
+    Set<String> stopWords = stopWordsPerField.get(fieldName);
    if (stopWords != null) {
      result = new StopFilter(matchVersion, result, stopWords);
    }
@ -193,12 +192,11 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
     */
    TokenStream withStopFilter;
  }
-  
+
+  @SuppressWarnings("unchecked")
  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
+  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    /* map of SavedStreams for each field */
-    @SuppressWarnings("unchecked")
    Map<String,SavedStreams> streamMap = (Map<String,SavedStreams>) getPreviousTokenStream();
    if (streamMap == null) {
      streamMap = new HashMap<String, SavedStreams>();
@ -213,31 +211,32 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
      streams.wrapped = delegate.reusableTokenStream(fieldName, reader);

      /* if there are any stopwords for the field, save the stopfilter */
-      HashSet<String> stopWords = stopWordsPerField.get(fieldName);
-      if (stopWords != null)
+      Set<String> stopWords = stopWordsPerField.get(fieldName);
+      if (stopWords != null) {
        streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
-      else
+      } else {
        streams.withStopFilter = streams.wrapped;
-
+      }
    } else {
      /*
-       * an entry for this field exists, verify the wrapped stream has not
-       * changed. if it has not, reuse it, otherwise wrap the new stream.
-       */
+      * an entry for this field exists, verify the wrapped stream has not
+      * changed. if it has not, reuse it, otherwise wrap the new stream.
+      */
      TokenStream result = delegate.reusableTokenStream(fieldName, reader);
      if (result == streams.wrapped) {
        /* the wrapped analyzer reused the stream */
      } else {
        /*
-         * the wrapped analyzer did not. if there are any stopwords for the
-         * field, create a new StopFilter around the new stream
-         */
+        * the wrapped analyzer did not. if there are any stopwords for the
+        * field, create a new StopFilter around the new stream
+        */
        streams.wrapped = result;
-        HashSet<String> stopWords = stopWordsPerField.get(fieldName);
-        if (stopWords != null)
+        Set<String> stopWords = stopWordsPerField.get(fieldName);
+        if (stopWords != null) {
          streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
-        else
+        } else {
          streams.withStopFilter = streams.wrapped;
+        }
      }
    }

@ -252,14 +251,8 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
   * @return the stop words identified for a field
   */
  public String[] getStopWords(String fieldName) {
-    String[] result;
-    HashSet<String> stopWords = stopWordsPerField.get(fieldName);
-    if (stopWords != null) {
-      result = stopWords.toArray(new String[stopWords.size()]);
-    } else {
-      result = new String[0];
-    }
-    return result;
+    Set<String> stopWords = stopWordsPerField.get(fieldName);
+    return stopWords != null ? stopWords.toArray(new String[stopWords.size()]) : new String[0];
  }

  /**
@ -268,12 +261,10 @@ public final class QueryAutoStopWordAnalyzer extends Analyzer {
   * @return the stop words (as terms)
   */
  public Term[] getStopWords() {
-    ArrayList<Term> allStopWords = new ArrayList<Term>();
-    for (Iterator<String> iter = stopWordsPerField.keySet().iterator(); iter.hasNext();) {
-      String fieldName = iter.next();
-      HashSet<String> stopWords = stopWordsPerField.get(fieldName);
-      for (Iterator<String> iterator = stopWords.iterator(); iterator.hasNext();) {
-        String text = iterator.next();
+    List<Term> allStopWords = new ArrayList<Term>();
+    for (String fieldName : stopWordsPerField.keySet()) {
+      Set<String> stopWords = stopWordsPerField.get(fieldName);
+      for (String text : stopWords) {
        allStopWords.add(new Term(fieldName, text));
      }
    }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
@ -16,23 +16,19 @@ package org.apache.lucene.analysis.query;
 * limitations under the License.
 */

-import java.io.Reader;
-import java.io.StringReader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;

+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.Collections;
+
 public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
  String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
  String repetitiveFieldValues[] = {"boring", "boring", "vaguelyboring"};
@ -58,7 +54,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
    }
    writer.close();
    reader = IndexReader.open(dir, true);
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer);
  }

  @Override
@ -67,9 +62,9 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
    super.tearDown();
  }

-  public void testUninitializedAnalyzer() throws Exception {
-    // Note: no calls to "addStopWord"
-    // query = "variedField:quick repetitiveField:boring";
+  public void testNoStopwords() throws Exception {
+    // Note: an empty list of fields passed in
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.EMPTY_LIST, 1);
    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("quick"));
    assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});

@ -77,21 +72,14 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
    assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
  }

-  /*
-    * Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader)'
-    */
-  public void testDefaultAddStopWordsIndexReader() throws Exception {
-    protectedAnalyzer.addStopWords(reader);
+  public void testDefaultStopwordsAllFields() throws Exception {
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);
    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
-
    assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
  }

-  /*
-    * Test method for 'org.apache.lucene.analysis.QueryAutoStopWordAnalyzer.addStopWords(IndexReader, int)'
-    */
-  public void testAddStopWordsIndexReaderInt() throws Exception {
-    protectedAnalyzer.addStopWords(reader, 1f / 2f);
+  public void testStopwordsAllFieldsMaxPercentDocs() throws Exception {
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);

    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
    // A filter on terms in > one half of docs remove boring
@ -101,39 +89,36 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
     // A filter on terms in > half of docs should not remove vaguelyBoring
    assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});

-    protectedAnalyzer.addStopWords(reader, 1f / 4f);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);
    protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
     // A filter on terms in > quarter of docs should remove vaguelyBoring
    assertTokenStreamContents(protectedTokenStream, new String[0]);
  }

-  public void testAddStopWordsIndexReaderStringFloat() throws Exception {
-    protectedAnalyzer.addStopWords(reader, "variedField", 1f / 2f);
+  public void testStopwordsPerFieldMaxPercentDocs() throws Exception {
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f);
    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
    // A filter on one Field should not affect queries on another
    assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});

-    protectedAnalyzer.addStopWords(reader, "repetitiveField", 1f / 2f);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f);
    protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
    // A filter on the right Field should affect queries on it
    assertTokenStreamContents(protectedTokenStream, new String[0]);
  }

-  public void testAddStopWordsIndexReaderStringInt() throws Exception {
-    int numStopWords = protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
+  public void testStopwordsPerFieldMaxDocFreq() throws Exception {
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);
+    int numStopWords = protectedAnalyzer.getStopWords("repetitiveField").length;
    assertTrue("Should have identified stop words", numStopWords > 0);

-    Term[] t = protectedAnalyzer.getStopWords();
-    assertEquals("num terms should = num stopwords returned", t.length, numStopWords);
-
-    int numNewStopWords = protectedAnalyzer.addStopWords(reader, "variedField", 10);
-    assertTrue("Should have identified more stop words", numNewStopWords > 0);
-    t = protectedAnalyzer.getStopWords();
-    assertEquals("num terms should = num stopwords returned", t.length, numStopWords + numNewStopWords);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField", "variedField"), 10);
+    int numNewStopWords = protectedAnalyzer.getStopWords("repetitiveField").length + protectedAnalyzer.getStopWords("variedField").length;
+    assertTrue("Should have identified more stop words", numNewStopWords > numStopWords);
  }

  public void testNoFieldNamePollution() throws Exception {
-    protectedAnalyzer.addStopWords(reader, "repetitiveField", 10);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);

    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
    // Check filter set up OK
@ -145,8 +130,9 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
  }
  
  public void testTokenStream() throws Exception {
-    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
-    a.addStopWords(reader, 10);
+    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(
+        TEST_VERSION_CURRENT,
+        new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), reader, 10);
    TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
    assertTokenStreamContents(ts, new String[] { "this" });
  }