mirror of https://github.com/apache/lucene.git

commit a949836869 (parent 5e77ec9845)

LUCENE-2034: Refactor analyzer reuse and stopword handling

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@895339 13f79535-47bb-0310-9956-ffa450edef68
==== CHANGES.txt ====

@@ -105,6 +105,12 @@ New features
   backwards compatibility. If Version < 3.1 is passed to the constructor,
   LowerCaseFilter yields the old behavior. (Simon Willnauer, Robert Muir)
 
+* LUCENE-2034: Added ReusableAnalyzerBase, an abstract subclass of Analyzer
+  that makes it easier to reuse TokenStreams correctly. This issue also added
+  StopwordAnalyzerBase, which improves consistency of all Analyzers that use
+  stopwords, and reimplemented many contrib analyzers on top of it.
+  (Simon Willnauer via Robert Muir)
+
 Optimizations
 
 * LUCENE-2086: When resolving deleted terms, do so in term sort order
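Every file below follows the same pattern: the tokenStream()/reusableTokenStream()
pair and the hand-rolled SavedStreams holder collapse into a single
createComponents() override, while the reuse bookkeeping moves into
ReusableAnalyzerBase. As a rough sketch of the new contract (MyAnalyzer is a
hypothetical name; the individual calls are the same ones used by the hunks
below), a minimal analyzer on the new base class looks like this:

    import java.io.Reader;

    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    public final class MyAnalyzer extends ReusableAnalyzerBase {
      private final Version matchVersion;

      public MyAnalyzer(Version matchVersion) {
        this.matchVersion = matchVersion;
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // Build the chain once; the base class caches the returned components
        // per thread and merely resets the Tokenizer to each new Reader.
        final Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(source);
        result = new LowerCaseFilter(matchVersion, result);
        return new TokenStreamComponents(source, result);
      }
    }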
==== org/apache/lucene/analysis/ar/ArabicAnalyzer.java ====

@@ -19,17 +19,15 @@ package org.apache.lucene.analysis.ar;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
-import java.util.Collections;
 import java.util.Hashtable;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;

@@ -52,7 +50,7 @@ import org.apache.lucene.util.Version;
  * </ul>
  *
  */
-public final class ArabicAnalyzer extends Analyzer {
+public final class ArabicAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * File containing default Arabic stopwords.

@@ -62,21 +60,18 @@ public final class ArabicAnalyzer extends Analyzer {
    */
   public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
 
-  /**
-   * Contains the stopwords used with the StopFilter.
-   */
-  private final Set<?> stoptable;
   /**
    * The comment character in the stopwords file. All lines prefixed with this will be ignored
+   * @deprecated use {@link WordlistLoader#getWordSet(File, String)} directly
    */
   // TODO make this private
   public static final String STOPWORDS_COMMENT = "#";
 
   /**
    * Returns an unmodifiable instance of the default stop-words set.
    * @return an unmodifiable instance of the default stop-words set.
    */
-  public static Set<String> getDefaultStopSet(){
+  public static Set<?> getDefaultStopSet(){
     return DefaultSetHolder.DEFAULT_STOP_SET;
   }
 

@@ -85,34 +80,19 @@ public final class ArabicAnalyzer extends Analyzer {
    * accesses the static final set the first time.;
    */
   private static class DefaultSetHolder {
-    static final Set<String> DEFAULT_STOP_SET;
+    static final Set<?> DEFAULT_STOP_SET;
 
     static {
       try {
-        DEFAULT_STOP_SET = loadDefaultStopWordSet();
+        DEFAULT_STOP_SET = loadStopwordSet(false, ArabicAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
       } catch (IOException ex) {
         // default set should always be present as it is part of the
         // distribution (JAR)
         throw new RuntimeException("Unable to load default stopword set");
       }
     }
-
-    static Set<String> loadDefaultStopWordSet() throws IOException {
-      InputStream stream = ArabicAnalyzer.class
-          .getResourceAsStream(DEFAULT_STOPWORD_FILE);
-      try {
-        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
-        // make sure it is unmodifiable as we expose it in the outer class
-        return Collections.unmodifiableSet(WordlistLoader.getWordSet(reader,
-            STOPWORDS_COMMENT));
-      } finally {
-        stream.close();
-      }
-    }
   }
 
-  private final Version matchVersion;
-
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
    */

@@ -129,8 +109,7 @@ public final class ArabicAnalyzer extends Analyzer {
    *          a stopword set
    */
   public ArabicAnalyzer(Version matchVersion, Set<?> stopwords){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**

@@ -159,54 +138,21 @@ public final class ArabicAnalyzer extends Analyzer {
 
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
+   * @return {@link TokenStreamComponents} built from an {@link ArabicLetterTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
    *         and {@link ArabicStemFilter}.
    */
   @Override
-  public final TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new ArabicLetterTokenizer( reader );
-    result = new LowerCaseFilter(matchVersion, result);
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new ArabicLetterTokenizer(reader);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
     // the order here is important: the stopword list is not normalized!
-    result = new StopFilter( matchVersion, result, stoptable );
-    result = new ArabicNormalizationFilter( result );
-    result = new ArabicStemFilter( result );
-
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with
-   *         {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter}
-   *         and {@link ArabicStemFilter}.
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new ArabicLetterTokenizer(reader);
-      streams.result = new LowerCaseFilter(matchVersion, streams.source);
-      // the order here is important: the stopword list is not normalized!
-      streams.result = new StopFilter( matchVersion, streams.result, stoptable);
-      streams.result = new ArabicNormalizationFilter(streams.result);
-      streams.result = new ArabicStemFilter(streams.result);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+    result = new StopFilter( matchVersion, result, stopwords);
+    result = new ArabicNormalizationFilter(result);
+    return new TokenStreamComponents(source, new ArabicStemFilter(result));
   }
 }
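The stopword half of the refactoring is just as mechanical: the per-class
loadDefaultStopWordSet() boilerplate becomes one call to the protected
loadStopwordSet(ignoreCase, clazz, resource, comment) helper, and the
constructor's defensive CharArraySet copying moves into the
StopwordAnalyzerBase constructor, which exposes the set to createComponents()
through the protected 'stopwords' field. A hedged sketch of the resulting
subclass shape (class and resource names are illustrative; every call mirrors
the ArabicAnalyzer hunks above):

    import java.io.IOException;
    import java.io.Reader;
    import java.util.Set;

    import org.apache.lucene.analysis.StopFilter;
    import org.apache.lucene.analysis.StopwordAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
    import org.apache.lucene.util.Version;

    public final class MyStopwordAnalyzer extends StopwordAnalyzerBase {
      public static final String DEFAULT_STOPWORD_FILE = "stopwords.txt";

      private static class DefaultSetHolder {
        static final Set<?> DEFAULT_STOP_SET;
        static {
          try {
            // Reads the classpath resource next to the analyzer, skipping "#" comments.
            DEFAULT_STOP_SET = loadStopwordSet(false, MyStopwordAnalyzer.class,
                DEFAULT_STOPWORD_FILE, "#");
          } catch (IOException ex) {
            // The default set ships inside the JAR, so failure here is a packaging bug.
            throw new RuntimeException("Unable to load default stopword set");
          }
        }
      }

      public MyStopwordAnalyzer(Version matchVersion, Set<?> stopwords) {
        // The base constructor takes over the unmodifiable-CharArraySet copy
        // the old analyzer constructors did by hand.
        super(matchVersion, stopwords);
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final Tokenizer source = new ArabicLetterTokenizer(reader);
        // 'stopwords' and 'matchVersion' are the protected fields the base class provides.
        return new TokenStreamComponents(source,
            new StopFilter(matchVersion, source, stopwords));
      }
    }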
==== org/apache/lucene/analysis/bg/BulgarianAnalyzer.java ====

@@ -17,17 +17,16 @@ package org.apache.lucene.analysis.bg;
  * limitations under the License.
  */
 
+import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
-import java.util.Collections;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;

@@ -43,7 +42,7 @@
  * http://members.unine.ch/jacques.savoy/Papers/BUIR.pdf
  * <p>
  */
-public final class BulgarianAnalyzer extends Analyzer {
+public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * File containing default Bulgarian stopwords.

@@ -54,14 +53,12 @@ public final class BulgarianAnalyzer extends Analyzer {
    */
   public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
 
-  /**
-   * Contains the stopwords used with the StopFilter.
-   */
-  private final Set<?> stoptable;
   /**
    * The comment character in the stopwords file. All lines prefixed with this
    * will be ignored
+   * @deprecated use {@link WordlistLoader#getWordSet(File, String)} directly
    */
+  //TODO make this private
   public static final String STOPWORDS_COMMENT = "#";
 
   /**

@@ -69,7 +66,7 @@ public final class BulgarianAnalyzer extends Analyzer {
    *
    * @return an unmodifiable instance of the default stop-words set.
    */
-  public static Set<String> getDefaultStopSet() {
+  public static Set<?> getDefaultStopSet() {
     return DefaultSetHolder.DEFAULT_STOP_SET;
   }
 

@@ -78,35 +75,19 @@ public final class BulgarianAnalyzer extends Analyzer {
    * class accesses the static final set the first time.;
    */
   private static class DefaultSetHolder {
-    static final Set<String> DEFAULT_STOP_SET;
+    static final Set<?> DEFAULT_STOP_SET;
 
     static {
       try {
-        DEFAULT_STOP_SET = loadDefaultStopWordSet();
-      } catch (Exception ex) {
+        DEFAULT_STOP_SET = loadStopwordSet(false, BulgarianAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+      } catch (IOException ex) {
         // default set should always be present as it is part of the
         // distribution (JAR)
-        throw new RuntimeException("Unable to load default stopword set", ex);
-      }
-    }
-
-    static Set<String> loadDefaultStopWordSet() throws IOException {
-      final InputStream stream = BulgarianAnalyzer.class
-          .getResourceAsStream(DEFAULT_STOPWORD_FILE);
-      try {
-        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
-        // make sure it is unmodifiable as we expose it in the outer class
-        return Collections.unmodifiableSet(WordlistLoader.getWordSet(reader,
-            STOPWORDS_COMMENT));
-      } finally {
-        if(stream != null)
-          stream.close();
+        throw new RuntimeException("Unable to load default stopword set");
       }
     }
   }
 
-  private final Version matchVersion;
-
 
   /**
    * Builds an analyzer with the default stop words:
    * {@link #DEFAULT_STOPWORD_FILE}.

@@ -119,58 +100,24 @@ public final class BulgarianAnalyzer extends Analyzer {
    * Builds an analyzer with the given stop words.
    */
   public BulgarianAnalyzer(Version matchVersion, Set<?> stopwords) {
-    super();
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
-        stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided
+   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
    * {@link Reader}.
    *
-   * @return A {@link TokenStream} built from an {@link StandardTokenizer}
+   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
    *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
    *         {@link StopFilter}, and {@link BulgarianStemFilter}.
    */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(matchVersion, reader);
-    result = new StandardFilter(result);
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(source);
     result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stoptable);
+    result = new StopFilter(matchVersion, result, stopwords);
     result = new BulgarianStemFilter(result);
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
-   * text in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from an {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-   *         {@link StopFilter}, and {@link BulgarianStemFilter}.
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new StandardFilter(streams.source);
-      streams.result = new LowerCaseFilter(matchVersion, streams.result);
-      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
-      streams.result = new BulgarianStemFilter(streams.result);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+    return new TokenStreamComponents(source, result);
   }
 }
==== org/apache/lucene/analysis/br/BrazilianAnalyzer.java ====

@@ -21,19 +21,21 @@ import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import java.util.Collections;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;

@@ -49,7 +51,7 @@
  * <p><b>NOTE</b>: This class uses the same {@link Version}
  * dependent settings as {@link StandardAnalyzer}.</p>
  */
-public final class BrazilianAnalyzer extends Analyzer {
+public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * List of typical Brazilian Portuguese stopwords.

@@ -91,19 +93,13 @@ public final class BrazilianAnalyzer extends Analyzer {
         Arrays.asList(BRAZILIAN_STOP_WORDS), false));
   }
 
-  /**
-   * Contains the stopwords used with the {@link StopFilter}.
-   */
-  private final Set<?> stoptable;
-
+
   /**
    * Contains words that should be indexed but not stemmed.
    */
   // TODO make this private in 3.1
   private Set<?> excltable = Collections.emptySet();
 
-  private final Version matchVersion;
-
   /**
    * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
    */

@@ -120,8 +116,7 @@ public final class BrazilianAnalyzer extends Analyzer {
    *          a stopword set
    */
   public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords) {
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**

@@ -188,53 +183,22 @@ public final class BrazilianAnalyzer extends Analyzer {
     excltable = WordlistLoader.getWordSet( exclusionlist );
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
-  /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
-   *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
-   *         {@link BrazilianStemFilter}.
-   */
-  @Override
-  public final TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer( matchVersion, reader );
-    result = new LowerCaseFilter( matchVersion, result );
-    result = new StandardFilter( result );
-    result = new StopFilter( matchVersion, result, stoptable );
-    result = new BrazilianStemFilter( result, excltable );
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
-   *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
-   *         {@link BrazilianStemFilter}.
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new LowerCaseFilter(matchVersion, streams.source);
-      streams.result = new StandardFilter(streams.result);
-      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
-      streams.result = new BrazilianStemFilter(streams.result, excltable);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
-  }
+  /**
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
+   *
+   * @return {@link TokenStreamComponents} built from a {@link StandardTokenizer} filtered with
+   *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and
+   *         {@link BrazilianStemFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
+    result = new StandardFilter(result);
+    result = new StopFilter(matchVersion, result, stopwords);
+    return new TokenStreamComponents(source, new BrazilianStemFilter(result,
+        excltable));
+  }
 }
==== org/apache/lucene/analysis/cjk/CJKAnalyzer.java ====

@@ -19,12 +19,12 @@ package org.apache.lucene.analysis.cjk;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.Set;

@@ -35,7 +35,7 @@
  * filters with {@link StopFilter}
  *
  */
-public final class CJKAnalyzer extends Analyzer {
+public final class CJKAnalyzer extends StopwordAnalyzerBase {
   //~ Static fields/initializers ---------------------------------------------
 
   /**

@@ -71,11 +71,6 @@ public final class CJKAnalyzer extends Analyzer {
       .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS),
           false));
   }
-  /**
-   * stop word list
-   */
-  private final Set<?> stopTable;
-  private final Version matchVersion;
 
   //~ Constructors -----------------------------------------------------------
 

@@ -95,8 +90,7 @@ public final class CJKAnalyzer extends Analyzer {
    *          a stopword set
    */
   public CJKAnalyzer(Version matchVersion, Set<?> stopwords){
-    stopTable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**

@@ -106,51 +100,15 @@ public final class CJKAnalyzer extends Analyzer {
    * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
    */
   public CJKAnalyzer(Version matchVersion, String... stopWords) {
-    stopTable = StopFilter.makeStopSet(matchVersion, stopWords);
-    this.matchVersion = matchVersion;
+    super(matchVersion, StopFilter.makeStopSet(matchVersion, stopWords));
   }
 
   //~ Methods ----------------------------------------------------------------
 
-  /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
-   *
-   * @param fieldName lucene field name
-   * @param reader input {@link Reader}
-   * @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with
-   *         {@link StopFilter}
-   */
-  @Override
-  public final TokenStream tokenStream(String fieldName, Reader reader) {
-    return new StopFilter(matchVersion, new CJKTokenizer(reader), stopTable);
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @param fieldName lucene field name
-   * @param reader Input {@link Reader}
-   * @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with
-   *         {@link StopFilter}
-   */
   @Override
-  public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    /* tokenStream() is final, no back compat issue */
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new CJKTokenizer(reader);
-      streams.result = new StopFilter(matchVersion, streams.source, stopTable);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new CJKTokenizer(reader);
+    return new TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
   }
 }
==== org/apache/lucene/analysis/cjk/CJKTokenizer.java ====

@@ -25,8 +25,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.AttributeSource.AttributeFactory;
 
 
 /**
  * CJKTokenizer is designed for Chinese, Japanese, and Korean languages.
==== org/apache/lucene/analysis/cn/ChineseAnalyzer.java ====

@@ -17,10 +17,11 @@ package org.apache.lucene.analysis.cn;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.Reader;
+
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 
 /**

@@ -29,49 +30,19 @@
  *
  */
 
-public final class ChineseAnalyzer extends Analyzer {
+public final class ChineseAnalyzer extends ReusableAnalyzerBase {
 
     public ChineseAnalyzer() {
     }
 
-    /**
-     * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
-     *
-     * @return A {@link TokenStream} built from a {@link ChineseTokenizer}
-     *         filtered with {@link ChineseFilter}.
-     */
+    /**
+     * Creates {@link TokenStreamComponents} used to tokenize all the text in the
+     * provided {@link Reader}.
+     *
+     * @return {@link TokenStreamComponents} built from a
+     *         {@link ChineseTokenizer} filtered with {@link ChineseFilter}
+     */
     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-        TokenStream result = new ChineseTokenizer(reader);
-        result = new ChineseFilter(result);
-        return result;
-    }
-
-    private class SavedStreams {
-        Tokenizer source;
-        TokenStream result;
-    };
-
-    /**
-     * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the
-     * provided {@link Reader}.
-     *
-     * @return A {@link TokenStream} built from a {@link ChineseTokenizer}
-     *         filtered with {@link ChineseFilter}.
-     */
-    @Override
-    public final TokenStream reusableTokenStream(String fieldName, Reader reader)
-        throws IOException {
-      /* tokenStream() is final, no back compat issue */
-      SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-      if (streams == null) {
-        streams = new SavedStreams();
-        streams.source = new ChineseTokenizer(reader);
-        streams.result = new ChineseFilter(streams.source);
-        setPreviousTokenStream(streams);
-      } else {
-        streams.source.reset(reader);
-      }
-      return streams.result;
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      final Tokenizer source = new ChineseTokenizer(reader);
+      return new TokenStreamComponents(source, new ChineseFilter(source));
     }
 }
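For callers nothing changes in the public API: Analyzer.reusableTokenStream()
still hands back a cached stream, but the analyzer no longer implements the
caching itself. A hedged usage sketch (the field name and input strings are
arbitrary; the stream identity is the reuse ReusableAnalyzerBase is expected
to provide):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.ChineseAnalyzer;

    public class ReuseDemo {
      public static void main(String[] args) throws IOException {
        ChineseAnalyzer analyzer = new ChineseAnalyzer();
        TokenStream first = analyzer.reusableTokenStream("body", new StringReader("one"));
        TokenStream second = analyzer.reusableTokenStream("body", new StringReader("two"));
        // The base class saved the components built on the first call and,
        // on the second, only reset the cached Tokenizer to the new Reader.
        System.out.println(first == second); // expected: true
      }
    }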
==== org/apache/lucene/analysis/cz/CzechAnalyzer.java ====

@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cz;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;

@@ -30,9 +32,9 @@
 
 import java.io.*;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
-import java.util.Collections;
 
 /**
  * {@link Analyzer} for Czech language.

@@ -53,7 +55,7 @@
  * <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
  * </ul>
  */
-public final class CzechAnalyzer extends Analyzer {
+public final class CzechAnalyzer extends ReusableAnalyzerBase {
 
   /**
    * List of typical stopwords.

@@ -95,10 +97,11 @@ public final class CzechAnalyzer extends Analyzer {
         Version.LUCENE_CURRENT, Arrays.asList(CZECH_STOP_WORDS), false));
   }
 
+
   /**
    * Contains the stopwords used with the {@link StopFilter}.
    */
-  // TODO make this final in 3.1
+  // TODO once loadStopWords is gone those member should be removed too in favor of StopwordAnalyzerBase
   private Set<?> stoptable;
   private final Version matchVersion;
 

@@ -168,6 +171,7 @@ public final class CzechAnalyzer extends Analyzer {
    * @deprecated use {@link WordlistLoader#getWordSet(Reader, String) }
    *             and {@link #CzechAnalyzer(Version, Set)} instead
    */
+  // TODO extend StopwordAnalyzerBase once this method is gone!
   public void loadStopWords( InputStream wordfile, String encoding ) {
     setPreviousTokenStream(null); // force a new stopfilter to be created
     if ( wordfile == null ) {

@@ -191,58 +195,25 @@ public final class CzechAnalyzer extends Analyzer {
       stoptable = Collections.emptySet();
     }
   }
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
    * {@link Reader}.
    *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+   * @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
    *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
    *         {@link StopFilter}, and {@link CzechStemFilter} (only if version is
    *         >= LUCENE_31)
    */
   @Override
-  public final TokenStream tokenStream( String fieldName, Reader reader ) {
-    TokenStream result = new StandardTokenizer( matchVersion, reader );
-    result = new StandardFilter( result );
-    result = new LowerCaseFilter( matchVersion, result );
-    result = new StopFilter( matchVersion, result, stoptable );
-    if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new CzechStemFilter(result);
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
-   * text in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
-   *         {@link StopFilter}, and {@link CzechStemFilter} (only if version is
-   *         >= LUCENE_31)
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new StandardFilter(streams.source);
-      streams.result = new LowerCaseFilter(matchVersion, streams.result);
-      streams.result = new StopFilter( matchVersion, streams.result, stoptable);
-      if (matchVersion.onOrAfter(Version.LUCENE_31))
-        streams.result = new CzechStemFilter(streams.result);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
-  }
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter( matchVersion, result, stoptable);
+    if (matchVersion.onOrAfter(Version.LUCENE_31))
+      result = new CzechStemFilter(result);
+    return new TokenStreamComponents(source, result);
+  }
 }
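CzechAnalyzer is the one analyzer here that cannot yet move to
StopwordAnalyzerBase, because its deprecated loadStopWords() mutates the stop
set after construction (hence the TODOs in the hunks above); it only picks up
ReusableAnalyzerBase plus the Version-gated CzechStemFilter. A short usage
sketch of that gate, assuming both Version constants exist at this point in
trunk (LUCENE_31 is referenced by the hunk itself):

    import org.apache.lucene.analysis.cz.CzechAnalyzer;
    import org.apache.lucene.util.Version;

    public class CzechVersionGate {
      public static void main(String[] args) {
        // Pre-3.1 behavior: createComponents() stops after the StopFilter.
        CzechAnalyzer legacy = new CzechAnalyzer(Version.LUCENE_30);
        // 3.1 behavior: the chain additionally gets new CzechStemFilter(result).
        CzechAnalyzer stemming = new CzechAnalyzer(Version.LUCENE_31);
      }
    }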
==== org/apache/lucene/analysis/de/GermanAnalyzer.java ====

@@ -29,13 +29,15 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
 import org.apache.lucene.util.Version;
 
 /**

@@ -51,7 +53,7 @@
  * <p><b>NOTE</b>: This class uses the same {@link Version}
  * dependent settings as {@link StandardAnalyzer}.</p>
  */
-public final class GermanAnalyzer extends Analyzer {
+public final class GermanAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * List of typical german stopwords.

@@ -89,17 +91,13 @@ public final class GermanAnalyzer extends Analyzer {
-  /**
-   * Contains the stopwords used with the {@link StopFilter}.
-   */
-  //TODO make this final in 3.1
-  private Set<?> stopSet;
 
 
   /**
    * Contains words that should be indexed but not stemmed.
    */
   // TODO make this final in 3.1
   private Set<?> exclusionSet;
 
-  private final Version matchVersion;
-
   /**
    * Builds an analyzer with the default stop words:
    * {@link #getDefaultStopSet()}.

@@ -131,9 +129,8 @@ public final class GermanAnalyzer extends Analyzer {
    *          a stemming exclusion set
    */
   public GermanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
-    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+    super(matchVersion, stopwords);
     exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionSet));
-    this.matchVersion = matchVersion;
   }
 
   /**

@@ -187,51 +184,23 @@ public final class GermanAnalyzer extends Analyzer {
     exclusionSet = WordlistLoader.getWordSet(exclusionlist);
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, and
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the
+   * provided {@link Reader}.
+   *
+   * @return {@link TokenStreamComponents} built from a
+   *         {@link StandardTokenizer} filtered with {@link StandardFilter},
+   *         {@link LowerCaseFilter}, {@link StopFilter}, and
    *         {@link GermanStemFilter}
    */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(matchVersion, reader);
-    result = new StandardFilter(result);
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(source);
     result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter( matchVersion, result, stopSet);
-    result = new GermanStemFilter(result, exclusionSet);
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
-   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, and
-   *         {@link GermanStemFilter}
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new StandardFilter(streams.source);
-      streams.result = new LowerCaseFilter(matchVersion, streams.result);
-      streams.result = new StopFilter( matchVersion, streams.result, stopSet);
-      streams.result = new GermanStemFilter(streams.result, exclusionSet);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+    result = new StopFilter( matchVersion, result, stopwords);
+    return new TokenStreamComponents(source, new GermanStemFilter(result, exclusionSet));
   }
 }
==== org/apache/lucene/analysis/el/GreekAnalyzer.java ====

@@ -19,14 +19,15 @@ package org.apache.lucene.analysis.el;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
 import org.apache.lucene.util.Version;
 
-import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.Map;

@@ -43,7 +44,7 @@
  * <p><b>NOTE</b>: This class uses the same {@link Version}
  * dependent settings as {@link StandardAnalyzer}.</p>
  */
-public final class GreekAnalyzer extends Analyzer
+public final class GreekAnalyzer extends StopwordAnalyzerBase
 {
     /**
      * List of typical Greek stopwords.

@@ -73,13 +74,6 @@ public final class GreekAnalyzer extends Analyzer
             Version.LUCENE_CURRENT, Arrays.asList(GREEK_STOP_WORDS), false));
   }
 
-  /**
-   * Contains the stopwords used with the {@link StopFilter}.
-   */
-  private final Set<?> stopSet;
-
-  private final Version matchVersion;
-
   public GreekAnalyzer(Version matchVersion) {
     this(matchVersion, DefaultSetHolder.DEFAULT_SET);
   }

@@ -93,8 +87,7 @@ public final class GreekAnalyzer extends Analyzer
    *          a stopword set
    */
   public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
-    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**

@@ -115,47 +108,20 @@ public final class GreekAnalyzer extends Analyzer
   {
     this(matchVersion, stopwords.keySet());
   }
 
-  /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
-   *         {@link GreekLowerCaseFilter} and {@link StopFilter}
-   */
 
+  /**
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the
+   * provided {@link Reader}.
+   *
+   * @return {@link TokenStreamComponents} built from a
+   *         {@link StandardTokenizer} filtered with
+   *         {@link GreekLowerCaseFilter} and {@link StopFilter}
+   */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader)
-  {
-    TokenStream result = new StandardTokenizer(matchVersion, reader);
-    result = new GreekLowerCaseFilter(result);
-    result = new StopFilter(matchVersion, result, stopSet);
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with
-   *         {@link GreekLowerCaseFilter} and {@link StopFilter}
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new GreekLowerCaseFilter(streams.source);
-      streams.result = new StopFilter(matchVersion, streams.result, stopSet);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    final TokenStream result = new GreekLowerCaseFilter(source);
+    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
   }
 }
==== org/apache/lucene/analysis/fa/PersianAnalyzer.java ====

@@ -19,17 +19,15 @@ package org.apache.lucene.analysis.fa;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
-import java.util.Collections;
 import java.util.Hashtable;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;

@@ -45,7 +43,7 @@ import org.apache.lucene.util.Version;
  * yeh and keheh) are standardized. "Stemming" is accomplished via stopwords.
  * </p>
  */
-public final class PersianAnalyzer extends Analyzer {
+public final class PersianAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * File containing default Persian stopwords.

@@ -57,11 +55,6 @@ public final class PersianAnalyzer extends Analyzer {
    */
   public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
 
-  /**
-   * Contains the stopwords used with the StopFilter.
-   */
-  private final Set<?> stoptable;
-
   /**
    * The comment character in the stopwords file. All lines prefixed with this
    * will be ignored

@@ -85,30 +78,15 @@ public final class PersianAnalyzer extends Analyzer {
 
     static {
       try {
-        DEFAULT_STOP_SET = loadDefaultStopWordSet();
+        DEFAULT_STOP_SET = loadStopwordSet(false, PersianAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
       } catch (IOException ex) {
         // default set should always be present as it is part of the
         // distribution (JAR)
         throw new RuntimeException("Unable to load default stopword set");
       }
     }
-
-    static Set<String> loadDefaultStopWordSet() throws IOException {
-      InputStream stream = PersianAnalyzer.class
-          .getResourceAsStream(DEFAULT_STOPWORD_FILE);
-      try {
-        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
-        // make sure it is unmodifiable as we expose it in the outer class
-        return Collections.unmodifiableSet(WordlistLoader.getWordSet(reader,
-            STOPWORDS_COMMENT));
-      } finally {
-        stream.close();
-      }
-    }
   }
 
-  private final Version matchVersion;
-
   /**
    * Builds an analyzer with the default stop words:
    * {@link #DEFAULT_STOPWORD_FILE}.

@@ -126,8 +104,7 @@ public final class PersianAnalyzer extends Analyzer {
    *          a stopword set
    */
   public PersianAnalyzer(Version matchVersion, Set<?> stopwords){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**

@@ -156,18 +133,19 @@ public final class PersianAnalyzer extends Analyzer {
   }
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
    * {@link Reader}.
    *
-   * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
+   * @return {@link TokenStreamComponents} built from a {@link ArabicLetterTokenizer}
    *         filtered with {@link LowerCaseFilter},
    *         {@link ArabicNormalizationFilter},
    *         {@link PersianNormalizationFilter} and Persian Stop words
    */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new ArabicLetterTokenizer(reader);
-    result = new LowerCaseFilter(matchVersion, result);
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new ArabicLetterTokenizer(reader);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
     result = new ArabicNormalizationFilter(result);
     /* additional persian-specific normalization */
     result = new PersianNormalizationFilter(result);

@@ -175,44 +153,6 @@ public final class PersianAnalyzer extends Analyzer {
      * the order here is important: the stopword list is normalized with the
      * above!
      */
-    result = new StopFilter(matchVersion, result, stoptable);
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  }
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link ArabicLetterTokenizer}
-   *         filtered with {@link LowerCaseFilter},
-   *         {@link ArabicNormalizationFilter},
-   *         {@link PersianNormalizationFilter} and Persian Stop words
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new ArabicLetterTokenizer(reader);
-      streams.result = new LowerCaseFilter(matchVersion, streams.source);
-      streams.result = new ArabicNormalizationFilter(streams.result);
-      /* additional persian-specific normalization */
-      streams.result = new PersianNormalizationFilter(streams.result);
-      /*
-       * the order here is important: the stopword list is normalized with the
-       * above!
-       */
-      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
   }
 }
==== org/apache/lucene/analysis/fr/FrenchAnalyzer.java ====

@@ -20,7 +20,9 @@ package org.apache.lucene.analysis.fr;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;

@@ -59,7 +61,7 @@
  * <p><b>NOTE</b>: This class uses the same {@link Version}
  * dependent settings as {@link StandardAnalyzer}.</p>
  */
-public final class FrenchAnalyzer extends Analyzer {
+public final class FrenchAnalyzer extends StopwordAnalyzerBase {
 
   /**
    * Extended list of typical French stopwords.

@@ -91,18 +93,12 @@ public final class FrenchAnalyzer extends Analyzer {
     "été", "être", "ô"
   };
 
-  /**
-   * Contains the stopwords used with the {@link StopFilter}.
-   */
-  private final Set<?> stoptable;
   /**
    * Contains words that should be indexed but not stemmed.
    */
   //TODO make this final in 3.0
   private Set<?> excltable = Collections.<Object>emptySet();
 
-  private final Version matchVersion;
-
   /**
    * Returns an unmodifiable instance of the default stop-words set.
    * @return an unmodifiable instance of the default stop-words set.

@@ -148,9 +144,7 @@ public final class FrenchAnalyzer extends Analyzer {
    */
   public FrenchAnalyzer(Version matchVersion, Set<?> stopwords,
       Set<?> stemExclutionSet) {
-    this.matchVersion = matchVersion;
-    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet
-        .copy(matchVersion, stopwords));
+    super(matchVersion, stopwords);
     this.excltable = CharArraySet.unmodifiableSet(CharArraySet
         .copy(matchVersion, stemExclutionSet));
   }

@@ -202,54 +196,22 @@ public final class FrenchAnalyzer extends Analyzer {
   }
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the provided
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the provided
    * {@link Reader}.
    *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
+   * @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
    *         filtered with {@link StandardFilter}, {@link StopFilter},
    *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
    */
   @Override
-  public final TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(matchVersion, reader);
-    result = new StandardFilter(result);
-    result = new StopFilter(matchVersion, result, stoptable);
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(source);
+    result = new StopFilter(matchVersion, result, stopwords);
     result = new FrenchStemFilter(result, excltable);
     // Convert to lowercase after stemming!
-    result = new LowerCaseFilter(matchVersion, result);
-    return result;
-  }
-
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the
-   * text in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link StopFilter},
-   *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new StandardFilter(streams.source);
-      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
-      streams.result = new FrenchStemFilter(streams.result, excltable);
-      // Convert to lowercase after stemming!
-      streams.result = new LowerCaseFilter(matchVersion, streams.result);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
+    return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
   }
 }
==== org/apache/lucene/analysis/ru/RussianAnalyzer.java ====

@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ru;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.Map;

@@ -26,7 +25,9 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.Version;

@@ -39,7 +40,7 @@
  * A default set of stopwords is used unless an alternative list is specified.
  * </p>
  */
-public final class RussianAnalyzer extends Analyzer
+public final class RussianAnalyzer extends StopwordAnalyzerBase
 {
     /**
      * List of typical Russian stopwords.

@@ -63,13 +64,6 @@ public final class RussianAnalyzer extends Analyzer
             Arrays.asList(RUSSIAN_STOP_WORDS), false));
   }
 
-  /**
-   * Contains the stopwords used with the StopFilter.
-   */
-  private final Set<?> stopSet;
-
-  private final Version matchVersion;
-
   public RussianAnalyzer(Version matchVersion) {
     this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
   }

@@ -91,8 +85,7 @@ public final class RussianAnalyzer extends Analyzer
    *          a stopword set
    */
   public RussianAnalyzer(Version matchVersion, Set<?> stopwords){
-    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
-    this.matchVersion = matchVersion;
+    super(matchVersion, stopwords);
   }
 
   /**

@@ -106,52 +99,21 @@ public final class RussianAnalyzer extends Analyzer
   }
 
   /**
-   * Creates a {@link TokenStream} which tokenizes all the text in the
+   * Creates {@link TokenStreamComponents} used to tokenize all the text in the
    * provided {@link Reader}.
    *
-   * @return A {@link TokenStream} built from a
+   * @return {@link TokenStreamComponents} built from a
    *         {@link RussianLetterTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link StopFilter},
    *         and {@link RussianStemFilter}
    */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader)
-  {
-    TokenStream result = new RussianLetterTokenizer(reader);
-    result = new LowerCaseFilter(matchVersion, result);
-    result = new StopFilter(matchVersion, result, stopSet);
-    result = new RussianStemFilter(result);
-    return result;
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new RussianLetterTokenizer(reader);
+    TokenStream result = new LowerCaseFilter(matchVersion, source);
+    result = new StopFilter(matchVersion, result, stopwords);
+    return new TokenStreamComponents(source, new RussianStemFilter(result));
+
   }
 
-  private class SavedStreams {
-    Tokenizer source;
-    TokenStream result;
-  };
-
-  /**
-   * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text
-   * in the provided {@link Reader}.
-   *
-   * @return A {@link TokenStream} built from a
-   *         {@link RussianLetterTokenizer} filtered with
-   *         {@link LowerCaseFilter}, {@link StopFilter},
-   *         and {@link RussianStemFilter}
-   */
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      streams.source = new RussianLetterTokenizer(reader);
-      streams.result = new LowerCaseFilter(matchVersion, streams.source);
-      streams.result = new StopFilter(matchVersion, streams.result, stopSet);
-      streams.result = new RussianStemFilter(streams.result);
-      setPreviousTokenStream(streams);
-    } else {
-      streams.source.reset(reader);
-    }
-    return streams.result;
-  }
 }
@@ -16,16 +16,18 @@ package org.apache.lucene.analysis.th;
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.util.Version;

/**

@@ -35,41 +37,28 @@ import org.apache.lucene.util.Version;
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
public final class ThaiAnalyzer extends Analyzer {
public final class ThaiAnalyzer extends ReusableAnalyzerBase {
  private final Version matchVersion;

  public ThaiAnalyzer(Version matchVersion) {
    this.matchVersion = matchVersion;
  }

  /**
   * Creates {@link TokenStreamComponents} used to tokenize all the text in the
   * provided {@link Reader}.
   *
   * @return {@link TokenStreamComponents} built from a
   *         {@link StandardTokenizer} filtered with {@link StandardFilter},
   *         {@link ThaiWordFilter}, and {@link StopFilter}
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream ts = new StandardTokenizer(matchVersion, reader);
    ts = new StandardFilter(ts);
    ts = new ThaiWordFilter(ts);
    ts = new StopFilter(matchVersion, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return ts;
  }

  private class SavedStreams {
    Tokenizer source;
    TokenStream result;
  };

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new ThaiWordFilter(streams.result);
      streams.result = new StopFilter(matchVersion, streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
      streams.result.reset(); // reset the ThaiWordFilter's state
    }
    return streams.result;
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new ThaiWordFilter(result);
    return new TokenStreamComponents(source, new StopFilter(matchVersion,
        result, StopAnalyzer.ENGLISH_STOP_WORDS_SET));
  }
}

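The removed reusableTokenStream above had to remember to reset the ThaiWordFilter's state by hand on reuse; under the new contract, TokenStreamComponents.reset(Reader) resets both source and sink in one place, and components that cannot be reset may return false, which makes the base class rebuild the chain. A sketch of that escape hatch, using a hypothetical NonReusableComponents class:

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

// Hypothetical example: returning false from reset makes
// ReusableAnalyzerBase.reusableTokenStream call createComponents again
// instead of reusing this chain.
final class NonReusableComponents extends TokenStreamComponents {
  NonReusableComponents(Tokenizer source, TokenStream sink) {
    super(source, sink);
  }

  @Override
  protected boolean reset(Reader reader) throws IOException {
    return false; // decline reuse; a fresh chain is built per request
  }
}
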
@@ -17,10 +17,10 @@ package org.apache.lucene.analysis.ar;
 * limitations under the License.
 */

import java.io.StringReader;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

@@ -78,7 +78,9 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   * Test that custom stopwords work, and are not case-sensitive.
   */
  public void testCustomStopwords() throws Exception {
    ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
    Set<String> set = new HashSet<String>();
    Collections.addAll(set, "the", "and", "a");
    ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, set);
    assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
        "brown", "fox" });
  }

@@ -17,10 +17,12 @@ package org.apache.lucene.analysis.br;
 * limitations under the License.
 */

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

/**

@@ -17,11 +17,8 @@ package org.apache.lucene.analysis.fa;
 * limitations under the License.
 */

import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

/**

@@ -0,0 +1,163 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis;

import java.io.IOException;
import java.io.Reader;

/**
 * A convenience subclass of Analyzer that makes it easy to implement
 * {@link TokenStream} reuse.
 * <p>
 * ReusableAnalyzerBase is a simplification of Analyzer that supports easy reuse
 * for the most common use-cases. Analyzers such as
 * {@link PerFieldAnalyzerWrapper} that behave differently depending upon the
 * field name need to subclass Analyzer directly instead.
 * </p>
 * <p>
 * To prevent consistency problems, this class does not allow subclasses to
 * override {@link #reusableTokenStream(String, Reader)} or
 * {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
 * implement {@link #createComponents(String, Reader)}.
 * </p>
 */
public abstract class ReusableAnalyzerBase extends Analyzer {

  /**
   * Creates a new {@link TokenStreamComponents} instance for this analyzer.
   *
   * @param fieldName
   *          the name of the field whose content is passed to the
   *          {@link TokenStreamComponents} sink as a reader
   * @param aReader
   *          the reader passed to the {@link Tokenizer} constructor
   * @return the {@link TokenStreamComponents} for this analyzer.
   */
  protected abstract TokenStreamComponents createComponents(String fieldName,
      Reader aReader);

  /**
   * This method uses {@link #createComponents(String, Reader)} to obtain an
   * instance of {@link TokenStreamComponents}. It returns the sink of the
   * components and stores the components internally. Subsequent calls to this
   * method will reuse the previously stored components if and only if the
   * {@link TokenStreamComponents#reset(Reader)} method returned
   * <code>true</code>. Otherwise a new instance of
   * {@link TokenStreamComponents} is created.
   *
   * @param fieldName the name of the field the created TokenStream is used for
   * @param reader the reader the stream's source reads from
   */
  @Override
  public final TokenStream reusableTokenStream(final String fieldName,
      final Reader reader) throws IOException {
    TokenStreamComponents streamChain = (TokenStreamComponents)
      getPreviousTokenStream();
    if (streamChain == null || !streamChain.reset(reader)) {
      streamChain = createComponents(fieldName, reader);
      setPreviousTokenStream(streamChain);
    }
    return streamChain.getTokenStream();
  }

  /**
   * This method uses {@link #createComponents(String, Reader)} to obtain an
   * instance of {@link TokenStreamComponents} and returns the sink of the
   * components. Each call to this method will create a new instance of
   * {@link TokenStreamComponents}. Created {@link TokenStream} instances are
   * never reused.
   *
   * @param fieldName the name of the field the created TokenStream is used for
   * @param reader the reader the stream's source reads from
   */
  @Override
  public final TokenStream tokenStream(final String fieldName,
      final Reader reader) {
    return createComponents(fieldName, reader).getTokenStream();
  }

  /**
   * This class encapsulates the outer components of a token stream. It provides
   * access to the source ({@link Tokenizer}) and the outer end (sink), an
   * instance of {@link TokenFilter} which also serves as the
   * {@link TokenStream} returned by
   * {@link Analyzer#tokenStream(String, Reader)} and
   * {@link Analyzer#reusableTokenStream(String, Reader)}.
   */
  public static class TokenStreamComponents {
    final Tokenizer source;
    final TokenStream sink;

    /**
     * Creates a new {@link TokenStreamComponents} instance.
     *
     * @param source
     *          the analyzer's tokenizer
     * @param result
     *          the analyzer's resulting token stream
     */
    public TokenStreamComponents(final Tokenizer source,
        final TokenStream result) {
      this.source = source;
      this.sink = result;
    }

    /**
     * Creates a new {@link TokenStreamComponents} instance.
     *
     * @param source
     *          the analyzer's tokenizer
     */
    public TokenStreamComponents(final Tokenizer source) {
      this.source = source;
      this.sink = source;
    }

    /**
     * Resets the encapsulated components with the given reader. This method by
     * default returns <code>true</code> indicating that the components have
     * been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
     * their own {@link TokenStreamComponents} returning <code>false</code> if
     * the components cannot be reset.
     *
     * @param reader
     *          a reader to reset the source component
     * @return <code>true</code> if the components were reset, otherwise
     *         <code>false</code>
     * @throws IOException
     *           if the component's reset method throws an {@link IOException}
     */
    protected boolean reset(final Reader reader) throws IOException {
      source.reset(reader);
      if (sink != source)
        sink.reset(); // only reset if the sink reference is different from source
      return true;
    }

    /**
     * Returns the sink {@link TokenStream}
     *
     * @return the sink {@link TokenStream}
     */
    protected TokenStream getTokenStream() {
      return sink;
    }

  }

}

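The contract above can be exercised directly: per thread, the second and later calls to reusableTokenStream hand the cached components a new Reader instead of building a new chain. A small sketch of that behavior (the ReuseDemo class, field name, and input strings are illustrative only):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;

public class ReuseDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = new WhitespaceAnalyzer(); // converted below to extend ReusableAnalyzerBase
    TokenStream first = analyzer.reusableTokenStream("body", new StringReader("first doc"));
    // ... consume the first stream fully, then ask again on the same thread ...
    TokenStream second = analyzer.reusableTokenStream("body", new StringReader("second doc"));
    // Same instance: TokenStreamComponents.reset(Reader) re-targeted the
    // cached chain at the new reader instead of rebuilding it.
    System.out.println(first == second); // expected to print true
  }
}
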
@@ -18,25 +18,15 @@ package org.apache.lucene.analysis;
 */

import java.io.Reader;
import java.io.IOException;

/** An {@link Analyzer} that filters {@link LetterTokenizer}
 *  with {@link LowerCaseFilter} */

public final class SimpleAnalyzer extends Analyzer {
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new LowerCaseTokenizer(reader);
  }
public final class SimpleAnalyzer extends ReusableAnalyzerBase {

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
    if (tokenizer == null) {
      tokenizer = new LowerCaseTokenizer(reader);
      setPreviousTokenStream(tokenizer);
    } else
      tokenizer.reset(reader);
    return tokenizer;
  protected TokenStreamComponents createComponents(final String fieldName,
      final Reader reader) {
    return new TokenStreamComponents(new LowerCaseTokenizer(reader));
  }
}

@@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.Set;
import java.util.List;

import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.util.Version;

/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.

@@ -38,9 +39,7 @@ import org.apache.lucene.util.Version;
 * </ul>
 */

public final class StopAnalyzer extends Analyzer {
  private final Set<?> stopWords;
  private final Version matchVersion;
public final class StopAnalyzer extends StopwordAnalyzerBase {

  /** An unmodifiable set containing some common English words that are not usually useful
  for searching.*/

@@ -65,16 +64,14 @@ public final class StopAnalyzer extends Analyzer {
   * @param matchVersion See <a href="#version">above</a>
   */
  public StopAnalyzer(Version matchVersion) {
    stopWords = ENGLISH_STOP_WORDS_SET;
    this.matchVersion = matchVersion;
    this(matchVersion, ENGLISH_STOP_WORDS_SET);
  }

  /** Builds an analyzer with the stop words from the given set.
   * @param matchVersion See <a href="#version">above</a>
   * @param stopWords Set of stop words */
  public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
    this.stopWords = stopWords;
    this.matchVersion = matchVersion;
    super(matchVersion, stopWords);
  }

  /** Builds an analyzer with the stop words from the given file.

@@ -82,8 +79,7 @@ public final class StopAnalyzer extends Analyzer {
   * @param matchVersion See <a href="#version">above</a>
   * @param stopwordsFile File to load stop words from */
  public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
    stopWords = WordlistLoader.getWordSet(stopwordsFile);
    this.matchVersion = matchVersion;
    this(matchVersion, WordlistLoader.getWordSet(stopwordsFile));
  }

  /** Builds an analyzer with the stop words from the given reader.

@@ -91,34 +87,21 @@ public final class StopAnalyzer extends Analyzer {
   * @param matchVersion See <a href="#version">above</a>
   * @param stopwords Reader to load stop words from */
  public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
    stopWords = WordlistLoader.getWordSet(stopwords);
    this.matchVersion = matchVersion;
    this(matchVersion, WordlistLoader.getWordSet(stopwords));
  }

  /** Filters LowerCaseTokenizer with StopFilter. */
  /**
   * Creates {@link TokenStreamComponents} used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link TokenStreamComponents} built from a {@link LowerCaseTokenizer} filtered with
   *         {@link StopFilter}
   */
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new StopFilter(matchVersion,
        new LowerCaseTokenizer(reader), stopWords);
  }

  /** Filters LowerCaseTokenizer with StopFilter. */
  private class SavedStreams {
    Tokenizer source;
    TokenStream result;
  };
  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new LowerCaseTokenizer(reader);
      streams.result = new StopFilter(matchVersion,
          streams.source, stopWords);
      setPreviousTokenStream(streams);
    } else
      streams.source.reset(reader);
    return streams.result;
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new LowerCaseTokenizer(reader);
    return new TokenStreamComponents(source, new StopFilter(matchVersion,
        source, stopwords));
  }
}

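With the constructors now chaining to StopwordAnalyzerBase (added below), any Set handed to StopAnalyzer is copied into an unmodifiable CharArraySet by the super constructor. A usage sketch (the StopAnalyzerDemo class, field name, and input are illustrative only):

import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

public class StopAnalyzerDemo {
  public static void main(String[] args) throws IOException {
    Set<String> stop = new HashSet<String>(Arrays.asList("the", "and", "a"));
    StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_31, stop);
    // Tokenizes with LowerCaseTokenizer, then drops the custom stopwords:
    TokenStream ts = analyzer.reusableTokenStream("body",
        new StringReader("The quick brown fox"));
    // expected terms: quick, brown, fox
  }
}
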
@@ -0,0 +1,110 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis;

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;

/**
 * Base class for Analyzers that need to make use of stopword sets.
 *
 */
public abstract class StopwordAnalyzerBase extends ReusableAnalyzerBase {

  /**
   * An immutable stopword set
   */
  protected final CharArraySet stopwords;

  protected final Version matchVersion;

  /**
   * Returns the analyzer's stopword set or an empty set if the analyzer has no
   * stopwords
   *
   * @return the analyzer's stopword set or an empty set if the analyzer has no
   *         stopwords
   */
  public Set<?> getStopwordSet() {
    return stopwords;
  }

  /**
   * Creates a new instance initialized with the given stopword set
   *
   * @param version
   *          the Lucene version for cross version compatibility
   * @param stopwords
   *          the analyzer's stopword set
   */
  protected StopwordAnalyzerBase(final Version version, final Set<?> stopwords) {
    /*
     * no need to call
     * setOverridesTokenStreamMethod(StopwordAnalyzerBase.class); here, both
     * tokenStream methods are final in this class.
     */
    matchVersion = version;
    // analyzers should use char array set for stopwords!
    this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet
        .unmodifiableSet(CharArraySet.copy(version, stopwords));
  }

  /**
   * Creates a new Analyzer with an empty stopword set
   *
   * @param version
   *          the Lucene version for cross version compatibility
   */
  protected StopwordAnalyzerBase(final Version version) {
    this(version, null);
  }

  /**
   * Creates a CharArraySet from a file resource associated with a class. (See
   * {@link Class#getResourceAsStream(String)}).
   *
   * @param ignoreCase
   *          <code>true</code> if the set should ignore the case of the
   *          stopwords, otherwise <code>false</code>
   * @param aClass
   *          a class that is associated with the given stopwordResource
   * @param resource
   *          name of the resource file associated with the given class
   * @param comment
   *          comment string to ignore in the stopword file
   * @return a CharArraySet containing the distinct stopwords from the given
   *         file
   * @throws IOException
   *           if loading the stopwords throws an {@link IOException}
   */
  protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
      final Class<? extends ReusableAnalyzerBase> aClass, final String resource,
      final String comment) throws IOException {
    final Set<String> wordSet = WordlistLoader.getWordSet(aClass, resource,
        comment);
    final CharArraySet set = new CharArraySet(Version.LUCENE_31, wordSet.size(), ignoreCase);
    set.addAll(wordSet);
    return set;
  }

}

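A typical subclass pairs this base class with a lazily initialized default set loaded through the protected loadStopwordSet helper, mirroring the pattern the converted analyzers in this commit use. A hedged sketch with a hypothetical MyStopwordAnalyzer (the resource name "stopwords.txt" and comment marker "#" are assumptions):

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;

public final class MyStopwordAnalyzer extends StopwordAnalyzerBase {

  // Lazy holder idiom: the default set is loaded on first access only.
  private static class DefaultSetHolder {
    static final CharArraySet DEFAULT_SET;
    static {
      try {
        DEFAULT_SET = loadStopwordSet(true, MyStopwordAnalyzer.class,
            "stopwords.txt", "#"); // resource name and comment char are assumptions
      } catch (IOException e) {
        throw new RuntimeException("unable to load default stopword set", e);
      }
    }
  }

  public MyStopwordAnalyzer(Version version) {
    super(version, DefaultSetHolder.DEFAULT_SET);
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // The protected stopwords/matchVersion fields come from the base class.
    final Tokenizer source = new LowerCaseTokenizer(reader);
    return new TokenStreamComponents(source,
        new StopFilter(matchVersion, source, stopwords));
  }
}
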
@@ -18,24 +18,14 @@ package org.apache.lucene.analysis;
 */

import java.io.Reader;
import java.io.IOException;

/** An Analyzer that uses {@link WhitespaceTokenizer}. */

public final class WhitespaceAnalyzer extends Analyzer {
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new WhitespaceTokenizer(reader);
  }
public final class WhitespaceAnalyzer extends ReusableAnalyzerBase {

  @Override
  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
    if (tokenizer == null) {
      tokenizer = new WhitespaceTokenizer(reader);
      setPreviousTokenStream(tokenizer);
    } else
      tokenizer.reset(reader);
    return tokenizer;
  protected TokenStreamComponents createComponents(final String fieldName,
      final Reader reader) {
    return new TokenStreamComponents(new WhitespaceTokenizer(reader));
  }
}

@@ -21,15 +21,69 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

/**
 * Loader for text files that represent a list of stopwords.
 */
public class WordlistLoader {

  /**
   * Loads a text file associated with a given class (See
   * {@link Class#getResourceAsStream(String)}) and adds every line as an entry
   * to a {@link Set} (omitting leading and trailing whitespace). Every line of
   * the file should contain only one word. The words need to be in lower-case if
   * you make use of an Analyzer which uses LowerCaseFilter (like
   * StandardAnalyzer).
   *
   * @param aClass
   *          a class that is associated with the given stopwordResource
   * @param stopwordResource
   *          name of the resource file associated with the given class
   * @return a {@link Set} with the file's words
   */
  public static Set<String> getWordSet(Class<?> aClass, String stopwordResource)
      throws IOException {
    final Reader reader = new BufferedReader(new InputStreamReader(aClass
        .getResourceAsStream(stopwordResource), "UTF-8"));
    try {
      return getWordSet(reader);
    } finally {
      reader.close();
    }
  }

  /**
   * Loads a text file associated with a given class (See
   * {@link Class#getResourceAsStream(String)}) and adds every line as an entry
   * to a {@link Set} (omitting leading and trailing whitespace). Every line of
   * the file should contain only one word. The words need to be in lower-case if
   * you make use of an Analyzer which uses LowerCaseFilter (like
   * StandardAnalyzer).
   *
   * @param aClass
   *          a class that is associated with the given stopwordResource
   * @param stopwordResource
   *          name of the resource file associated with the given class
   * @param comment
   *          the comment string to ignore
   * @return a {@link Set} with the file's words
   */
  public static Set<String> getWordSet(Class<?> aClass,
      String stopwordResource, String comment) throws IOException {
    final Reader reader = new BufferedReader(new InputStreamReader(aClass
        .getResourceAsStream(stopwordResource), "UTF-8"));
    try {
      return getWordSet(reader, comment);
    } finally {
      reader.close();
    }
  }

  /**
   * Loads a text file and adds every line as an entry to a HashSet (omitting
   * leading and trailing whitespace). Every line of the file should contain only

@@ -40,17 +94,15 @@ public class WordlistLoader {
   * @return A HashSet with the file's words
   */
  public static HashSet<String> getWordSet(File wordfile) throws IOException {
    HashSet<String> result = new HashSet<String>();
    FileReader reader = null;
    try {
      reader = new FileReader(wordfile);
      result = getWordSet(reader);
      return getWordSet(reader);
    }
    finally {
      if (reader != null)
        reader.close();
    }
    return result;
  }

  /**

@@ -64,17 +116,15 @@ public class WordlistLoader {
   * @return A HashSet with the file's words
   */
  public static HashSet<String> getWordSet(File wordfile, String comment) throws IOException {
    HashSet<String> result = new HashSet<String>();
    FileReader reader = null;
    try {
      reader = new FileReader(wordfile);
      result = getWordSet(reader, comment);
      return getWordSet(reader, comment);
    }
    finally {
      if (reader != null)
        reader.close();
    }
    return result;
  }

@@ -88,7 +138,7 @@ public class WordlistLoader {
   * @return A HashSet with the reader's words
   */
  public static HashSet<String> getWordSet(Reader reader) throws IOException {
    HashSet<String> result = new HashSet<String>();
    final HashSet<String> result = new HashSet<String>();
    BufferedReader br = null;
    try {
      if (reader instanceof BufferedReader) {

@@ -119,7 +169,7 @@ public class WordlistLoader {
   * @return A HashSet with the reader's words
   */
  public static HashSet<String> getWordSet(Reader reader, String comment) throws IOException {
    HashSet<String> result = new HashSet<String>();
    final HashSet<String> result = new HashSet<String>();
    BufferedReader br = null;
    try {
      if (reader instanceof BufferedReader) {

@@ -154,21 +204,18 @@ public class WordlistLoader {
  public static HashMap<String, String> getStemDict(File wordstemfile) throws IOException {
    if (wordstemfile == null)
      throw new NullPointerException("wordstemfile may not be null");
    HashMap<String, String> result = new HashMap<String, String>();
    final HashMap<String, String> result = new HashMap<String,String>();
    BufferedReader br = null;
    FileReader fr = null;

    try {
      fr = new FileReader(wordstemfile);
      br = new BufferedReader(fr);
      br = new BufferedReader(new FileReader(wordstemfile));
      String line;
      while ((line = br.readLine()) != null) {
        String[] wordstem = line.split("\t", 2);
        result.put(wordstem[0], wordstem[1]);
      }
    } finally {
      if (fr != null)
        fr.close();
      if (br != null)
      if(br != null)
        br.close();
    }
    return result;

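The class-resource overloads added above pair with the two test stopword files that follow; loading a list while skipping "#" comment lines might look like this sketch (the WordlistDemo class and the resource name are assumptions, since the file names are not shown in this rendering):

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.analysis.WordlistLoader;

public class WordlistDemo {
  public static void main(String[] args) throws IOException {
    // Reads a UTF-8 resource next to WordlistDemo on the classpath,
    // one word per line, ignoring lines that start with "#".
    Set<String> words = WordlistLoader.getWordSet(WordlistDemo.class,
        "stopwords-comments.txt", "#");
    System.out.println(words); // e.g. [ONE, two, three]
  }
}
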
@@ -0,0 +1,5 @@
#comment
ONE
two
#comment
three

@@ -0,0 +1,3 @@
ONE
two
three