LUCENE-2051: Contrib Analyzer Setters should be deprecated and replaced with ctor arguments, thanks to Simon Willnauer

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@880715 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2009-11-16 11:48:37 +00:00
parent 7370094ead
commit 00f07ee460
13 changed files with 478 additions and 128 deletions

View File

@ -154,6 +154,10 @@ New features
* LUCENE-2041: Parallelize the rest of ParallelMultiSearcher. Lots of
code refactoring and Java 5 concurrent support in MultiSearcher.
(Joey Surls, Simon Willnauer via Uwe Schindler)
* LUCENE-2051: Add CharArraySet.copy() as a simple method to copy
any Set<?> to a CharArraySet; the copy is optimized if the given
Set<?> is already a CharArraySet, as sketched below.
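A minimal usage sketch of the new helper (variable names are illustrative and assume the usual java.util and org.apache.lucene.analysis imports):

    Set<String> userWords = new HashSet<String>(Arrays.asList("foo", "bar"));
    CharArraySet copied = CharArraySet.copy(userWords);  // plain Set: copied case-sensitively
    CharArraySet again = CharArraySet.copy(copied);      // already a CharArraySet: ignoreCase is preserved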
Optimizations

View File

@ -20,6 +20,10 @@ API Changes
text exactly the same as LowerCaseFilter. Please use LowerCaseFilter
instead, which has the same functionality. (Robert Muir)
* LUCENE-2051: Contrib Analyzer setters were deprecated and replaced
with ctor arguments and a Version number; stop word lists were also
unified (see the migration sketch below). (Simon Willnauer)
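A hedged before/after migration sketch, using BrazilianAnalyzer as an example (the Version constant and the sample words are assumptions, not taken from this patch):

    // Before: mutable, setter-based configuration (now deprecated)
    BrazilianAnalyzer before = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
    before.setStemExclusionTable("quilometro");

    // After: stop words and stem exclusions are passed through the ctor and kept immutable
    Set<String> exclusions = new HashSet<String>(Arrays.asList("quilometro"));
    BrazilianAnalyzer after = new BrazilianAnalyzer(Version.LUCENE_CURRENT,
        BrazilianAnalyzer.getDefaultStopSet(), exclusions);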
Bug fixes
* LUCENE-1781: Fixed various issues with the lat/lng bounding box
@ -59,6 +63,7 @@ Optimizations
Previous versions were loading the stopword files each time a new
instance was created. This might improve performance for applications
creating lots of instances of these Analyzers. (Simon Willnauer)
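The mechanism behind this entry is the static holder idiom that the analyzers in this commit share; a condensed sketch of the pattern as it appears in the code below:

    private static class DefaultSetHolder {
      // built exactly once, when the holder class is first loaded
      static final Set<?> DEFAULT_STOP_SET = CharArraySet.unmodifiableSet(
          new CharArraySet(Arrays.asList(STOP_WORDS), false));
    }

    public static Set<?> getDefaultStopSet() {
      return DefaultSetHolder.DEFAULT_STOP_SET;  // no per-instance loading or set building
    }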
Documentation
* LUCENE-1916: Translated documentation in the smartcn hhmm package.
@ -72,7 +77,6 @@ Build
* LUCENE-2031: Moved PatternAnalyzer from contrib/memory into
contrib/analyzers/common, under miscellaneous. (Robert Muir)
Test Cases
======================= Release 2.9.1 2009-11-06 =======================
Changes in backwards compatibility policy

View File

@ -23,11 +23,11 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -67,7 +67,8 @@ public final class ArabicAnalyzer extends Analyzer {
*/
private final Set<?> stoptable;
/**
* The comment character in the stopwords file. All lines prefixed with this will be ignored
* The comment character in the stopwords file. All lines prefixed with this will be ignored
* @deprecated use {@link WordlistLoader#getWordSet(File, String)} directly
*/
public static final String STOPWORDS_COMMENT = "#";
@ -116,32 +117,44 @@ public final class ArabicAnalyzer extends Analyzer {
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
public ArabicAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public ArabicAnalyzer(Version matchVersion, Set<?> stopwords){
stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.matchVersion = matchVersion;
stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
*/
public ArabicAnalyzer( Version matchVersion, String... stopwords ) {
stoptable = StopFilter.makeStopSet( stopwords );
this.matchVersion = matchVersion;
this(matchVersion, StopFilter.makeStopSet( stopwords ));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
*/
public ArabicAnalyzer( Version matchVersion, Hashtable<?,?> stopwords ) {
stoptable = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
this(matchVersion, stopwords.keySet());
}
/**
* Builds an analyzer with the given stop words. Lines can be commented out using {@link #STOPWORDS_COMMENT}
* @deprecated use {@link #ArabicAnalyzer(Version, Set)} instead
*/
public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT);
this.matchVersion = matchVersion;
this(matchVersion, WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT));
}
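A hedged replacement for the deprecated File-based ctor above (the file name is an assumption; assumes java.io.File, java.util.Set, WordlistLoader and Version imports):

    Set<String> stopwords = WordlistLoader.getWordSet(new File("my-arabic-stopwords.txt"),
        ArabicAnalyzer.STOPWORDS_COMMENT);
    ArabicAnalyzer analyzer = new ArabicAnalyzer(Version.LUCENE_CURRENT, stopwords);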

View File

@ -20,12 +20,14 @@ package org.apache.lucene.analysis.br;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -51,7 +53,9 @@ public final class BrazilianAnalyzer extends Analyzer {
/**
* List of typical Brazilian Portuguese stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
// TODO make this private in 3.1
public final static String[] BRAZILIAN_STOP_WORDS = {
"a","ainda","alem","ambas","ambos","antes",
"ao","aonde","aos","apos","aquele","aqueles",
@ -73,52 +77,98 @@ public final class BrazilianAnalyzer extends Analyzer {
"suas","tal","tambem","teu","teus","toda","todas","todo",
"todos","tua","tuas","tudo","um","uma","umas","uns"};
/**
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET = CharArraySet
.unmodifiableSet(new CharArraySet(Arrays.asList(BRAZILIAN_STOP_WORDS),
false));
}
/**
* Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable = Collections.emptySet();
private final Set<?> stoptable;
/**
* Contains words that should be indexed but not stemmed.
*/
private Set excltable = Collections.emptySet();
private final Version matchVersion;
// TODO make this private in 3.1
private Set<?> excltable = Collections.emptySet();
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
*/
public BrazilianAnalyzer(Version matchVersion) {
stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
this.matchVersion = matchVersion;
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords) {
stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words and stemming exclusion words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a stemming exclusion set
*/
public BrazilianAnalyzer(Version matchVersion, Set<?> stopwords,
Set<?> stemExclusionSet) {
this(matchVersion, stopwords);
excltable = CharArraySet.unmodifiableSet(CharArraySet
.copy(stemExclusionSet));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
*/
public BrazilianAnalyzer( Version matchVersion, String... stopwords ) {
stoptable = StopFilter.makeStopSet( stopwords );
this.matchVersion = matchVersion;
}
public BrazilianAnalyzer(Version matchVersion, String... stopwords) {
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words.
*/
public BrazilianAnalyzer( Version matchVersion, Map stopwords ) {
stoptable = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
*/
public BrazilianAnalyzer(Version matchVersion, Map<?,?> stopwords) {
this(matchVersion, stopwords.keySet());
}
/**
* Builds an analyzer with the given stop words.
*/
public BrazilianAnalyzer( Version matchVersion, File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords );
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #BrazilianAnalyzer(Version, Set)} instead
*/
public BrazilianAnalyzer(Version matchVersion, File stopwords)
throws IOException {
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/**
* Builds an exclusionlist from an array of Strings.
* @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable( String... exclusionlist ) {
excltable = StopFilter.makeStopSet( exclusionlist );
@ -126,13 +176,15 @@ public final class BrazilianAnalyzer extends Analyzer {
}
/**
* Builds an exclusionlist from a {@link Map}.
* @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable( Map exclusionlist ) {
excltable = new HashSet(exclusionlist.keySet());
public void setStemExclusionTable( Map<?,?> exclusionlist ) {
excltable = new HashSet<Object>(exclusionlist.keySet());
setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
* Builds an exclusionlist from the words contained in the given file.
* @deprecated use {@link #BrazilianAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable( File exclusionlist ) throws IOException {
excltable = WordlistLoader.getWordSet( exclusionlist );

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -25,6 +26,7 @@ import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
@ -39,7 +41,10 @@ public class CJKAnalyzer extends Analyzer {
/**
* An array containing some common English words that are not usually
* useful for searching and some double-byte interpunctions.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
// TODO make this final in 3.1 -
// this might be revised and merged with StopFilter stop words too
public final static String[] STOP_WORDS = {
"a", "and", "are", "as", "at", "be",
"but", "by", "for", "if", "in",
@ -53,10 +58,23 @@ public class CJKAnalyzer extends Analyzer {
//~ Instance fields --------------------------------------------------------
/**
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET = CharArraySet
.unmodifiableSet(new CharArraySet(Arrays.asList(STOP_WORDS),
false));
}
/**
* stop word list
*/
private final Set stopTable;
private final Set<?> stopTable;
private final Version matchVersion;
//~ Constructors -----------------------------------------------------------
@ -65,7 +83,19 @@ public class CJKAnalyzer extends Analyzer {
* Builds an analyzer which removes words in {@link #STOP_WORDS}.
*/
public CJKAnalyzer(Version matchVersion) {
stopTable = StopFilter.makeStopSet(STOP_WORDS);
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public CJKAnalyzer(Version matchVersion, Set<?> stopwords){
stopTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.matchVersion = matchVersion;
}
@ -73,6 +103,7 @@ public class CJKAnalyzer extends Analyzer {
* Builds an analyzer which removes words in the provided array.
*
* @param stopWords stop word array
* @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
*/
public CJKAnalyzer(Version matchVersion, String... stopWords) {
stopTable = StopFilter.makeStopSet(stopWords);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cz;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -29,6 +30,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.util.Version;
import java.io.*;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.Collections;
@ -48,7 +50,9 @@ public final class CzechAnalyzer extends Analyzer {
/**
* List of typical stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
// TODO make this private in 3.1
public final static String[] CZECH_STOP_WORDS = {
"a","s","k","o","i","u","v","z","dnes","cz","t\u00edmto","bude\u0161","budem",
"byli","jse\u0161","m\u016fj","sv\u00fdm","ta","tomto","tohle","tuto","tyto",
@ -69,51 +73,84 @@ public final class CzechAnalyzer extends Analyzer {
"j\u00ed","ji","m\u011b","mne","jemu","tomu","t\u011bm","t\u011bmu","n\u011bmu","n\u011bmu\u017e",
"jeho\u017e","j\u00ed\u017e","jeliko\u017e","je\u017e","jako\u017e","na\u010de\u017e",
};
/**
* Returns a set of default Czech stopwords
* @return a set of default Czech stopwords
*/
public static final Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_SET;
}
private static class DefaultSetHolder {
private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
Arrays.asList(CZECH_STOP_WORDS), false));
}
/**
* Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable;
private final Version matchVersion;
// TODO make this final in 3.1
private Set<?> stoptable;
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
*/
public CzechAnalyzer(Version matchVersion) {
stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
this.matchVersion = matchVersion;
this(matchVersion, DefaultSetHolder.DEFAULT_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public CzechAnalyzer(Version matchVersion, Set<?> stopwords) {
this.matchVersion = matchVersion;
this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
*/
public CzechAnalyzer(Version matchVersion, String... stopwords) {
this(matchVersion, StopFilter.makeStopSet( stopwords ));
}
/**
* Builds an analyzer with the given stop words.
*
* @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
*/
public CzechAnalyzer(Version matchVersion, HashSet<?> stopwords) {
this(matchVersion, (Set<?>)stopwords);
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #CzechAnalyzer(Version, Set)} instead
*/
public CzechAnalyzer(Version matchVersion, String... stopwords) {
stoptable = StopFilter.makeStopSet( stopwords );
this.matchVersion = matchVersion;
}
public CzechAnalyzer(Version matchVersion, HashSet stopwords) {
stoptable = stopwords;
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords );
this.matchVersion = matchVersion;
public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
this(matchVersion, (Set<?>)WordlistLoader.getWordSet( stopwords ));
}
/**
* Loads stopwords hash from resource stream (file, database...).
* @param wordfile File containing the wordlist
* @param encoding Encoding used (win-1250, iso-8859-2, ...), null for default system encoding
* @deprecated use {@link WordlistLoader#getWordSet(Reader, String) }
* and {@link #CzechAnalyzer(Version, Set)} instead
*/
public void loadStopWords( InputStream wordfile, String encoding ) {
setPreviousTokenStream(null); // force a new stopfilter to be created
if ( wordfile == null ) {
stoptable = new HashSet();
stoptable = Collections.emptySet();
return;
}
try {
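// As the deprecation note above suggests, the stream-based loader can be replaced with
// WordlistLoader plus the new ctor; a hedged sketch (file name and encoding are assumptions):
//   Reader reader = new InputStreamReader(new FileInputStream("stopwords_cz.txt"), "ISO-8859-2");
//   Set<String> stopwords = WordlistLoader.getWordSet(reader, "#");
//   CzechAnalyzer analyzer = new CzechAnalyzer(matchVersion, stopwords);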

View File

@ -21,11 +21,13 @@ package org.apache.lucene.analysis.de;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -53,7 +55,9 @@ public class GermanAnalyzer extends Analyzer {
/**
* List of typical german stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
//TODO make this private in 3.1
public final static String[] GERMAN_STOP_WORDS = {
"einer", "eine", "eines", "einem", "einen",
"der", "die", "das", "dass", "daß",
@ -68,58 +72,99 @@ public class GermanAnalyzer extends Analyzer {
"mein", "sein", "kein",
"durch", "wegen", "wird"
};
/**
* Returns a set of default German stopwords
* @return a set of default German stopwords
*/
public static final Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_SET;
}
private static class DefaultSetHolder {
private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
Arrays.asList(GERMAN_STOP_WORDS), false));
}
/**
* Contains the stopwords used with the {@link StopFilter}.
*/
private Set stopSet = new HashSet();
//TODO make this final in 3.1
private Set<?> stopSet;
/**
* Contains words that should be indexed but not stemmed.
*/
private Set exclusionSet = new HashSet();
// TODO make this final in 3.1
private Set<?> exclusionSet;
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words:
* {@link #GERMAN_STOP_WORDS}.
* {@link #getDefaultStopSet()}.
*/
public GermanAnalyzer(Version matchVersion) {
stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
this(matchVersion, DefaultSetHolder.DEFAULT_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public GermanAnalyzer(Version matchVersion, Set<?> stopwords) {
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a stemming exclusion set
*/
public GermanAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
exclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #GermanAnalyzer(Version, Set)}
*/
public GermanAnalyzer(Version matchVersion, String... stopwords) {
stopSet = StopFilter.makeStopSet(stopwords);
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #GermanAnalyzer(Version, Set)}
*/
public GermanAnalyzer(Version matchVersion, Map stopwords) {
stopSet = new HashSet(stopwords.keySet());
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
public GermanAnalyzer(Version matchVersion, Map<?,?> stopwords) {
this(matchVersion, stopwords.keySet());
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #GermanAnalyzer(Version, Set)}
*/
public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/**
* Builds an exclusionlist from an array of Strings.
* @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(String[] exclusionlist) {
exclusionSet = StopFilter.makeStopSet(exclusionlist);
@ -128,6 +173,7 @@ public class GermanAnalyzer extends Analyzer {
/**
* Builds an exclusionlist from a {@link Map}
* @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(Map exclusionlist) {
exclusionSet = new HashSet(exclusionlist.keySet());
@ -136,6 +182,7 @@ public class GermanAnalyzer extends Analyzer {
/**
* Builds an exclusionlist from the words contained in the given file.
* @deprecated use {@link #GermanAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(File exclusionlist) throws IOException {
exclusionSet = WordlistLoader.getWordSet(exclusionlist);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.el;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -27,7 +28,7 @@ import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
@ -58,39 +59,61 @@ public final class GreekAnalyzer extends Analyzer
"εκεινοι", "εκεινεσ", "εκεινα", "εκεινων", "εκεινουσ", "οπωσ", "ομωσ",
"ισωσ", "οσο", "οτι"
};
/**
* Returns a set of default Greek stopwords
* @return a set of default Greek stopwords
*/
public static final Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_SET;
}
private static class DefaultSetHolder {
private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
Arrays.asList(GREEK_STOP_WORDS), false));
}
/**
* Contains the stopwords used with the {@link StopFilter}.
*/
private Set stopSet = new HashSet();
private final Set<?> stopSet;
private final Version matchVersion;
public GreekAnalyzer(Version matchVersion) {
super();
stopSet = StopFilter.makeStopSet(GREEK_STOP_WORDS);
this(matchVersion, DefaultSetHolder.DEFAULT_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @param stopwords Array of stopwords to use.
* @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
*/
public GreekAnalyzer(Version matchVersion, String... stopwords)
{
super();
stopSet = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
*/
public GreekAnalyzer(Version matchVersion, Map stopwords)
public GreekAnalyzer(Version matchVersion, Map<?,?> stopwords)
{
super();
stopSet = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
this(matchVersion, stopwords.keySet());
}
/**

View File

@ -23,11 +23,11 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -60,7 +60,7 @@ public final class PersianAnalyzer extends Analyzer {
/**
* Contains the stopwords used with the StopFilter.
*/
private final Set stoptable;
private final Set<?> stoptable;
/**
* The comment character in the stopwords file. All lines prefixed with this
@ -72,7 +72,7 @@ public final class PersianAnalyzer extends Analyzer {
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<String> getDefaultStopSet(){
public static Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
@ -81,7 +81,7 @@ public final class PersianAnalyzer extends Analyzer {
* accesses the static final set the first time.
*/
private static class DefaultSetHolder {
static final Set<String> DEFAULT_STOP_SET;
static final Set<?> DEFAULT_STOP_SET;
static {
try {
@ -114,33 +114,45 @@ public final class PersianAnalyzer extends Analyzer {
* {@link #DEFAULT_STOPWORD_FILE}.
*/
public PersianAnalyzer(Version matchVersion) {
stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public PersianAnalyzer(Version matchVersion, Set<?> stopwords){
stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
*/
public PersianAnalyzer(Version matchVersion, String... stopwords) {
stoptable = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
*/
public PersianAnalyzer(Version matchVersion, Hashtable stopwords) {
stoptable = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
public PersianAnalyzer(Version matchVersion, Hashtable<?, ?> stopwords) {
this(matchVersion, stopwords.keySet());
}
/**
* Builds an analyzer with the given stop words. Lines can be commented out
* using {@link #STOPWORDS_COMMENT}
* @deprecated use {@link #PersianAnalyzer(Version, Set)} instead
*/
public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException {
stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT);
this.matchVersion = matchVersion;
this(matchVersion, WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT));
}
/**

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.fr;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -31,6 +32,7 @@ import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@ -60,7 +62,9 @@ public final class FrenchAnalyzer extends Analyzer {
/**
* Extended list of typical French stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
// TODO make this final in 3.1
public final static String[] FRENCH_STOP_WORDS = {
"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
"autre", "autres", "aux", "auxquelles", "auxquels", "avait", "avant", "avec", "avoir",
@ -89,41 +93,87 @@ public final class FrenchAnalyzer extends Analyzer {
/**
* Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable = new HashSet();
private final Set<?> stoptable;
/**
* Contains words that should be indexed but not stemmed.
*/
private Set excltable = new HashSet();
//TODO make this final in 3.0
private Set<?> excltable = new HashSet();
private final Version matchVersion;
/**
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET = CharArraySet
.unmodifiableSet(new CharArraySet(Arrays.asList(FRENCH_STOP_WORDS),
false));
}
/**
* Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
*/
public FrenchAnalyzer(Version matchVersion) {
stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
this.matchVersion = matchVersion;
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public FrenchAnalyzer(Version matchVersion, Set<?> stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionSet
* a stemming exclusion set
*/
public FrenchAnalyzer(Version matchVersion, Set<?> stopwords,
Set<?> stemExclusionSet) {
this.matchVersion = matchVersion;
this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.excltable = CharArraySet.unmodifiableSet(CharArraySet
.copy(stemExclusionSet));
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
*/
public FrenchAnalyzer(Version matchVersion, String... stopwords) {
stoptable = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words.
* @throws IOException
* @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
*/
public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException {
stoptable = new HashSet(WordlistLoader.getWordSet(stopwords));
this.matchVersion = matchVersion;
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/**
* Builds an exclusionlist from an array of Strings.
* @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(String... exclusionlist) {
excltable = StopFilter.makeStopSet(exclusionlist);
@ -132,6 +182,7 @@ public final class FrenchAnalyzer extends Analyzer {
/**
* Builds an exclusionlist from a Map.
* @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(Map exclusionlist) {
excltable = new HashSet(exclusionlist.keySet());
@ -141,6 +192,7 @@ public final class FrenchAnalyzer extends Analyzer {
/**
* Builds an exclusionlist from the words contained in the given file.
* @throws IOException
* @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(File exclusionlist) throws IOException {
excltable = new HashSet(WordlistLoader.getWordSet(exclusionlist));

View File

@ -18,9 +18,11 @@ package org.apache.lucene.analysis.nl;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
@ -29,6 +31,8 @@ import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
@ -51,6 +55,7 @@ import java.util.Map;
public class DutchAnalyzer extends Analyzer {
/**
* List of typical Dutch stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
public final static String[] DUTCH_STOP_WORDS =
{
@ -65,19 +70,32 @@ public class DutchAnalyzer extends Analyzer {
"wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
"uw", "iemand", "geweest", "andere"
};
/**
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
*/
public static Set<?> getDefaultStopSet(){
return DefaultSetHolder.DEFAULT_STOP_SET;
}
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET = CharArraySet
.unmodifiableSet(new CharArraySet(Arrays.asList(DUTCH_STOP_WORDS),
false));
}
/**
* Contains the stopwords used with the StopFilter.
*/
private Set stoptable = new HashSet();
private final Set<?> stoptable;
/**
* Contains words that should be indexed but not stemmed.
*/
private Set excltable = new HashSet();
private Set<?> excltable = Collections.emptySet();
private Map stemdict = new HashMap();
private Map<String, String> stemdict = new HashMap<String, String>();
private final Version matchVersion;
/**
@ -86,13 +104,22 @@ public class DutchAnalyzer extends Analyzer {
*
*/
public DutchAnalyzer(Version matchVersion) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
stemdict.put("fiets", "fiets"); //otherwise fiet
stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
stemdict.put("ei", "eier");
stemdict.put("kind", "kinder");
}
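/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/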
public DutchAnalyzer(Version matchVersion, Set<?> stopwords){
this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
}
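/**
* Builds an analyzer with the given stop words and a stem exclusion set
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
* @param stemExclusionTable
* a stemming exclusion set
*/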
public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
this.matchVersion = matchVersion;
setOverridesTokenStreamMethod(DutchAnalyzer.class);
}
/**
@ -100,30 +127,30 @@ public class DutchAnalyzer extends Analyzer {
*
* @param matchVersion
* @param stopwords
* @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
*/
public DutchAnalyzer(Version matchVersion, String... stopwords) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
stoptable = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words.
*
* @param stopwords
* @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
*/
public DutchAnalyzer(Version matchVersion, HashSet stopwords) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
stoptable = stopwords;
this.matchVersion = matchVersion;
public DutchAnalyzer(Version matchVersion, HashSet<?> stopwords) {
this(matchVersion, (Set<?>)stopwords);
}
/**
* Builds an analyzer with the given stop words.
*
* @param stopwords
* @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
*/
public DutchAnalyzer(Version matchVersion, File stopwords) {
// this is completely broken!
setOverridesTokenStreamMethod(DutchAnalyzer.class);
try {
stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords);
@ -138,6 +165,7 @@ public class DutchAnalyzer extends Analyzer {
* Builds an exclusionlist from an array of Strings.
*
* @param exclusionlist
* @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(String... exclusionlist) {
excltable = StopFilter.makeStopSet(exclusionlist);
@ -146,14 +174,16 @@ public class DutchAnalyzer extends Analyzer {
/**
* Builds an exclusionlist from a Hashtable.
* @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(HashSet exclusionlist) {
public void setStemExclusionTable(HashSet<?> exclusionlist) {
excltable = exclusionlist;
setPreviousTokenStream(null); // force a new stemmer to be created
}
/**
* Builds an exclusionlist from the words contained in the given file.
* @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
*/
public void setStemExclusionTable(File exclusionlist) {
try {
@ -172,7 +202,7 @@ public class DutchAnalyzer extends Analyzer {
*/
public void setStemDictionary(File stemdictFile) {
try {
stemdict = org.apache.lucene.analysis.WordlistLoader.getStemDict(stemdictFile);
stemdict = WordlistLoader.getStemDict(stemdictFile);
setPreviousTokenStream(null); // force a new stemmer to be created
} catch (IOException e) {
// TODO: throw IOException
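// A hedged sketch of supplying stem overrides from a file (the file name is an assumption,
// and the tab-separated "word<TAB>stem" line format is my reading of getStemDict, not stated here):
//   DutchAnalyzer analyzer = new DutchAnalyzer(matchVersion);
//   analyzer.setStemDictionary(new File("dutch-stem-overrides.txt"));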

View File

@ -19,11 +19,12 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
import java.io.Reader;
import java.util.HashSet;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -55,37 +56,53 @@ public final class RussianAnalyzer extends Analyzer
"тоже", "той", "только", "том", "ты", "у", "уже", "хотя", "чего", "чей",
"чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
};
private static class DefaultSetHolder {
static final Set<?> DEFAULT_STOP_SET = CharArraySet
.unmodifiableSet(new CharArraySet(Arrays.asList(RUSSIAN_STOP_WORDS),
false));
}
/**
* Contains the stopwords used with the StopFilter.
*/
private Set stopSet = new HashSet();
private final Set<?> stopSet;
private final Version matchVersion;
public RussianAnalyzer(Version matchVersion) {
this(matchVersion, RUSSIAN_STOP_WORDS);
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
}
/**
* Builds an analyzer with the given stop words.
* @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
*/
public RussianAnalyzer(Version matchVersion, String... stopwords)
{
super();
stopSet = StopFilter.makeStopSet(stopwords);
public RussianAnalyzer(Version matchVersion, String... stopwords) {
this(matchVersion, StopFilter.makeStopSet(stopwords));
}
/**
* Builds an analyzer with the given stop words
*
* @param matchVersion
* lucene compatibility version
* @param stopwords
* a stopword set
*/
public RussianAnalyzer(Version matchVersion, Set<?> stopwords){
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* TODO: create a Set version of this ctor
* @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
*/
public RussianAnalyzer(Version matchVersion, Map stopwords)
public RussianAnalyzer(Version matchVersion, Map<?,?> stopwords)
{
super();
stopSet = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
this(matchVersion, stopwords.keySet());
}
/**

View File

@ -4,6 +4,7 @@ import java.util.AbstractSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -47,6 +48,7 @@ public class CharArraySet extends AbstractSet<Object> {
private char[][] entries;
private int count;
private final boolean ignoreCase;
public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(new CharArraySet(0, false));
/** Create set with enough capacity to hold startSize
* terms */
@ -263,6 +265,11 @@ public class CharArraySet extends AbstractSet<Object> {
public static CharArraySet unmodifiableSet(CharArraySet set) {
if (set == null)
throw new NullPointerException("Given set is null");
if (set == EMPTY_SET)
return EMPTY_SET;
if (set instanceof UnmodifiableCharArraySet)
return set;
/*
* Instead of delegating calls to the given set copy the low-level values to
* the unmodifiable Subclass
@ -270,6 +277,27 @@ public class CharArraySet extends AbstractSet<Object> {
return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
}
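// A short sketch of the new EMPTY_SET handling above (values are illustrative):
//   CharArraySet empty = CharArraySet.EMPTY_SET;
//   assert CharArraySet.unmodifiableSet(empty) == empty;        // short-circuits, no extra wrapper
//   CharArraySet frozen = CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList("the"), false));
//   frozen.add("and");                                          // would throw UnsupportedOperationException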
/**
* Returns a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be preserved.
*
* @param set
* a set to copy
* @return a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be
* preserved.
*/
public static CharArraySet copy(Set<?> set) {
if (set == null)
throw new NullPointerException("Given set is null");
if(set == EMPTY_SET)
return EMPTY_SET;
final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
: false;
return new CharArraySet(set, ignoreCase);
}
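// And a sketch of copy() itself, illustrating the ignoreCase preservation described above (values assumed):
//   CharArraySet ciSet = new CharArraySet(Arrays.asList("Stopword"), true);  // ignoreCase = true
//   CharArraySet ciCopy = CharArraySet.copy(ciSet);
//   ciCopy.contains("sToPwOrD");                          // true: ignoreCase carried over from the source
//   CharArraySet plainCopy = CharArraySet.copy(new HashSet<String>(Arrays.asList("Stopword")));
//   plainCopy.contains("stopword");                       // false: plain Sets are copied case-sensitively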
/** The Iterator<String> for this set. Strings are constructed on the fly, so
* use <code>nextCharArray</code> for more efficient access. */
public class CharArraySetIterator implements Iterator<String> {