mirror of https://github.com/apache/lucene.git

LUCENE-2055: better snowball integration, deprecate buggy handcoded snowball impls, restructure lang support

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@907125 13f79535-47bb-0310-9956-ffa450edef68

parent 57d1387492
commit a6b7c5552b
@@ -23,6 +23,11 @@ stopword list that is BSD-licensed created by Jacques Savoy. The file resides i
contrib/analyzers/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt.
See http://members.unine.ch/jacques.savoy/clef/index.html.

The Romanian analyzer (contrib/analyzers) comes with a default
stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt.
See http://members.unine.ch/jacques.savoy/clef/index.html.

The Bulgarian analyzer (contrib/analyzers) comes with a default
stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
contrib/analyzers/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt.
@@ -27,6 +27,10 @@ Changes in runtime behavior
  used with Version > 3.0 and the TurkishStemmer.
  (Robert Muir via Simon Willnauer)

* LUCENE-2055: GermanAnalyzer now uses the Snowball German2 algorithm and
  stopwords list by default for Version > 3.0.
  (Robert Muir, Uwe Schindler, Simon Willnauer)

Bug fixes

* LUCENE-2199: ShingleFilter skipped over tri-gram shingles if outputUnigram

@@ -53,6 +57,13 @@ Bug fixes
* LUCENE-2207, LUCENE-2219: Fix incorrect offset calculations in end() for
  CJKTokenizer, ChineseTokenizer, SmartChinese SentenceTokenizer,
  and WikipediaTokenizer. (Koji Sekiguchi, Robert Muir)

* LUCENE-2055: Deprecated RussianTokenizer, RussianStemmer, RussianStemFilter,
  FrenchStemmer, FrenchStemFilter, DutchStemmer, and DutchStemFilter. For
  these Analyzers, SnowballFilter is used instead (for Version > 3.0), as
  the previous code did not always implement the Snowball algorithm correctly.
  Additionally, for Version > 3.0, the Snowball stopword lists are used by
  default. (Robert Muir, Uwe Schindler, Simon Willnauer)

API Changes
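For code that built a chain around one of the now-deprecated filters, the intended replacement is the generated Snowball stemmer; a minimal migration sketch, assuming only the SnowballFilter(TokenStream, stemmer) constructor used throughout this patch:

```java
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.tartarus.snowball.ext.FrenchStemmer;

class FrenchStemMigration {
  TokenStream stem(TokenStream input) {
    // before (deprecated, did not always match the Snowball algorithm):
    //   new FrenchStemFilter(input)
    // after: the generated Snowball stemmer
    return new SnowballFilter(input, new FrenchStemmer());
  }
}
```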
@@ -68,6 +79,12 @@ API Changes

* LUCENE-2204: Change some package private classes/members to publicly accessible to implement
  custom FragmentsBuilders. (Koji Sekiguchi)

* LUCENE-2055: Integrate snowball into contrib/analyzers. SnowballAnalyzer is
  now deprecated in favor of language-specific analyzers which contain things
  such as stopword lists and any language-specific processing in addition to
  stemming. Add Turkish and Romanian stopwords lists to support this.
  (Robert Muir, Uwe Schindler, Simon Willnauer)

New features
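A minimal sketch of that replacement, using the EnglishAnalyzer added in this commit:

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.util.Version;

class SnowballMigration {
  Analyzer analyzer() {
    // before (deprecated): new SnowballAnalyzer(Version.LUCENE_31, "English")
    // after: a language-specific analyzer that also carries stopwords and
    // any language-specific processing in addition to stemming
    return new EnglishAnalyzer(Version.LUCENE_31);
  }
}
```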
@@ -105,6 +122,10 @@ New features

* LUCENE-2234: Add a Hindi analyzer. (Robert Muir)

* LUCENE-2055: Add analyzers/misc/StemmerOverrideFilter. This filter provides
  the ability to override any stemmer with a custom dictionary map.
  (Robert Muir, Uwe Schindler, Simon Willnauer)

Build

* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.da;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.DanishStemmer;

/**
 * {@link Analyzer} for Danish.
 */
public final class DanishAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Danish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "danish_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public DanishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public DanishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public DanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new DanishStemmer());
    return new TokenStreamComponents(source, result);
  }
}
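The new language analyzers in this commit all share this shape (tokenize, standard-filter, lowercase, stop, optionally protect exclusions, stem). A hedged usage sketch for the Danish one, using the 3.x TermAttribute API; the sample text is illustrative:

```java
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class DanishAnalyzerDemo {
  public static void main(String[] args) throws IOException {
    DanishAnalyzer analyzer = new DanishAnalyzer(Version.LUCENE_31);
    TokenStream ts = analyzer.tokenStream("body",
        new StringReader("Dette er en prøve"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      // stopped, lowercased, Danish-stemmed terms
      System.out.println(term.term());
    }
    ts.end();
    ts.close();
  }
}
```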
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Danish.
</body>
</html>
@@ -36,10 +36,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.German2Stemmer;

/**
 * {@link Analyzer} for German language.
@@ -51,6 +53,16 @@ import org.apache.lucene.util.Version;
 * exclusion list is empty by default.
 * </p>
 *
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating GermanAnalyzer:
 * <ul>
 *   <li> As of 3.1, Snowball stemming is done with SnowballFilter, and
 *        Snowball stopwords are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
 *
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
@@ -60,7 +72,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   * List of typical german stopwords.
   * @deprecated use {@link #getDefaultStopSet()} instead
   */
-  //TODO make this private in 3.1
+  //TODO make this private in 3.1, remove in 4.0
  @Deprecated
  public final static String[] GERMAN_STOP_WORDS = {
    "einer", "eine", "eines", "einem", "einen",
@@ -77,6 +89,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    "durch", "wegen", "wird"
  };

  /** File containing default German stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "german_stop.txt";

  /**
   * Returns a set of default German-stopwords
   * @return a set of default German-stopwords
@@ -86,8 +101,21 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
  }

  private static class DefaultSetHolder {
-    private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
+    /** @deprecated remove in Lucene 4.0 */
+    @Deprecated
+    private static final Set<?> DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
        Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
+    private static final Set<?> DEFAULT_SET;
+    static {
+      try {
+        DEFAULT_SET =
+          WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR)
+        throw new RuntimeException("Unable to load default stopword set");
+      }
+    }
  }

  /**
@@ -105,7 +133,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   * {@link #getDefaultStopSet()}.
   */
  public GermanAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+    this(matchVersion,
+        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_SET
+            : DefaultSetHolder.DEFAULT_SET_30);
  }

  /**
@@ -199,8 +229,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   *
   * @return {@link TokenStreamComponents} built from a
   *         {@link StandardTokenizer} filtered with {@link StandardFilter},
-   *         {@link LowerCaseFilter}, {@link StopFilter}, and
-   *         {@link GermanStemFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter},
+   *         {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, and
+   *         {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
@@ -210,6 +241,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter( matchVersion, result, stopwords);
    result = new KeywordMarkerTokenFilter(result, exclusionSet);
-    return new TokenStreamComponents(source, new GermanStemFilter(result));
+    if (matchVersion.onOrAfter(Version.LUCENE_31))
+      result = new SnowballFilter(result, new German2Stemmer());
+    else
+      result = new GermanStemFilter(result);
+    return new TokenStreamComponents(source, result);
  }
}
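A short sketch of the Version-gated behavior introduced above; both constructors are taken from this diff:

```java
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.util.Version;

class GermanAnalyzerVersions {
  // LUCENE_31 and later: Snowball German2 stemmer and Snowball stopwords
  GermanAnalyzer current = new GermanAnalyzer(Version.LUCENE_31);
  // LUCENE_30 and earlier: the old GermanStemFilter and GERMAN_STOP_WORDS,
  // preserving compatibility with already-built indexes
  GermanAnalyzer legacy = new GermanAnalyzer(Version.LUCENE_30);
}
```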
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.util.Version;
@@ -41,6 +42,15 @@ import java.util.Set;
 * A default set of stopwords is used unless an alternative list is specified.
 * </p>
 *
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating GreekAnalyzer:
 * <ul>
 *   <li> As of 3.1, StandardFilter is used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
 *
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
@@ -117,13 +127,15 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase
   *
   * @return {@link TokenStreamComponents} built from a
   *         {@link StandardTokenizer} filtered with
-   *         {@link GreekLowerCaseFilter} and {@link StopFilter}
+   *         {@link GreekLowerCaseFilter}, {@link StandardFilter} and {@link StopFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    final TokenStream result = new GreekLowerCaseFilter(source);
+    TokenStream result = new GreekLowerCaseFilter(source);
+    if (matchVersion.onOrAfter(Version.LUCENE_31))
+      result = new StandardFilter(result);
    return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
  }
}
@@ -0,0 +1,113 @@
package org.apache.lucene.analysis.en;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

/**
 * {@link Analyzer} for English.
 */
public final class EnglishAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
  }

  /**
   * Builds an analyzer with the default stop words: {@link #getDefaultStopSet}.
   */
  public EnglishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public EnglishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public EnglishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link PorterStemFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(source, result);
  }
}
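The three-argument constructor is what makes the stem exclusion hook useful; a sketch, assuming the usual Porter behavior of stemming "flies" to "fli":

```java
import java.util.Collections;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.util.Version;

class StemExclusionDemo {
  // Terms in the exclusion set are marked via KeywordMarkerTokenFilter, so
  // the PorterStemFilter leaves them intact while everything else is stemmed.
  EnglishAnalyzer analyzer = new EnglishAnalyzer(Version.LUCENE_31,
      EnglishAnalyzer.getDefaultStopSet(),
      Collections.singleton("flies"));
}
```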
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Analyzer for English.
</body>
</html>
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.es;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SpanishStemmer;

/**
 * {@link Analyzer} for Spanish.
 */
public final class SpanishAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Spanish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "spanish_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public SpanishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public SpanishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public SpanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new SpanishStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Spanish.
</body>
</html>
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.fi;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.FinnishStemmer;

/**
 * {@link Analyzer} for Finnish.
 */
public final class FinnishAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Finnish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "finnish_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public FinnishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public FinnishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public FinnishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new FinnishStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Finnish.
</body>
</html>
@@ -68,7 +68,7 @@ public final class ElisionFilter extends TokenFilter {
  /**
   * Constructs an elision filter with standard stop words
   */
-  protected ElisionFilter(Version matchVersion, TokenStream input) {
+  public ElisionFilter(Version matchVersion, TokenStream input) {
    this(matchVersion, input, DEFAULT_ARTICLES);
  }

@@ -77,7 +77,7 @@ public final class ElisionFilter extends TokenFilter {
   * @deprecated use {@link #ElisionFilter(Version, TokenStream)} instead
   */
  @Deprecated
-  protected ElisionFilter(TokenStream input) {
+  public ElisionFilter(TokenStream input) {
    this(Version.LUCENE_30, input);
  }
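With the constructors now public, ElisionFilter can be dropped into custom chains directly; a minimal sketch:

```java
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.ElisionFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

class ElisionDemo {
  TokenStream build(Reader reader) {
    TokenStream ts = new StandardTokenizer(Version.LUCENE_31, reader);
    // strips contracted articles such as l' and d' before further filtering
    return new ElisionFilter(Version.LUCENE_31, ts);
  }
}
```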
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
@@ -55,6 +56,9 @@ import java.util.Set;
 * <p>You must specify the required {@link Version}
 * compatibility when creating FrenchAnalyzer:
 * <ul>
 *   <li> As of 3.1, Snowball stemming is done with SnowballFilter,
 *        LowerCaseFilter is used prior to StopFilter, and ElisionFilter and
 *        Snowball stopwords are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
@@ -68,7 +72,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   * Extended list of typical French stopwords.
   * @deprecated use {@link #getDefaultStopSet()} instead
   */
-  // TODO make this private in 3.1
+  // TODO make this private in 3.1, remove in 4.0
  @Deprecated
  public final static String[] FRENCH_STOP_WORDS = {
    "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
@@ -95,6 +99,9 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
    "été", "être", "ô"
  };

  /** File containing default French stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "french_stop.txt";

  /**
   * Contains words that should be indexed but not stemmed.
   */
@@ -110,16 +117,31 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
  }

  private static class DefaultSetHolder {
-    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+    /** @deprecated remove this in Lucene 4.0 */
+    @Deprecated
+    static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
            false));
+    static final Set<?> DEFAULT_STOP_SET;
+    static {
+      try {
+        DEFAULT_STOP_SET =
+          WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR)
+        throw new RuntimeException("Unable to load default stopword set");
+      }
+    }
  }

  /**
-   * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
+   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
   */
  public FrenchAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+    this(matchVersion,
+        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
+            : DefaultSetHolder.DEFAULT_STOP_SET_30);
  }

  /**
@@ -207,20 +229,34 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   * {@link Reader}.
   *
   * @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
-   *         filtered with {@link StandardFilter}, {@link StopFilter},
-   *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
+   *         filtered with {@link StandardFilter}, {@link ElisionFilter},
+   *         {@link LowerCaseFilter}, {@link StopFilter},
+   *         {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
+   *         and {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
-    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
-    result = new StopFilter(matchVersion, result, stopwords);
-    if (!excltable.isEmpty())
-      result = new KeywordMarkerTokenFilter(result, excltable);
-    result = new FrenchStemFilter(result);
-    // Convert to lowercase after stemming!
-    return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
+    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+      TokenStream result = new StandardFilter(source);
+      result = new ElisionFilter(matchVersion, result);
+      result = new LowerCaseFilter(matchVersion, result);
+      result = new StopFilter(matchVersion, result, stopwords);
+      if (!excltable.isEmpty())
+        result = new KeywordMarkerTokenFilter(result, excltable);
+      result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
+      return new TokenStreamComponents(source, result);
+    } else {
+      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+      TokenStream result = new StandardFilter(source);
+      result = new StopFilter(matchVersion, result, stopwords);
+      if (!excltable.isEmpty())
+        result = new KeywordMarkerTokenFilter(result, excltable);
+      result = new FrenchStemFilter(result);
+      // Convert to lowercase after stemming!
+      return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
    }
  }
}
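A hedged sketch of the observable effect of the new 3.1 chain (elision runs before lowercasing, stopping, and stemming); the expected output is an assumption, not taken from this commit:

```java
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class FrenchChainDemo {
  public static void main(String[] args) throws IOException {
    FrenchAnalyzer analyzer = new FrenchAnalyzer(Version.LUCENE_31);
    // "l'avion" should have its article elided before the Snowball stemmer
    // runs; with an earlier Version the old FrenchStemFilter chain is used.
    TokenStream ts = analyzer.tokenStream("f", new StringReader("l'avion"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
    ts.close();
  }
}
```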
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.fr;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter; // for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

@@ -40,7 +41,11 @@ import java.util.Set;
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see KeywordMarkerTokenFilter
 * @deprecated Use {@link SnowballFilter} with
 * {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
 * same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
public final class FrenchStemFilter extends TokenFilter {

  /**
@@ -25,8 +25,10 @@ package org.apache.lucene.analysis.fr;
 * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
 * (French stemming algorithm) for details
 * </p>
 * @deprecated Use {@link org.tartarus.snowball.ext.FrenchStemmer} instead,
 * which has the same functionality. This filter will be removed in Lucene 4.0
 */

@Deprecated
public class FrenchStemmer {

  /**
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.hu;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.HungarianStemmer;

/**
 * {@link Analyzer} for Hungarian.
 */
public final class HungarianAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Hungarian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "hungarian_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public HungarianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public HungarianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public HungarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new HungarianStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Hungarian.
</body>
</html>
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.it;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.ItalianStemmer;

/**
 * {@link Analyzer} for Italian.
 */
public final class ItalianAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Italian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public ItalianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public ItalianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public ItalianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new ItalianStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Italian.
</body>
</html>
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
 */

import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;

import java.io.IOException;
@@ -0,0 +1,70 @@
package org.apache.lucene.analysis.miscellaneous;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.analysis.CharArrayMap;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

/**
 * Provides the ability to override any {@link KeywordAttribute} aware stemmer
 * with custom dictionary-based stemming.
 */
public final class StemmerOverrideFilter extends TokenFilter {
  private final CharArrayMap<String> dictionary;

  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);

  /**
   * Create a new StemmerOverrideFilter, performing dictionary-based stemming
   * with the provided <code>dictionary</code>.
   * <p>
   * Any dictionary-stemmed terms will be marked with {@link KeywordAttribute}
   * so that they will not be stemmed with stemmers down the chain.
   * </p>
   */
  public StemmerOverrideFilter(Version matchVersion, TokenStream input,
      Map<?,String> dictionary) {
    super(input);
    this.dictionary = dictionary instanceof CharArrayMap ?
      (CharArrayMap<String>) dictionary : CharArrayMap.copy(matchVersion, dictionary);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
        String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength());
        if (stem != null) {
          termAtt.setTermBuffer(stem);
          keywordAtt.setKeyword(true);
        }
      }
      return true;
    } else {
      return false;
    }
  }
}
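
To make the new filter concrete, a hedged wiring sketch (not part of the commit; reader, the override pair, and the English stemmer choice are placeholder assumptions):

// Hypothetical usage: force "dogs" -> "dog" via the dictionary, then let Snowball
// handle everything the dictionary does not cover. Overridden terms are keyword-marked,
// so the downstream SnowballFilter leaves them alone.
Map<String,String> overrides = new HashMap<String,String>();
overrides.put("dogs", "dog");
TokenStream chain = new StandardTokenizer(Version.LUCENE_31, reader);
chain = new StemmerOverrideFilter(Version.LUCENE_31, chain, overrides);
chain = new SnowballFilter(chain, new org.tartarus.snowball.ext.EnglishStemmer());
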
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ngram;

import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ngram;
 * limitations under the License.
 */

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

@@ -20,11 +20,14 @@ package org.apache.lucene.analysis.nl;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc

@@ -33,7 +36,6 @@ import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;

@@ -51,6 +53,17 @@ import java.util.Map;
 * exclusion list is empty by default.
 * </p>
 *
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating DutchAnalyzer:
 * <ul>
 *   <li> As of 3.1, Snowball stemming is done with SnowballFilter,
 *        LowerCaseFilter is used prior to StopFilter, and Snowball
 *        stopwords are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
 *
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */

@@ -60,19 +73,11 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
   * @deprecated use {@link #getDefaultStopSet()} instead
   */
  @Deprecated
  public final static String[] DUTCH_STOP_WORDS =
  {
    "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
    "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
    "er", "maar", "om", "hem", "dan", "zou", "of", "wat", "mijn", "men", "dit", "zo",
    "door", "over", "ze", "zich", "bij", "ook", "tot", "je", "mij", "uit", "der", "daar",
    "haar", "naar", "heb", "hoe", "heeft", "hebben", "deze", "u", "want", "nog", "zal",
    "me", "zij", "nu", "ge", "geen", "omdat", "iets", "worden", "toch", "al", "waren",
    "veel", "meer", "doen", "toen", "moet", "ben", "zonder", "kan", "hun", "dus",
    "alles", "onder", "ja", "eens", "hier", "wie", "werd", "altijd", "doch", "wordt",
    "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
    "uw", "iemand", "geweest", "andere"
  };
  public final static String[] DUTCH_STOP_WORDS = getDefaultStopSet().toArray(new String[0]);

  /** File containing default Dutch stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "dutch_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop-words set.
   * @return an unmodifiable instance of the default stop-words set.

@@ -82,9 +87,18 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
  }

  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET = CharArraySet
        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
            Arrays.asList(DUTCH_STOP_WORDS), false));
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

@@ -223,18 +237,32 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
   * text in the provided {@link Reader}.
   *
   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link StopFilter},
   *         and {@link DutchStemFilter}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
   *         {@link StemmerOverrideFilter}, and {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader aReader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
    TokenStream result = new StandardFilter(source);
    result = new StopFilter(matchVersion, result, stoptable);
    if (!excltable.isEmpty())
      result = new KeywordMarkerTokenFilter(result, excltable);
    result = new DutchStemFilter(result, stemdict);
    return new TokenStreamComponents(source, result);
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(source);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stoptable);
      if (!excltable.isEmpty())
        result = new KeywordMarkerTokenFilter(result, excltable);
      if (!stemdict.isEmpty())
        result = new StemmerOverrideFilter(matchVersion, result, stemdict);
      result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(source);
      result = new StopFilter(matchVersion, result, stoptable);
      if (!excltable.isEmpty())
        result = new KeywordMarkerTokenFilter(result, excltable);
      result = new DutchStemFilter(result, stemdict);
      return new TokenStreamComponents(source, result);
    }
  }
}
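
For contrast between the two paths selected by the Version gate above, a hedged one-liner each (assumes the pre-existing DutchAnalyzer(Version) constructor, which is not shown in this hunk):

// Hypothetical: LUCENE_31 selects the Snowball chain with StemmerOverrideFilter;
// older versions keep the deprecated DutchStemFilter chain for index compatibility.
Analyzer dutch31 = new DutchAnalyzer(Version.LUCENE_31);
Analyzer dutch30 = new DutchAnalyzer(Version.LUCENE_30);
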
@@ -26,6 +26,7 @@ import java.util.Set;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter; // for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

@@ -42,7 +43,11 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see KeywordMarkerTokenFilter
 * @deprecated Use {@link SnowballFilter} with
 * {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
 * same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
public final class DutchStemFilter extends TokenFilter {
  /**
   * The actual token in the input stream.

@@ -26,8 +26,10 @@ import java.util.Map;
 * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
 * algorithm in Martin Porter's snowball project.
 * </p>
 * @deprecated Use {@link org.tartarus.snowball.ext.DutchStemmer} instead,
 * which has the same functionality. This filter will be removed in Lucene 4.0
 */

@Deprecated
public class DutchStemmer {
  /**
   * Buffer for the terms while stemming them.
@@ -0,0 +1,130 @@
package org.apache.lucene.analysis.no;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.NorwegianStemmer;

/**
 * {@link Analyzer} for Norwegian.
 */
public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Norwegian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "norwegian_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public NorwegianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new NorwegianStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Norwegian.
</body>
</html>

@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.payloads;
 */


import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.payloads;

import java.io.IOException;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.payloads;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;

@@ -21,7 +21,6 @@ import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/** Set the positionIncrement of all tokens to the "positionIncrement",
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.pt;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.PortugueseStemmer;

/**
 * {@link Analyzer} for Portuguese.
 */
public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Portuguese stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "portuguese_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public PortugueseAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new PortugueseStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Portuguese.
</body>
</html>
@@ -0,0 +1,133 @@
package org.apache.lucene.analysis.ro;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.RomanianStemmer;

/**
 * {@link Analyzer} for Romanian.
 */
public final class RomanianAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Romanian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  /**
   * The comment character in the stopwords file.
   * All lines prefixed with this will be ignored.
   */
  private static final String STOPWORDS_COMMENT = "#";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false, RomanianAnalyzer.class,
            DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public RomanianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public RomanianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public RomanianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new RomanianStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Romanian.
</body>
</html>
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ru;
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Map;

@@ -26,11 +27,15 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;

/**

@@ -40,13 +45,22 @@ import org.apache.lucene.util.Version;
 * will not be indexed at all).
 * A default set of stopwords is used unless an alternative list is specified.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating RussianAnalyzer:
 * <ul>
 *   <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
 *        SnowballFilter, and Snowball stopwords are used by default.
 * </ul>
 */
public final class RussianAnalyzer extends StopwordAnalyzerBase
{
  /**
   * List of typical Russian stopwords.
   * List of typical Russian stopwords. (for backwards compatibility)
   * @deprecated Remove this for LUCENE 4.0
   */
  private static final String[] RUSSIAN_STOP_WORDS = {
  @Deprecated
  private static final String[] RUSSIAN_STOP_WORDS_30 = {
    "а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
    "вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где",
    "да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть",

@@ -59,10 +73,27 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
    "чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
  };

  /** File containing default Russian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "russian_stop.txt";

  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET = CharArraySet
    /** @deprecated remove this for Lucene 4.0 */
    @Deprecated
    static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
            Arrays.asList(RUSSIAN_STOP_WORDS), false));
            Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET =
            WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  private final Set<?> stemExclusionSet;

@@ -77,7 +108,9 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
  }

  public RussianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
    this(matchVersion,
        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
            : DefaultSetHolder.DEFAULT_STOP_SET_30);
  }

  /**
@@ -132,19 +165,30 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
   * provided {@link Reader}.
   *
   * @return {@link TokenStreamComponents} built from a
   *         {@link RussianLetterTokenizer} filtered with
   *         {@link StandardTokenizer} filtered with {@link StandardFilter},
   *         {@link LowerCaseFilter}, {@link StopFilter},
   *         and {@link RussianStemFilter}
   *         {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
   *         and {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
    TokenStream result = new LowerCaseFilter(matchVersion, source);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    return new TokenStreamComponents(source, new RussianStemFilter(result));

    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(source);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stopwords);
      if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
          result, stemExclusionSet);
      result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
      TokenStream result = new LowerCaseFilter(matchVersion, source);
      result = new StopFilter(matchVersion, result, stopwords);
      if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
          result, stemExclusionSet);
      return new TokenStreamComponents(source, new RussianStemFilter(result));
    }
  }
}
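
The Version gate above effectively hides two different analyzers behind one class; a brief hedged sketch of selecting each path:

// Hypothetical: 3.1 gets StandardTokenizer + SnowballFilter and Snowball stopwords;
// earlier versions keep RussianLetterTokenizer + RussianStemFilter and the old list.
Analyzer russian31 = new RussianAnalyzer(Version.LUCENE_31);
Analyzer russian30 = new RussianAnalyzer(Version.LUCENE_30);
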
@@ -21,6 +21,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
import org.apache.lucene.analysis.LetterTokenizer; // for javadocs
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;

@@ -35,8 +36,11 @@ import org.apache.lucene.util.Version;
 * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
 * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
 * {@link CharTokenizer#normalize(int)} for details.</li>
 * </ul>
 * </ul>
 * @deprecated Use {@link StandardTokenizer} instead, which has the same functionality.
 * This filter will be removed in Lucene 4.0
 */
@Deprecated
public class RussianLetterTokenizer extends CharTokenizer
{
  private static final int DIGIT_0 = '0';

@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.ru.RussianStemmer; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link

import java.io.IOException;

@@ -40,7 +41,11 @@ import java.io.IOException;
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see KeywordMarkerTokenFilter
 * @deprecated Use {@link SnowballFilter} with
 * {@link org.tartarus.snowball.ext.RussianStemmer} instead, which has the
 * same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
public final class RussianStemFilter extends TokenFilter
{
  /**

@@ -19,7 +19,10 @@ package org.apache.lucene.analysis.ru;

/**
 * Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
 * @deprecated Use {@link org.tartarus.snowball.ext.RussianStemmer} instead,
 * which has the same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
class RussianStemmer
{
  // positions of RV, R1 and R2 respectively

@@ -39,7 +39,10 @@ import java.util.Set;
 * <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
 * </ul>
 * </p>
 * @deprecated Use the language-specific analyzer in contrib/analyzers instead.
 * This analyzer will be removed in Lucene 4.0
 */
@Deprecated
public final class SnowballAnalyzer extends Analyzer {
  private String name;
  private Set<?> stopSet;

@@ -21,6 +21,7 @@ import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.apache.lucene.analysis.LowerCaseFilter; // javadoc @link

@@ -39,14 +40,14 @@ import org.tartarus.snowball.SnowballProgram;
 */
public final class SnowballFilter extends TokenFilter {

  private SnowballProgram stemmer;
  private final SnowballProgram stemmer;

  private TermAttribute termAtt;
  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);

  public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
    super(input);
    this.stemmer = stemmer;
    termAtt = addAttribute(TermAttribute.class);
  }

  /**
@@ -67,23 +68,24 @@ public final class SnowballFilter extends TokenFilter {
    } catch (Exception e) {
      throw new RuntimeException(e.toString());
    }
    termAtt = addAttribute(TermAttribute.class);
  }

  /** Returns the next input Token, after being stemmed */
  @Override
  public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      char termBuffer[] = termAtt.termBuffer();
      final int length = termAtt.termLength();
      stemmer.setCurrent(termBuffer, length);
      stemmer.stem();
      final char finalTerm[] = stemmer.getCurrentBuffer();
      final int newLength = stemmer.getCurrentBufferLength();
      if (finalTerm != termBuffer)
        termAtt.setTermBuffer(finalTerm, 0, newLength);
      else
        termAtt.setTermLength(newLength);
      if (!keywordAttr.isKeyword()) {
        char termBuffer[] = termAtt.termBuffer();
        final int length = termAtt.termLength();
        stemmer.setCurrent(termBuffer, length);
        stemmer.stem();
        final char finalTerm[] = stemmer.getCurrentBuffer();
        final int newLength = stemmer.getCurrentBufferLength();
        if (finalTerm != termBuffer)
          termAtt.setTermBuffer(finalTerm, 0, newLength);
        else
          termAtt.setTermLength(newLength);
      }
      return true;
    } else {
      return false;
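
With the keyword-aware change above, anything that sets KeywordAttribute upstream now survives Snowball unstemmed. A minimal hedged sketch (reader, the protected term, and the English stemmer choice are placeholder assumptions):

// Hypothetical chain: "lucene" is keyword-marked and passes SnowballFilter unchanged.
Set<String> protectedTerms = new HashSet<String>(Arrays.asList("lucene"));
TokenStream stream = new StandardTokenizer(Version.LUCENE_31, reader);
stream = new KeywordMarkerTokenFilter(stream, protectedTerms);
stream = new SnowballFilter(stream, new org.tartarus.snowball.ext.EnglishStemmer());
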
@@ -0,0 +1,129 @@
package org.apache.lucene.analysis.sv;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SwedishStemmer;

/**
 * {@link Analyzer} for Swedish.
 */
public final class SwedishAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Swedish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "swedish_stop.txt";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet() {
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public SwedishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public SwedishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public SwedishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    if (!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new SwedishStemmer());
    return new TokenStreamComponents(source, result);
  }
}
@@ -0,0 +1,22 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html><head></head>
<body>
Analyzer for Swedish.
</body>
</html>
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.th;
import java.io.IOException;
import java.util.Locale;
import java.lang.Character.UnicodeBlock;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -0,0 +1,132 @@
|
|||
package org.apache.lucene.analysis.tr;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.StopwordAnalyzerBase;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
|
||||
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.TurkishStemmer;

/**
 * {@link Analyzer} for Turkish.
 */
public final class TurkishAnalyzer extends StopwordAnalyzerBase {
  private final Set<?> stemExclusionSet;

  /** File containing default Turkish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  /**
   * The comment character in the stopwords file.
   * All lines prefixed with this will be ignored.
   */
  private static final String STOPWORDS_COMMENT = "#";

  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false, TurkishAnalyzer.class,
            DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   */
  public TurkishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Builds an analyzer with the given stop words.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public TurkishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided, this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   *
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public TurkishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }

  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   *
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link TurkishLowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided, and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new TurkishLowerCaseFilter(result);
    result = new StopFilter(matchVersion, result, stopwords);
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new TurkishStemmer());
    return new TokenStreamComponents(source, result);
  }
}
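
A minimal usage sketch for the analyzer above (the field name and sample text are hypothetical; TermAttribute is the 3.x-era accessor for the term text):

// Sketch: analyze a sample string with TurkishAnalyzer and print each term.
Analyzer analyzer = new TurkishAnalyzer(Version.LUCENE_CURRENT);
TokenStream ts = analyzer.tokenStream("content", new StringReader("ağacı dolayı ağaç"));
TermAttribute term = ts.addAttribute(TermAttribute.class);
while (ts.incrementToken()) {
  System.out.println(term.term()); // expected: "ağaç" twice; the stopword "dolayı" is removed
}
ts.close();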
@@ -17,15 +17,6 @@
-->
<html><head></head>
<body>
Support for Turkish.
<p>
This package contains just the TokenStream for handling Turkish casing;
for a stemmer, please see the snowball package.
</p>
<p>
WARNING: SnowballAnalyzer uses LowerCaseFilter by default, even when the
language is set to Turkish, so you will need to construct your own
analyzer that combines TurkishLowerCaseFilter and SnowballFilter.
</p>
Analyzer for Turkish.
</body>
</html>
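
For reference, the hand-built chain that warning describes would look roughly like this sketch (using the classes from this commit; 'matchVersion' and 'reader' are supplied by the caller). The new TurkishAnalyzer encapsulates essentially the same chain, plus stopword removal:

// Sketch of the do-it-yourself chain from the warning above.
TokenStream stream = new StandardTokenizer(matchVersion, reader);
stream = new StandardFilter(stream);
stream = new TurkishLowerCaseFilter(stream); // not LowerCaseFilter: handles dotted/dotless i correctly
stream = new SnowballFilter(stream, new TurkishStemmer());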
@@ -0,0 +1,233 @@
# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
acea
aceasta
această
aceea
acei
aceia
acel
acela
acele
acelea
acest
acesta
aceste
acestea
aceşti
aceştia
acolo
acum
ai
aia
aibă
aici
al
ăla
ale
alea
ălea
altceva
altcineva
am
ar
are
aş
aşadar
asemenea
asta
ăsta
astăzi
astea
ăstea
ăştia
asupra
aţi
au
avea
avem
aveţi
azi
bine
bucur
bună
ca
că
căci
când
care
cărei
căror
cărui
cât
câte
câţi
către
câtva
ce
cel
ceva
chiar
cînd
cine
cineva
cît
cîte
cîţi
cîtva
contra
cu
cum
cumva
curând
curînd
da
dă
dacă
dar
datorită
de
deci
deja
deoarece
departe
deşi
din
dinaintea
dintr
dintre
drept
după
ea
ei
el
ele
eram
este
eşti
eu
face
fără
fi
fie
fiecare
fii
fim
fiţi
iar
ieri
îi
îl
îmi
împotriva
în
înainte
înaintea
încât
încît
încotro
între
întrucât
întrucît
îţi
la
lângă
le
li
lîngă
lor
lui
mă
mâine
mea
mei
mele
mereu
meu
mi
mine
mult
multă
mulţi
ne
nicăieri
nici
nimeni
nişte
noastră
noastre
noi
noştri
nostru
nu
ori
oricând
oricare
oricât
orice
oricînd
oricine
oricît
oricum
oriunde
până
pe
pentru
peste
pînă
poate
pot
prea
prima
primul
prin
printr
sa
să
săi
sale
sau
său
se
şi
sînt
sîntem
sînteţi
spre
sub
sunt
suntem
sunteţi
ta
tăi
tale
tău
te
ţi
ţie
tine
toată
toate
tot
toţi
totuşi
tu
un
una
unde
undeva
unei
unele
uneori
unor
vă
vi
voastră
voastre
voi
voştri
vostru
vouă
vreo
vreun

@@ -0,0 +1,212 @@
# Turkish stopwords from LUCENE-559
# merged with the list from "Information Retrieval on Turkish Texts"
# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
acaba
altmış
altı
ama
ancak
arada
aslında
ayrıca
bana
bazı
belki
ben
benden
beni
benim
beri
beş
bile
bin
bir
birçok
biri
birkaç
birkez
birşey
birşeyi
biz
bize
bizden
bizi
bizim
böyle
böylece
bu
buna
bunda
bundan
bunlar
bunları
bunların
bunu
bunun
burada
çok
çünkü
da
daha
dahi
de
defa
değil
diğer
diye
doksan
dokuz
dolayı
dolayısıyla
dört
edecek
eden
ederek
edilecek
ediliyor
edilmesi
ediyor
eğer
elli
en
etmesi
etti
ettiği
ettiğini
gibi
göre
halen
hangi
hatta
hem
henüz
hep
hepsi
her
herhangi
herkesin
hiç
hiçbir
için
iki
ile
ilgili
ise
işte
itibaren
itibariyle
kadar
karşın
katrilyon
kendi
kendilerine
kendini
kendisi
kendisine
kendisini
kez
ki
kim
kimden
kime
kimi
kimse
kırk
milyar
milyon
mu
mü
mı
nasıl
ne
neden
nedenle
nerde
nerede
nereye
niye
niçin
o
olan
olarak
oldu
olduğu
olduğunu
olduklarını
olmadı
olmadığı
olmak
olması
olmayan
olmaz
olsa
olsun
olup
olur
olursa
oluyor
on
ona
ondan
onlar
onlardan
onları
onların
onu
onun
otuz
oysa
öyle
pek
rağmen
sadece
sanki
sekiz
seksen
sen
senden
seni
senin
siz
sizden
sizi
sizin
şey
şeyden
şeyi
şeyler
şöyle
şu
şuna
şunda
şundan
şunları
şunu
tarafından
trilyon
tüm
üç
üzere
var
vardı
ve
veya
ya
yani
yapacak
yapılan
yapılması
yapıyor
yapmak
yaptı
yaptığı
yaptığını
yaptıkları
yedi
yerine
yetmiş
yine
yirmi
yoksa
yüz
zaten
@@ -22,8 +22,6 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import javax.print.DocFlavor.CHAR_ARRAY;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;

@@ -21,7 +21,6 @@ import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

/**
@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.da;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new DanishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "undersøg", "undersøg");
    checkOneTermReuse(a, "undersøgelse", "undersøg");
    // stopword
    assertAnalyzesTo(a, "på", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("undersøgelse");
    Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT,
        DanishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "undersøgelse", "undersøgelse");
    checkOneTermReuse(a, "undersøg", "undersøg");
  }
}
@@ -0,0 +1,93 @@
package org.apache.lucene.analysis.de;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.util.Version;

public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
  public void testReusableTokenStream() throws Exception {
    Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(a, "Tisch", "tisch");
    checkOneTermReuse(a, "Tische", "tisch");
    checkOneTermReuse(a, "Tischen", "tisch");
  }

  public void testExclusionTableBWCompat() throws IOException {
    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
        new StringReader("Fischen Trinken")));
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    filter.setExclusionSet(set);
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }

  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set1.add("trinken");
    set1.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    filter.setExclusionSet(set1);
    assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
  }

  /*
   * Test that changes to the exclusion table are applied immediately
   * when using reusable token streams.
   */
  public void testExclusionTableReuse() throws Exception {
    GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(a, "tischen", "tisch");
    a.setStemExclusionTable(new String[] { "tischen" });
    checkOneTermReuse(a, "tischen", "tischen");
  }

  /** test some features of the new snowball filter
   * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
   */
  public void testGermanSpecials() throws Exception {
    GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    // a/o/u + e is equivalent to the umlaut form
    checkOneTermReuse(a, "Schaltflächen", "schaltflach");
    checkOneTermReuse(a, "Schaltflaechen", "schaltflach");
    // here they are with the old stemmer
    a = new GermanAnalyzer(Version.LUCENE_30);
    checkOneTermReuse(a, "Schaltflächen", "schaltflach");
    checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
  }
}
@@ -20,15 +20,14 @@ package org.apache.lucene.analysis.de;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;

/**

@@ -40,6 +39,8 @@ import org.apache.lucene.util.Version;
public class TestGermanStemFilter extends BaseTokenStreamTestCase {

  public void testStemming() throws Exception {
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
    TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer));
    // read test cases from external file:
    File dataDir = new File(System.getProperty("dataDir", "./bin"));
    File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");

@@ -55,68 +56,12 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
        continue; // ignore comments and empty lines
      String[] parts = line.split(";");
      //System.out.println(parts[0] + " -- " + parts[1]);
      check(parts[0], parts[1]);
      tokenizer.reset(new StringReader(parts[0]));
      filter.reset();
      assertTokenStreamContents(filter, new String[] { parts[1] });
    }
    breader.close();
    isr.close();
    fis.close();
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "Tisch", "tisch");
    checkReuse(a, "Tische", "tisch");
    checkReuse(a, "Tischen", "tisch");
  }

  public void testExclusionTableBWCompat() throws IOException {
    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
        new StringReader("Fischen Trinken")));
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    filter.setExclusionSet(set);
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }

  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }

  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set1.add("trinken");
    set1.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    filter.setExclusionSet(set1);
    assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
  }

  /*
   * Test that changes to the exclusion table are applied immediately
   * when using reusable token streams.
   */
  public void testExclusionTableReuse() throws Exception {
    GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "tischen", "tisch");
    a.setStemExclusionTable(new String[] { "tischen" });
    checkReuse(a, "tischen", "tischen");
  }


  private void check(final String input, final String expected) throws Exception {
    checkOneTerm(new GermanAnalyzer(Version.LUCENE_CURRENT), input, expected);
  }

  private void checkReuse(Analyzer a, String input, String expected) throws Exception {
    checkOneTermReuse(a, input, expected);
  }
}
@@ -18,7 +18,6 @@ package org.apache.lucene.analysis.el;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

/**

@@ -63,4 +62,23 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
    assertAnalyzesToReuse(a, "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3 \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
        new String[] { "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3", "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3", "\u03b1\u03bb\u03bb\u03bf\u03b9" });
  }

  /**
   * Greek Analyzer didn't call standardFilter, so no normalization of acronyms.
   * Check that this is preserved.
   * @deprecated remove this test in Lucene 4.0
   */
  @Deprecated
  public void testAcronymBWCompat() throws Exception {
    Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "α.π.τ." });
  }

  /**
   * test that acronym normalization works
   */
  public void testAcronym() throws Exception {
    Analyzer a = new GreekAnalyzer(Version.LUCENE_31);
    assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "απτ" });
  }
}
@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.en;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new EnglishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "books", "book");
    checkOneTermReuse(a, "book", "book");
    // stopword
    assertAnalyzesTo(a, "the", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("books");
    Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT,
        EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "books", "books");
    checkOneTermReuse(a, "book", "book");
  }
}

@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.es;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new SpanishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "chicana", "chican");
    checkOneTermReuse(a, "chicano", "chican");
    // stopword
    assertAnalyzesTo(a, "los", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("chicano");
    Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT,
        SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "chicana", "chican");
    checkOneTermReuse(a, "chicano", "chicano");
  }
}

@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.fi;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new FinnishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
    checkOneTermReuse(a, "edeltäjistään", "edeltäj");
    // stopword
    assertAnalyzesTo(a, "olla", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("edeltäjistään");
    Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT,
        FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
    checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
  }
}
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.fr;
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
@@ -113,6 +115,94 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {

  }

  /**
   * @deprecated remove this test for Lucene 4.0
   */
  @Deprecated
  public void testAnalyzer30() throws Exception {
    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30);

    assertAnalyzesTo(fa, "", new String[] {
    });

    assertAnalyzesTo(
        fa,
        "chien chat cheval",
        new String[] { "chien", "chat", "cheval" });

    assertAnalyzesTo(
        fa,
        "chien CHAT CHEVAL",
        new String[] { "chien", "chat", "cheval" });

    assertAnalyzesTo(
        fa,
        " chien ,? + = - CHAT /: > CHEVAL",
        new String[] { "chien", "chat", "cheval" });

    assertAnalyzesTo(fa, "chien++", new String[] { "chien" });

    assertAnalyzesTo(
        fa,
        "mot \"entreguillemet\"",
        new String[] { "mot", "entreguillemet" });

    // let's do some French-specific tests now

    /* 1. couldn't resist:
       I would expect this to stay one term, as in French the minus
       sign is often used for composing words */
    assertAnalyzesTo(
        fa,
        "Jean-François",
        new String[] { "jean", "françois" });

    // 2. stopwords
    assertAnalyzesTo(
        fa,
        "le la chien les aux chat du des à cheval",
        new String[] { "chien", "chat", "cheval" });

    // some nouns and adjectives
    assertAnalyzesTo(
        fa,
        "lances chismes habitable chiste éléments captifs",
        new String[] {
            "lanc",
            "chism",
            "habit",
            "chist",
            "élément",
            "captif" });

    // some verbs
    assertAnalyzesTo(
        fa,
        "finissions souffrirent rugissante",
        new String[] { "fin", "souffr", "rug" });

    // some everything else
    // aujourd'hui stays one term, which is OK
    assertAnalyzesTo(
        fa,
        "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
        new String[] {
            "c3po",
            "aujourd'hui",
            "oeuf",
            "ïâöûàä",
            "anticonstitutionnel",
            "jav" });

    // some more everything else
    // here 1940-1945 stays as one term, 1940:1945 does not
    assertAnalyzesTo(
        fa,
        "33Bis 1940-1945 1940:1945 (---i+++)*",
        new String[] { "33bis", "1940-1945", "1940", "1945", "i" });

  }

  public void testReusableTokenStream() throws Exception {
    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
    // stopwords

@@ -157,4 +247,28 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
        "chist" });
  }

  public void testElision() throws Exception {
    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
  }

  /**
   * Prior to 3.1, this analyzer had no lowercase filter;
   * stopwords were case sensitive. Preserve this for back compat.
   * @deprecated Remove this test in Lucene 4.0
   */
  @Deprecated
  public void testBuggyStopwordsCasing() throws IOException {
    FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(a, "Votre", new String[] { "votr" });
  }

  /**
   * Test that stopwords are not case sensitive
   */
  public void testStopwordsCasing() throws IOException {
    FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
    assertAnalyzesTo(a, "Votre", new String[] { });
  }
}
@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.hu;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new HungarianAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "babakocsi", "babakocs");
    checkOneTermReuse(a, "babakocsijáért", "babakocs");
    // stopword
    assertAnalyzesTo(a, "által", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("babakocsi");
    Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT,
        HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "babakocsi", "babakocsi");
    checkOneTermReuse(a, "babakocsijáért", "babakocs");
  }
}

@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.it;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new ItalianAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "abbandonata", "abbandon");
    checkOneTermReuse(a, "abbandonati", "abbandon");
    // stopword
    assertAnalyzesTo(a, "dallo", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("abbandonata");
    Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT,
        ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "abbandonata", "abbandonata");
    checkOneTermReuse(a, "abbandonati", "abbandon");
  }
}
@@ -0,0 +1,44 @@
package org.apache.lucene.analysis.miscellaneous;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
  public void testOverride() throws IOException {
    // let's make "booked" stem to "books":
    // the override filter will convert "booked" to "books",
    // but also mark it with KeywordAttribute so Porter will not change it.
    Map<String,String> dictionary = new HashMap<String,String>();
    dictionary.put("booked", "books");
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
    TokenStream stream = new PorterStemFilter(
        new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary));
    assertTokenStreamContents(stream, new String[] { "books" });
  }
}
@@ -22,7 +22,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.StringReader;

/**

@@ -18,10 +18,8 @@ package org.apache.lucene.analysis.ngram;
 */


import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;

/**

@@ -100,9 +100,6 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    check("ophalend", "ophal");
    check("ophalers", "ophaler");
    check("ophef", "ophef");
    check("opheffen", "ophef"); // versus snowball 'opheff'
    check("opheffende", "ophef"); // versus snowball 'opheff'
    check("opheffing", "ophef"); // versus snowball 'opheff'
    check("opheldering", "ophelder");
    check("ophemelde", "ophemeld");
    check("ophemelen", "ophemel");

@@ -118,6 +115,24 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    check("ophouden", "ophoud");
  }

  /**
   * @deprecated remove this test in Lucene 4.0
   */
  @Deprecated
  public void testOldBuggyStemmer() throws Exception {
    Analyzer a = new DutchAnalyzer(Version.LUCENE_30);
    checkOneTermReuse(a, "opheffen", "ophef"); // versus snowball 'opheff'
    checkOneTermReuse(a, "opheffende", "ophef"); // versus snowball 'opheff'
    checkOneTermReuse(a, "opheffing", "ophef"); // versus snowball 'opheff'
  }

  public void testSnowballCorrectness() throws Exception {
    Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(a, "opheffen", "opheff");
    checkOneTermReuse(a, "opheffende", "opheff");
    checkOneTermReuse(a, "opheffing", "opheff");
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
@@ -161,6 +176,25 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
  }

  /**
   * Prior to 3.1, this analyzer had no lowercase filter;
   * stopwords were case sensitive. Preserve this for back compat.
   * @deprecated Remove this test in Lucene 4.0
   */
  @Deprecated
  public void testBuggyStopwordsCasing() throws IOException {
    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(a, "Zelf", new String[] { "zelf" });
  }

  /**
   * Test that stopwords are not case sensitive
   */
  public void testStopwordsCasing() throws IOException {
    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_31);
    assertAnalyzesTo(a, "Zelf", new String[] { });
  }

  private void check(final String input, final String expected) throws Exception {
    checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
  }
@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.no;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new NorwegianAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
    checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
    // stopword
    assertAnalyzesTo(a, "det", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("havnedistriktene");
    Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT,
        NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
    checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
  }
}

@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.pt;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new PortugueseAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "quilométricas", "quilométr");
    checkOneTermReuse(a, "quilométricos", "quilométr");
    // stopword
    assertAnalyzesTo(a, "não", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("quilométricas");
    Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT,
        PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "quilométricas", "quilométricas");
    checkOneTermReuse(a, "quilométricos", "quilométr");
  }
}

@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.ro;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new RomanianAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "absenţa", "absenţ");
    checkOneTermReuse(a, "absenţi", "absenţ");
    // stopword
    assertAnalyzesTo(a, "îl", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("absenţa");
    Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT,
        RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "absenţa", "absenţa");
    checkOneTermReuse(a, "absenţi", "absenţ");
  }
}
@@ -50,9 +50,14 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
    dataDir = new File(System.getProperty("dataDir", "./bin"));
  }

  public void testUnicode() throws IOException
  /**
   * @deprecated remove this test and its datafiles in Lucene 4.0
   * the Snowball version has its own data tests.
   */
  @Deprecated
  public void testUnicode30() throws IOException
  {
    RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
    RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
    inWords =
      new InputStreamReader(
        new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),

@@ -110,12 +115,22 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
    }
  }

  /** @deprecated remove this test in Lucene 4.0: stopwords changed */
  @Deprecated
  public void testReusableTokenStream30() throws Exception {
    Analyzer a = new RussianAnalyzer(Version.LUCENE_30);
    assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
        new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
    assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
        new String[] { "знан", "хран", "тайн" });
  }

  public void testReusableTokenStream() throws Exception {
    Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
        new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
    assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
        new String[] { "знан", "хран", "тайн" });
        new String[] { "знан", "эт", "хран", "тайн" });
  }


@@ -25,7 +25,9 @@ import org.apache.lucene.util.Version;

/**
 * Testcase for {@link RussianLetterTokenizer}
 * @deprecated Remove this test class in Lucene 4.0
 */
@Deprecated
public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {

  public void testRussianLetterTokenizer() throws IOException {

@@ -24,6 +24,10 @@ import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.util.ArrayList;

/**
 * @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
 */
@Deprecated
public class TestRussianStem extends LuceneTestCase
{
  private ArrayList words = new ArrayList();

@@ -22,11 +22,8 @@ import java.io.StringReader;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.HashSet;
import java.util.Arrays;

import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.sv;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new SwedishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
    checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
    // stopword
    assertAnalyzesTo(a, "och", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("jaktkarlarne");
    Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT,
        SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
    checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
  }
}

@@ -0,0 +1,54 @@
package org.apache.lucene.analysis.tr;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;

public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
    new TurkishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** test stopwords and stemming */
  public void testBasics() throws IOException {
    Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT);
    // stemming
    checkOneTermReuse(a, "ağacı", "ağaç");
    checkOneTermReuse(a, "ağaç", "ağaç");
    // stopword
    assertAnalyzesTo(a, "dolayı", new String[] {});
  }

  /** test use of exclusion set */
  public void testExclude() throws IOException {
    Set<String> exclusionSet = new HashSet<String>();
    exclusionSet.add("ağacı");
    Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT,
        TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
    checkOneTermReuse(a, "ağacı", "ağacı");
    checkOneTermReuse(a, "ağaç", "ağaç");
  }
}