LUCENE-2055: better snowball integration, deprecate buggy handcoded snowball impls, restructure lang support

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@907125 13f79535-47bb-0310-9956-ffa450edef68
2010-02-05 23:05:46 +00:00 · 2010-02-05 23:05:46 +00:00 · a6b7c5552b
parent 57d1387492
commit a6b7c5552b
75 changed files with 3375 additions and 180 deletions
--- a/NOTICE.txt
+++ b/NOTICE.txt
@ -23,6 +23,11 @@ stopword list that is BSD-licensed created by Jacques Savoy.  The file resides i
 contrib/analyzers/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt.
 See http://members.unine.ch/jacques.savoy/clef/index.html.
 The Romanian analyzer (contrib/analyzers) comes with a default
 stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in
 contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt.
 See http://members.unine.ch/jacques.savoy/clef/index.html.
 The Bulgarian analyzer (contrib/analyzers) comes with a default
 stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in
 contrib/analyzers/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt.
--- a/contrib/CHANGES.txt
+++ b/contrib/CHANGES.txt
@ -27,6 +27,10 @@ Changes in runtime behavior
   used with Version > 3.0 and the TurkishStemmer.
   (Robert Muir via Simon Willnauer)  
 * LUCENE-2055: GermanAnalyzer now uses the Snowball German2 algorithm and 
   stopwords list by default for Version > 3.0.
   (Robert Muir, Uwe Schindler, Simon Willnauer)
 Bug fixes
 * LUCENE-2199: ShingleFilter skipped over tri-gram shingles if outputUnigram
@ -54,6 +58,13 @@ Bug fixes
   CJKTokenizer, ChineseTokenizer, SmartChinese SentenceTokenizer, 
   and WikipediaTokenizer.  (Koji Sekiguchi, Robert Muir)
 * LUCENE-2055: Deprecated RussianTokenizer, RussianStemmer, RussianStemFilter,
   FrenchStemmer, FrenchStemFilter, DutchStemmer, and DutchStemFilter. For
   these Analyzers, SnowballFilter is used instead (for Version > 3.0), as
   the previous code did not always implement the Snowball algorithm correctly.
   Additionally, for Version > 3.0, the Snowball stopword lists are used by
   default.  (Robert Muir, Uwe Schindler, Simon Willnauer)
 API Changes
 * LUCENE-2108: Add SpellChecker.close, to close the underlying
@ -69,6 +80,12 @@ API Changes
 * LUCENE-2204: Change some package private classes/members to publicly accessible to implement
   custom FragmentsBuilders. (Koji Sekiguchi)
 * LUCENE-2055: Integrate snowball into contrib/analyzers. SnowballAnalyzer is
   now deprecated in favor of language-specific analyzers which contain things
   such as stopword lists and any language-specific processing in addition to
   stemming. Add Turkish and Romanian stopwords lists to support this.
   (Robert Muir, Uwe Schindler, Simon Willnauer)
 New features
 * LUCENE-2102: Add a Turkish LowerCase Filter. TurkishLowerCaseFilter handles
@ -105,6 +122,10 @@ New features
 * LUCENE-2234: Add a Hindi analyzer.  (Robert Muir)
 * LUCENE-2055: Add analyzers/misc/StemmerOverrideFilter. This filter provides
   the ability to override any stemmer with a custom dictionary map.
   (Robert Muir, Uwe Schindler, Simon Willnauer)
 Build
 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.da;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.DanishStemmer;
 /**
 * {@link Analyzer} for Danish.
 * <p>
 * The analysis chain is a {@link StandardTokenizer} followed by
 * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, an
 * optional {@link KeywordMarkerTokenFilter} (only when a non-empty stem
 * exclusion set was supplied) and a {@link SnowballFilter} driven by a
 * {@link DanishStemmer}.
 * </p>
 */
 public final class DanishAnalyzer extends StopwordAnalyzerBase {
  /** Terms that are marked as keywords before stemming; held as an unmodifiable copy. */
  private final Set<?> stemExclusionSet;
  /** File containing default Danish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "danish_stop.txt";
  /**
   * Returns the default stop words set shared by all instances.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        // NOTE(review): the word list is looked up using SnowballFilter.class,
        // presumably as a resource packaged next to that class - confirm.
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the IOException as the cause so the
        // underlying failure is not lost from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public DanishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words and an empty stem exclusion set.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public DanishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public DanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // copy the caller's set and wrap the copy as unmodifiable
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this implementation)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // the keyword marker is only inserted when there is something to exclude
    if (!stemExclusionSet.isEmpty()) {
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new DanishStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Danish.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@ -36,10 +36,12 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.German2Stemmer;
 /**
 * {@link Analyzer} for German language. 
@ -51,6 +53,16 @@ import org.apache.lucene.util.Version;
 * exclusion list is empty by default.
 * </p>
 * 
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating GermanAnalyzer:
 * <ul>
 *   <li> As of 3.1, Snowball stemming is done with SnowballFilter, and 
 *        Snowball stopwords are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
 * 
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
@ -60,7 +72,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   * List of typical German stopwords.
   * @deprecated use {@link #getDefaultStopSet()} instead
   */
-  //TODO make this private in 3.1
+  //TODO make this private in 3.1, remove in 4.0
  @Deprecated
  public final static String[] GERMAN_STOP_WORDS = {
    "einer", "eine", "eines", "einem", "einen",
@ -77,6 +89,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    "durch", "wegen", "wird"
  };
  /** File containing default German stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "german_stop.txt";
  /**
   * Returns a set of default German-stopwords 
   * @return a set of default German-stopwords 
@ -86,8 +101,21 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
  }
  private static class DefaultSetHolder {
-    private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
+    /** @deprecated remove in Lucene 4.0 */
    @Deprecated
    private static final Set<?> DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
        Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
    private static final Set<?> DEFAULT_SET;
    static {
      try {
        DEFAULT_SET = 
          WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the IOException as the cause so the
        // underlying failure is not lost from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
@ -105,7 +133,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   * {@link #getDefaultStopSet()}.
   */
  public GermanAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+    this(matchVersion,
        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_SET
            : DefaultSetHolder.DEFAULT_SET_30);
  }
  /**
@ -199,8 +229,9 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   * 
   * @return {@link TokenStreamComponents} built from a
   *         {@link StandardTokenizer} filtered with {@link StandardFilter},
-   *         {@link LowerCaseFilter}, {@link StopFilter}, and
+   *         {@link LowerCaseFilter}, {@link StopFilter}, 
-   *         {@link GermanStemFilter}
+   *         {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, and
   *         {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
@ -210,6 +241,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter( matchVersion, result, stopwords);
    result = new KeywordMarkerTokenFilter(result, exclusionSet);
-    return new TokenStreamComponents(source, new GermanStemFilter(result));
+    if (matchVersion.onOrAfter(Version.LUCENE_31))
      result = new SnowballFilter(result, new German2Stemmer());
    else
      result = new GermanStemFilter(result);
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
 import org.apache.lucene.util.Version;
@ -41,6 +42,15 @@ import java.util.Set;
 * A default set of stopwords is used unless an alternative list is specified.
 * </p>
 *
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating GreekAnalyzer:
 * <ul>
 *   <li> As of 3.1, StandardFilter is used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
 * 
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
@ -117,13 +127,15 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase
    * 
    * @return {@link TokenStreamComponents} built from a
    *         {@link StandardTokenizer} filtered with
-    *         {@link GreekLowerCaseFilter} and {@link StopFilter}
+    *         {@link GreekLowerCaseFilter}, {@link StandardFilter} and {@link StopFilter}
    */
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      final TokenStream result = new GreekLowerCaseFilter(source);
+      TokenStream result = new GreekLowerCaseFilter(source);
      if (matchVersion.onOrAfter(Version.LUCENE_31))
        result = new StandardFilter(result);
      return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
    }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@ -0,0 +1,113 @@
 package org.apache.lucene.analysis.en;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.PorterStemFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 /**
 * {@link Analyzer} for English.
 * <p>
 * The analysis chain is a {@link StandardTokenizer} followed by
 * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, an
 * optional {@link KeywordMarkerTokenFilter} (only when a non-empty stem
 * exclusion set was supplied) and a {@link PorterStemFilter}.
 * </p>
 */
 public final class EnglishAnalyzer extends StopwordAnalyzerBase {
  /** Terms that are marked as keywords before stemming; held as an unmodifiable copy. */
  private final Set<?> stemExclusionSet;
  /**
   * Gives access to the default stop words set, which is
   * {@link StandardAnalyzer#STOP_WORDS_SET}.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Holder whose static field is initialized when the holder class is first
   * used by the outer class.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
  }
  /**
   * Builds an analyzer that uses the default stop words, see
   * {@link #getDefaultStopSet}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public EnglishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer that uses the supplied stop words and excludes no
   * terms from stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public EnglishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer that uses the supplied stop words. When the stem
   * exclusion set is non-empty, a {@link KeywordMarkerTokenFilter} is placed
   * in front of the stemmer.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public EnglishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(
        CharArraySet.copy(matchVersion, stemExclusionSet));
  }
  /**
   * Assembles the {@link TokenStreamComponents} used to tokenize the text
   * read from the given {@link Reader}.
   * 
   * @return A {@link TokenStreamComponents} whose source is a
   *         {@link StandardTokenizer} and whose sink applies
   *         {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} (if a stem
   *         exclusion set is provided) and {@link PorterStemFilter}, in that order.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
    final TokenStream lowerCased =
        new LowerCaseFilter(matchVersion, new StandardFilter(tokenizer));
    final TokenStream withoutStopwords =
        new StopFilter(matchVersion, lowerCased, stopwords);
    // wrap with the keyword marker only when there are terms to protect
    final TokenStream stemmerInput = stemExclusionSet.isEmpty()
        ? withoutStopwords
        : new KeywordMarkerTokenFilter(withoutStopwords, stemExclusionSet);
    return new TokenStreamComponents(tokenizer, new PorterStemFilter(stemmerInput));
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for English.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.es;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.SpanishStemmer;
 /**
 * {@link Analyzer} for Spanish.
 * <p>
 * The analysis chain is a {@link StandardTokenizer} followed by
 * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, an
 * optional {@link KeywordMarkerTokenFilter} (only when a non-empty stem
 * exclusion set was supplied) and a {@link SnowballFilter} driven by a
 * {@link SpanishStemmer}.
 * </p>
 */
 public final class SpanishAnalyzer extends StopwordAnalyzerBase {
  /** Terms that are marked as keywords before stemming; held as an unmodifiable copy. */
  private final Set<?> stemExclusionSet;
  /** File containing default Spanish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "spanish_stop.txt";
  /**
   * Returns the default stop words set shared by all instances.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        // NOTE(review): the word list is looked up using SnowballFilter.class,
        // presumably as a resource packaged next to that class - confirm.
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the IOException as the cause so the
        // underlying failure is not lost from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public SpanishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words and an empty stem exclusion set.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public SpanishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public SpanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // copy the caller's set and wrap the copy as unmodifiable
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this implementation)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // the keyword marker is only inserted when there is something to exclude
    if (!stemExclusionSet.isEmpty()) {
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new SpanishStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Spanish.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.fi;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.FinnishStemmer;
 /**
 * {@link Analyzer} for Finnish.
 * <p>
 * The analysis chain is a {@link StandardTokenizer} followed by
 * {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}, an
 * optional {@link KeywordMarkerTokenFilter} (only when a non-empty stem
 * exclusion set was supplied) and a {@link SnowballFilter} driven by a
 * {@link FinnishStemmer}.
 * </p>
 */
 public final class FinnishAnalyzer extends StopwordAnalyzerBase {
  /** Terms that are marked as keywords before stemming; held as an unmodifiable copy. */
  private final Set<?> stemExclusionSet;
  /** File containing default Finnish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "finnish_stop.txt";
  /**
   * Returns the default stop words set shared by all instances.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        // NOTE(review): the word list is looked up using SnowballFilter.class,
        // presumably as a resource packaged next to that class - confirm.
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the IOException as the cause so the
        // underlying failure is not lost from the stack trace
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public FinnishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words and an empty stem exclusion set.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public FinnishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public FinnishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // copy the caller's set and wrap the copy as unmodifiable
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this implementation)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // the keyword marker is only inserted when there is something to exclude
    if (!stemExclusionSet.isEmpty()) {
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    }
    result = new SnowballFilter(result, new FinnishStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Finnish.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
@ -68,7 +68,7 @@ public final class ElisionFilter extends TokenFilter {
  /**
   * Constructs an elision filter with standard stop words
   */
-  protected ElisionFilter(Version matchVersion, TokenStream input) {
+  public ElisionFilter(Version matchVersion, TokenStream input) {
    this(matchVersion, input, DEFAULT_ARTICLES);
  }
@ -77,7 +77,7 @@ public final class ElisionFilter extends TokenFilter {
   * @deprecated use {@link #ElisionFilter(Version, TokenStream)} instead
   */
  @Deprecated
-  protected ElisionFilter(TokenStream input) {
+  public ElisionFilter(TokenStream input) {
    this(Version.LUCENE_30, input);
  }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@ -27,6 +27,7 @@ import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
@ -55,6 +56,9 @@ import java.util.Set;
 * <p>You must specify the required {@link Version}
 * compatibility when creating FrenchAnalyzer:
 * <ul>
 *   <li> As of 3.1, Snowball stemming is done with SnowballFilter, 
 *        LowerCaseFilter is used prior to StopFilter, and ElisionFilter and 
 *        Snowball stopwords are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
@ -68,7 +72,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   * Extended list of typical French stopwords.
   * @deprecated use {@link #getDefaultStopSet()} instead
   */
-  // TODO make this private in 3.1
+  // TODO make this private in 3.1, remove in 4.0
  @Deprecated
  public final static String[] FRENCH_STOP_WORDS = {
    "a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
@ -95,6 +99,9 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
    "été", "être", "ô"
  };
  /** File containing default French stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "french_stop.txt";
  /**
   * Contains words that should be indexed but not stemmed.
   */
@ -110,16 +117,31 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
  }
  private static class DefaultSetHolder {
-    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+    /** @deprecated remove this in Lucene 4.0 */
    @Deprecated
    static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
            false));
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = 
          WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }
  /**
-   * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
+   * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
   */
  public FrenchAnalyzer(Version matchVersion) {
-    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+    this(matchVersion,
        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
            : DefaultSetHolder.DEFAULT_STOP_SET_30);
  }
  /**
@ -207,12 +229,25 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   * {@link Reader}.
   *
   * @return {@link TokenStreamComponents} built from a {@link StandardTokenizer} 
-   *         filtered with {@link StandardFilter}, {@link StopFilter}, 
+   *         filtered with {@link StandardFilter}, {@link ElisionFilter}, 
-   *         {@link FrenchStemFilter} and {@link LowerCaseFilter}
+   *         {@link LowerCaseFilter}, {@link StopFilter},
   *         {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, 
   *         and {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(source);
      result = new ElisionFilter(matchVersion, result);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stopwords);
      if(!excltable.isEmpty())
        result = new KeywordMarkerTokenFilter(result, excltable);
      result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(source);
      result = new StopFilter(matchVersion, result, stopwords);
@ -222,5 +257,6 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
      // Convert to lowercase after stemming!
      return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
    }
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.fr;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@ -40,7 +41,11 @@ import java.util.Set;
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see KeywordMarkerTokenFilter
 * @deprecated Use {@link SnowballFilter} with 
 * {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
 * same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
 public final class FrenchStemFilter extends TokenFilter {
 	/**
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
@ -25,8 +25,10 @@ package org.apache.lucene.analysis.fr;
 * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
 * (French stemming algorithm) for details
 * </p>
 * @deprecated Use {@link org.tartarus.snowball.ext.FrenchStemmer} instead, 
 * which has the same functionality. This stemmer will be removed in Lucene 4.0
 */
-
+@Deprecated
 public class FrenchStemmer {
    /**
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.hu;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.HungarianStemmer;
 /**
 * {@link Analyzer} for Hungarian.
 * <p>
 * Text is tokenized with {@link StandardTokenizer}, lower-cased, filtered
 * against a stopword set and stemmed with the Snowball {@link HungarianStemmer}.
 * </p>
 */
 public final class HungarianAnalyzer extends StopwordAnalyzerBase {
  /** Terms that must not be stemmed; holds an unmodifiable copy of the set given at construction. */
  private final Set<?> stemExclusionSet;
  /** File containing default Hungarian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "hungarian_stop.txt";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the IOException as the cause so a broken
        // or missing resource can still be diagnosed
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public HungarianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public HungarianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public HungarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // The keyword marker is only added when there is something to exclude.
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new HungarianStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Hungarian.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.it;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.ItalianStemmer;
 /**
 * {@link Analyzer} for Italian.
 * <p>
 * Text is tokenized with {@link StandardTokenizer}, lower-cased, filtered
 * against a stopword set and stemmed with the Snowball {@link ItalianStemmer}.
 * </p>
 */
 public final class ItalianAnalyzer extends StopwordAnalyzerBase {
  /** Terms that must not be stemmed; holds an unmodifiable copy of the set given at construction. */
  private final Set<?> stemExclusionSet;
  /** File containing default Italian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the IOException as the cause so a broken
        // or missing resource can still be diagnosed
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public ItalianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public ItalianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public ItalianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // The keyword marker is only added when there is something to exclude.
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new ItalianStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Italian.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.miscellaneous;
 */
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import java.io.IOException;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
@ -0,0 +1,70 @@
 package org.apache.lucene.analysis.miscellaneous;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.Map;
 import org.apache.lucene.analysis.CharArrayMap;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.Version;
 /**
 * A {@link TokenFilter} that swaps terms found in a caller-supplied dictionary
 * for their mapped stems, making it possible to override any
 * {@link KeywordAttribute} aware stemmer with custom dictionary-based stemming.
 */
 public final class StemmerOverrideFilter extends TokenFilter {
  private final CharArrayMap<String> dictionary;
  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
  /**
   * Create a new StemmerOverrideFilter that stems terms by looking them up in
   * the provided <code>dictionary</code>.
   * <p>
   * Every term replaced through the dictionary is flagged with
   * {@link KeywordAttribute}, so stemmers further down the chain leave it alone.
   * </p>
   */
  public StemmerOverrideFilter(Version matchVersion, TokenStream input,
      Map<?,String> dictionary) {
    super(input);
    if (dictionary instanceof CharArrayMap) {
      // Already a CharArrayMap: use it directly instead of copying.
      this.dictionary = (CharArrayMap<String>) dictionary;
    } else {
      this.dictionary = CharArrayMap.copy(matchVersion, dictionary);
    }
  }
  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    if (keywordAtt.isKeyword()) {
      // don't muck with already-keyworded terms
      return true;
    }
    final String replacement = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength());
    if (replacement != null) {
      termAtt.setTermBuffer(replacement);
      keywordAtt.setKeyword(true);
    }
    return true;
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@ -19,7 +19,6 @@ package org.apache.lucene.analysis.ngram;
 import java.io.IOException;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ngram;
 * limitations under the License.
 */
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@ -20,11 +20,14 @@ package org.apache.lucene.analysis.nl;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
@ -33,7 +36,6 @@ import org.apache.lucene.util.Version;
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@ -51,6 +53,17 @@ import java.util.Map;
 * exclusion list is empty by default.
 * </p>
 *
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating DutchAnalyzer:
 * <ul>
 *   <li> As of 3.1, Snowball stemming is done with SnowballFilter, 
 *        LowerCaseFilter is used prior to StopFilter, and Snowball 
 *        stopwords are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
 * 
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
@ -60,19 +73,11 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
   * @deprecated use {@link #getDefaultStopSet()} instead
   */
  @Deprecated
-  public final static String[] DUTCH_STOP_WORDS =
+  public final static String[] DUTCH_STOP_WORDS = getDefaultStopSet().toArray(new String[0]);
-      {
+  
-        "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
+  /** File containing default Dutch stopwords. */
-        "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
+  public final static String DEFAULT_STOPWORD_FILE = "dutch_stop.txt";
-        "er", "maar", "om", "hem", "dan", "zou", "of", "wat", "mijn", "men", "dit", "zo",
+
        "door", "over", "ze", "zich", "bij", "ook", "tot", "je", "mij", "uit", "der", "daar",
        "haar", "naar", "heb", "hoe", "heeft", "hebben", "deze", "u", "want", "nog", "zal",
        "me", "zij", "nu", "ge", "geen", "omdat", "iets", "worden", "toch", "al", "waren",
        "veel", "meer", "doen", "toen", "moet", "ben", "zonder", "kan", "hun", "dus",
        "alles", "onder", "ja", "eens", "hier", "wie", "werd", "altijd", "doch", "wordt",
        "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
        "uw", "iemand", "geweest", "andere"
      };
  /**
   * Returns an unmodifiable instance of the default stop-words set.
   * @return an unmodifiable instance of the default stop-words set.
@ -82,9 +87,18 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
  }
  private static class DefaultSetHolder {
-    static final Set<?> DEFAULT_STOP_SET = CharArraySet
+    static final Set<?> DEFAULT_STOP_SET;
-        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+
-            Arrays.asList(DUTCH_STOP_WORDS), false));
+    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR)
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }
@ -223,12 +237,25 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
   * text in the provided {@link Reader}.
   *
   * @return A {@link TokenStream} built from a {@link StandardTokenizer}
-   *   filtered with {@link StandardFilter}, {@link StopFilter}, 
+   *   filtered with {@link StandardFilter}, {@link LowerCaseFilter}, 
-   *   and {@link DutchStemFilter}
+   *   {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
   *   {@link StemmerOverrideFilter}, and {@link SnowballFilter}
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader aReader) {
    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(source);
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stoptable);
      if (!excltable.isEmpty())
        result = new KeywordMarkerTokenFilter(result, excltable);
      if (!stemdict.isEmpty())
        result = new StemmerOverrideFilter(matchVersion, result, stemdict);
      result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
      return new TokenStreamComponents(source, result);
    } else {
      final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
      TokenStream result = new StandardFilter(source);
      result = new StopFilter(matchVersion, result, stoptable);
@ -237,4 +264,5 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
      result = new DutchStemFilter(result, stemdict);
      return new TokenStreamComponents(source, result);
    }
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
@ -26,6 +26,7 @@ import java.util.Set;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@ -42,7 +43,11 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see KeywordMarkerTokenFilter
 * @deprecated Use {@link SnowballFilter} with 
 * {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
 * same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
 public final class DutchStemFilter extends TokenFilter {
  /**
   * The actual token in the input stream.
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
@ -26,8 +26,10 @@ import java.util.Map;
 * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
 * algorithm in Martin Porter's snowball project.
 * </p>
 * @deprecated Use {@link org.tartarus.snowball.ext.DutchStemmer} instead, 
 * which has the same functionality. This stemmer will be removed in Lucene 4.0
 */
-
+@Deprecated
 public class DutchStemmer {
  /**
   * Buffer for the terms while stemming them.
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@ -0,0 +1,130 @@
 package org.apache.lucene.analysis.no;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.NorwegianStemmer;
 /**
 * {@link Analyzer} for Norwegian.
 */
 public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
  /** Terms that must not be stemmed; held as an unmodifiable copy of the set passed to the constructor. */
  private final Set<?> stemExclusionSet;
  /** File containing default Norwegian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "norwegian_stop.txt";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original exception as the cause so
        // the underlying I/O failure is not lost if it ever does happen
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public NorwegianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // defensive, unmodifiable copy: later changes to the caller's set do not affect this analyzer
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this implementation)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // keyword marking must precede the stemmer so excluded terms are left unstemmed
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new NorwegianStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Norwegian.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
@ -17,7 +17,6 @@ package org.apache.lucene.analysis.payloads;
 */
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
@ -19,7 +19,6 @@ package org.apache.lucene.analysis.payloads;
 import java.io.IOException;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
@ -19,7 +19,6 @@ package org.apache.lucene.analysis.payloads;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.index.Payload;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
@ -21,7 +21,6 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 /** Set the positionIncrement of all tokens to the "positionIncrement",
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.pt;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.PortugueseStemmer;
 /**
 * {@link Analyzer} for Portuguese.
 */
 public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
  /** Terms that must not be stemmed; held as an unmodifiable copy of the set passed to the constructor. */
  private final Set<?> stemExclusionSet;
  /** File containing default Portuguese stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "portuguese_stop.txt";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original exception as the cause so
        // the underlying I/O failure is not lost if it ever does happen
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public PortugueseAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // defensive, unmodifiable copy: later changes to the caller's set do not affect this analyzer
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this implementation)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // keyword marking must precede the stemmer so excluded terms are left unstemmed
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new PortugueseStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Portuguese.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@ -0,0 +1,133 @@
 package org.apache.lucene.analysis.ro;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.RomanianStemmer;
 /**
 * {@link Analyzer} for Romanian.
 */
 public final class RomanianAnalyzer extends StopwordAnalyzerBase {
  /** Terms that must not be stemmed; held as an unmodifiable copy of the set passed to the constructor. */
  private final Set<?> stemExclusionSet;
  /** File containing default Romanian stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  /**
   * The comment character in the stopwords file.  
   * All lines prefixed with this will be ignored.
   */
  private static final String STOPWORDS_COMMENT = "#";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false, RomanianAnalyzer.class, 
            DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original exception as the cause so
        // the underlying I/O failure is not lost if it ever does happen
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public RomanianAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public RomanianAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public RomanianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // defensive, unmodifiable copy: later changes to the caller's set do not affect this analyzer
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this implementation)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // keyword marking must precede the stemmer so excluded terms are left unstemmed
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new RomanianStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Romanian.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@ -17,6 +17,7 @@ package org.apache.lucene.analysis.ru;
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.Map;
@ -26,11 +27,15 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.util.Version;
 /**
@ -40,13 +45,22 @@ import org.apache.lucene.util.Version;
 * will not be indexed at all).
 * A default set of stopwords is used unless an alternative list is specified.
 * </p>
 * <a name="version"/>
 * <p>You must specify the required {@link Version}
 * compatibility when creating RussianAnalyzer:
 * <ul>
 *   <li> As of 3.1, StandardTokenizer is used, Snowball stemming is done with
 *        SnowballFilter, and Snowball stopwords are used by default.
 * </ul>
 */
 public final class RussianAnalyzer extends StopwordAnalyzerBase
 {
    /**
-     * List of typical Russian stopwords.
+     * List of typical Russian stopwords. (for backwards compatibility)
     * @deprecated Remove this for LUCENE 4.0
     */
-    private static final String[] RUSSIAN_STOP_WORDS = {
+    @Deprecated
    private static final String[] RUSSIAN_STOP_WORDS_30 = {
      "а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
      "вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где", 
      "да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть", 
@ -59,10 +73,27 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
      "чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
    };
    /** File containing default Russian stopwords. */
    public final static String DEFAULT_STOPWORD_FILE = "russian_stop.txt";
    private static class DefaultSetHolder {
-      static final Set<?> DEFAULT_STOP_SET = CharArraySet
+      /** @deprecated remove this for Lucene 4.0 */
      @Deprecated
      static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
          .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
-              Arrays.asList(RUSSIAN_STOP_WORDS), false));
+              Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
      static final Set<?> DEFAULT_STOP_SET;
      static {
        try {
          DEFAULT_STOP_SET = 
            WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
        } catch (IOException ex) {
          // default set should always be present as it is part of the
          // distribution (JAR)
          throw new RuntimeException("Unable to load default stopword set");
        }
      }
    }
    private final Set<?> stemExclusionSet;
@ -77,7 +108,9 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
    }
    public RussianAnalyzer(Version matchVersion) {
-      this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+      this(matchVersion,
        matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
            : DefaultSetHolder.DEFAULT_STOP_SET_30);
    }
    /**
@ -132,19 +165,30 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
     * provided {@link Reader}.
     *
     * @return {@link TokenStreamComponents} built from a 
-     *   {@link RussianLetterTokenizer} filtered with 
+     *   {@link StandardTokenizer} filtered with {@link StandardFilter},
     *   {@link LowerCaseFilter}, {@link StopFilter}, 
-     *   and {@link RussianStemFilter}
+     *   {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
     *   and {@link SnowballFilter}
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      if (matchVersion.onOrAfter(Version.LUCENE_31)) {
        final Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(source);
        result = new LowerCaseFilter(matchVersion, result);
        result = new StopFilter(matchVersion, result, stopwords);
        if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
            result, stemExclusionSet);
        result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
        return new TokenStreamComponents(source, result);
      } else {
        final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
        TokenStream result = new LowerCaseFilter(matchVersion, source);
        result = new StopFilter(matchVersion, result, stopwords);
-      if(!stemExclusionSet.isEmpty())
+        if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
-        result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+          result, stemExclusionSet);
        return new TokenStreamComponents(source, new RussianStemFilter(result));
-      
+      }
    }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
@ -21,6 +21,7 @@ import java.io.Reader;
 import org.apache.lucene.analysis.CharTokenizer;
 import org.apache.lucene.analysis.Tokenizer; // for javadocs
 import org.apache.lucene.analysis.LetterTokenizer; // for javadocs
 import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
@ -36,7 +37,10 @@ import org.apache.lucene.util.Version;
 * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
 * {@link CharTokenizer#normalize(int)} for details.</li>
 * </ul>
 * @deprecated Use {@link StandardTokenizer} instead, which has the same functionality.
 * This tokenizer will be removed in Lucene 4.0 
 */
@Deprecated
 public class RussianLetterTokenizer extends CharTokenizer
 {    
    private static final int DIGIT_0 = '0';
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
@ -24,6 +24,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link
 import java.io.IOException;
@ -40,7 +41,11 @@ import java.io.IOException;
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see KeywordMarkerTokenFilter
 * @deprecated Use {@link SnowballFilter} with 
 * {@link org.tartarus.snowball.ext.RussianStemmer} instead, which has the
 * same functionality. This filter will be removed in Lucene 4.0
 */
@Deprecated
 public final class RussianStemFilter extends TokenFilter
 {
    /**
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemmer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemmer.java
@ -19,7 +19,10 @@ package org.apache.lucene.analysis.ru;
 /**
 * Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
 * @deprecated Use {@link org.tartarus.snowball.ext.RussianStemmer} instead, 
 * which has the same functionality. This stemmer will be removed in Lucene 4.0
 */
@Deprecated
 class RussianStemmer
 {
    // positions of RV, R1 and R2 respectively
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
@ -39,7 +39,10 @@ import java.util.Set;
 *   <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
 * </ul>
 * </p>
 * @deprecated Use the language-specific analyzer in contrib/analyzers instead. 
 * This analyzer will be removed in Lucene 4.0
 */
@Deprecated
 public final class SnowballAnalyzer extends Analyzer {
  private String name;
  private Set<?> stopSet;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@ -21,6 +21,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
 import org.apache.lucene.analysis.LowerCaseFilter; // javadoc @link
@ -39,14 +40,14 @@ import org.tartarus.snowball.SnowballProgram;
 */
 public final class SnowballFilter extends TokenFilter {
-  private SnowballProgram stemmer;
+  private final SnowballProgram stemmer;
-  private TermAttribute termAtt;
+  private final TermAttribute termAtt = addAttribute(TermAttribute.class);
  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
  public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
    super(input);
    this.stemmer = stemmer;
    termAtt = addAttribute(TermAttribute.class);
  }
  /**
@ -67,13 +68,13 @@ public final class SnowballFilter extends TokenFilter {
    } catch (Exception e) {
      throw new RuntimeException(e.toString());
    }
    termAtt = addAttribute(TermAttribute.class);
  }
  /** Returns the next input Token, after being stemmed */
  @Override
  public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      if (!keywordAttr.isKeyword()) {
        char termBuffer[] = termAtt.termBuffer();
        final int length = termAtt.termLength();
        stemmer.setCurrent(termBuffer, length);
@ -84,6 +85,7 @@ public final class SnowballFilter extends TokenFilter {
          termAtt.setTermBuffer(finalTerm, 0, newLength);
        else
          termAtt.setTermLength(newLength);
      }
      return true;
    } else {
      return false;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@ -0,0 +1,129 @@
 package org.apache.lucene.analysis.sv;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.SwedishStemmer;
 /**
 * {@link Analyzer} for Swedish.
 */
 public final class SwedishAnalyzer extends StopwordAnalyzerBase {
  /** Terms handed to {@link KeywordMarkerTokenFilter}; see the three-argument constructor. */
  private final Set<?> stemExclusionSet;
  /** File containing default Swedish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "swedish_stop.txt";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        // the resource is looked up relative to SnowballFilter, not this class
        DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class, 
            DEFAULT_STOPWORD_FILE);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original exception as the cause so
        // the reason for the failure is not lost
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public SwedishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words and an empty stem
   * exclusion set.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public SwedishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public SwedishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // keep an unmodifiable private copy rather than the caller's set
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this method)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link LowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(matchVersion, result);
    result = new StopFilter(matchVersion, result, stopwords);
    // the keyword marker is only inserted when there is something to exclude
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new SwedishStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/package.html
@ -0,0 +1,22 @@
 <!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 <!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at
     http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -->
 <html><head></head>
 <body>
 Analyzer for Swedish.
 </body>
 </html>
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
@ -19,7 +19,6 @@ package org.apache.lucene.analysis.th;
 import java.io.IOException;
 import java.util.Locale;
 import java.lang.Character.UnicodeBlock;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@ -0,0 +1,132 @@
 package org.apache.lucene.analysis.tr;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.TurkishStemmer;
 /**
 * {@link Analyzer} for Turkish.
 */
 public final class TurkishAnalyzer extends StopwordAnalyzerBase {
  /** Terms handed to {@link KeywordMarkerTokenFilter}; see the three-argument constructor. */
  private final Set<?> stemExclusionSet;
  /** File containing default Turkish stopwords. */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  /**
   * The comment character in the stopwords file.  
   * All lines prefixed with this will be ignored.
   */
  private static final String STOPWORDS_COMMENT = "#";
  /**
   * Returns an unmodifiable instance of the default stop words set.
   * @return default stop words set.
   */
  public static Set<?> getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }
  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class 
   * accesses the static final set the first time.
   */
  private static class DefaultSetHolder {
    static final Set<?> DEFAULT_STOP_SET;
    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false, TurkishAnalyzer.class, 
            DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
      } catch (IOException ex) {
        // default set should always be present as it is part of the
        // distribution (JAR); keep the original exception as the cause so
        // the reason for the failure is not lost
        throw new RuntimeException("Unable to load default stopword set", ex);
      }
    }
  }
  /**
   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
   * 
   * @param matchVersion lucene compatibility version
   */
  public TurkishAnalyzer(Version matchVersion) {
    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
  }
  /**
   * Builds an analyzer with the given stop words and an empty stem
   * exclusion set.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   */
  public TurkishAnalyzer(Version matchVersion, Set<?> stopwords) {
    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
  }
  /**
   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
   * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
   * stemming.
   * 
   * @param matchVersion lucene compatibility version
   * @param stopwords a stopword set
   * @param stemExclusionSet a set of terms not to be stemmed
   */
  public TurkishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
    super(matchVersion, stopwords);
    // keep an unmodifiable private copy rather than the caller's set
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
        matchVersion, stemExclusionSet));
  }
  /**
   * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
   * {@link Reader}.
   * 
   * @param fieldName name of the field being analyzed (not used by this method)
   * @param reader source of the text to tokenize
   * @return A {@link TokenStreamComponents} built from an {@link StandardTokenizer}
   *         filtered with {@link StandardFilter}, {@link TurkishLowerCaseFilter},
   *         {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
   *         exclusion set is provided and {@link SnowballFilter}.
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName,
      Reader reader) {
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream result = new StandardFilter(source);
    // Turkish-specific lowercasing is used here instead of LowerCaseFilter
    result = new TurkishLowerCaseFilter(result);
    result = new StopFilter(matchVersion, result, stopwords);
    // the keyword marker is only inserted when there is something to exclude
    if(!stemExclusionSet.isEmpty())
      result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
    result = new SnowballFilter(result, new TurkishStemmer());
    return new TokenStreamComponents(source, result);
  }
 }
--- a/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/package.html
+++ b/contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/package.html
@ -17,15 +17,6 @@
 -->
 <html><head></head>
 <body>
-Support for Turkish.
+Analyzer for Turkish.
 <p>
 This package contains just the TokenStream for handling turkish casing,
 for a stemmer please see the snowball package. 
 </p>
 <p>
 WARNING: SnowballAnalyzer uses LowerCaseFilter by default, even when the
 language is set to Turkish, so you will need to construct your own
 analyzer that combines TurkishLowerCaseFilter and SnowballFilter.
 </p>
 </body>
 </html>
--- a/contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt
+++ b/contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt
@ -0,0 +1,233 @@
 # This file was created by Jacques Savoy and is distributed under the BSD license.
 # See http://members.unine.ch/jacques.savoy/clef/index.html.
 # Also see http://www.opensource.org/licenses/bsd-license.html
 acea
 aceasta
 această
 aceea
 acei
 aceia
 acel
 acela
 acele
 acelea
 acest
 acesta
 aceste
 acestea
 aceşti
 aceştia
 acolo
 acum
 ai
 aia
 aibă
 aici
 al
 ăla
 ale
 alea
 ălea
 altceva
 altcineva
 am
 ar
 are
 aş
 aşadar
 asemenea
 asta
 ăsta
 astăzi
 astea
 ăstea
 ăştia
 asupra
 aţi
 au
 avea
 avem
 aveţi
 azi
 bine
 bucur
 bună
 ca
 că
 căci
 când
 care
 cărei
 căror
 cărui
 cât
 câte
 câţi
 către
 câtva
 ce
 cel
 ceva
 chiar
 cînd
 cine
 cineva
 cît
 cîte
 cîţi
 cîtva
 contra
 cu
 cum
 cumva
 curând
 curînd
 da
 dă
 dacă
 dar
 datorită
 de
 deci
 deja
 deoarece
 departe
 deşi
 din
 dinaintea
 dintr
 dintre
 drept
 după
 ea
 ei
 el
 ele
 eram
 este
 eşti
 eu
 face
 fără
 fi
 fie
 fiecare
 fii
 fim
 fiţi
 iar
 ieri
 îi
 îl
 îmi
 împotriva
 în 
 înainte
 înaintea
 încât
 încît
 încotro
 între
 întrucât
 întrucît
 îţi
 la
 lângă
 le
 li
 lîngă
 lor
 lui
 mă
 mâine
 mea
 mei
 mele
 mereu
 meu
 mi
 mine
 mult
 multă
 mulţi
 ne
 nicăieri
 nici
 nimeni
 nişte
 noastră
 noastre
 noi
 noştri
 nostru
 nu
 ori
 oricând
 oricare
 oricât
 orice
 oricînd
 oricine
 oricît
 oricum
 oriunde
 până
 pe
 pentru
 peste
 pînă
 poate
 pot
 prea
 prima
 primul
 prin
 printr
 sa
 să
 săi
 sale
 sau
 său
 se
 şi
 sînt
 sîntem
 sînteţi
 spre
 sub
 sunt
 suntem
 sunteţi
 ta
 tăi
 tale
 tău
 te
 ţi
 ţie
 tine
 toată
 toate
 tot
 toţi
 totuşi
 tu
 un
 una
 unde
 undeva
 unei
 unele
 uneori
 unor
 vă
 vi
 voastră
 voastre
 voi
 voştri
 vostru
 vouă
 vreo
 vreun
--- a/contrib/analyzers/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt
+++ b/contrib/analyzers/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt
@ -0,0 +1,212 @@
 # Turkish stopwords from LUCENE-559
 # merged with the list from "Information Retrieval on Turkish Texts"
 #   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
 acaba
 altmış
 altı
 ama
 ancak
 arada
 aslında
 ayrıca
 bana
 bazı
 belki
 ben
 benden
 beni
 benim
 beri
 beş
 bile
 bin
 bir
 birçok
 biri
 birkaç
 birkez
 birşey
 birşeyi
 biz
 bize
 bizden
 bizi
 bizim
 böyle
 böylece
 bu
 buna
 bunda
 bundan
 bunlar
 bunları
 bunların
 bunu
 bunun
 burada
 çok
 çünkü
 da
 daha
 dahi
 de
 defa
 değil
 diğer
 diye
 doksan
 dokuz
 dolayı
 dolayısıyla
 dört
 edecek
 eden
 ederek
 edilecek
 ediliyor
 edilmesi
 ediyor
 eğer
 elli
 en
 etmesi
 etti
 ettiği
 ettiğini
 gibi
 göre
 halen
 hangi
 hatta
 hem
 henüz
 hep
 hepsi
 her
 herhangi
 herkesin
 hiç
 hiçbir
 için
 iki
 ile
 ilgili
 ise
 işte
 itibaren
 itibariyle
 kadar
 karşın
 katrilyon
 kendi
 kendilerine
 kendini
 kendisi
 kendisine
 kendisini
 kez
 ki
 kim
 kimden
 kime
 kimi
 kimse
 kırk
 milyar
 milyon
 mu
 mü
 mı
 nasıl
 ne
 neden
 nedenle
 nerde
 nerede
 nereye
 niye
 niçin
 o
 olan
 olarak
 oldu
 olduğu
 olduğunu
 olduklarını
 olmadı
 olmadığı
 olmak
 olması
 olmayan
 olmaz
 olsa
 olsun
 olup
 olur
 olursa
 oluyor
 on
 ona
 ondan
 onlar
 onlardan
 onları
 onların
 onu
 onun
 otuz
 oysa
 öyle
 pek
 rağmen
 sadece
 sanki
 sekiz
 seksen
 sen
 senden
 seni
 senin
 siz
 sizden
 sizi
 sizin
 şey
 şeyden
 şeyi
 şeyler
 şöyle
 şu
 şuna
 şunda
 şundan
 şunları
 şunu
 tarafından
 trilyon
 tüm
 üç
 üzere
 var
 vardı
 ve
 veya
 ya
 yani
 yapacak
 yapılan
 yapılması
 yapıyor
 yapmak
 yaptı
 yaptığı
 yaptığını
 yaptıkları
 yedi
 yerine
 yetmiş
 yine
 yirmi
 yoksa
 yüz
 zaten
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
@ -22,8 +22,6 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 import javax.print.DocFlavor.CHAR_ARRAY;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.Version;
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java
@ -21,7 +21,6 @@ import java.io.IOException;
 import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.Version;
 /**
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.da;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Merely constructs the analyzer; this fails with an NPE when the
   * stopwords file is missing in the classpath.
   */
  public void testResourcesAvailable() {
    new DanishAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Verifies both stemming and stopword removal with the default setup. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new DanishAnalyzer(Version.LUCENE_CURRENT);
    // both forms are expected to come out as the same stem
    checkOneTermReuse(analyzer, "undersøg", "undersøg");
    checkOneTermReuse(analyzer, "undersøgelse", "undersøg");
    // a stopword must produce no tokens at all
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "på", noTokens);
  }
  /** Verifies that a term in the exclusion set is left unstemmed. */
  public void testExclude() throws IOException {
    final Set<String> unstemmed = new HashSet<String>();
    unstemmed.add("undersøgelse");
    final Analyzer analyzer = new DanishAnalyzer(
        Version.LUCENE_CURRENT, DanishAnalyzer.getDefaultStopSet(), unstemmed);
    // the excluded term passes through unchanged ...
    checkOneTermReuse(analyzer, "undersøgelse", "undersøgelse");
    // ... as does the form that was never in the exclusion set
    checkOneTermReuse(analyzer, "undersøg", "undersøg");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
@ -0,0 +1,93 @@
 package org.apache.lucene.analysis.de;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.io.StringReader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
 import org.apache.lucene.analysis.LowerCaseTokenizer;
 import org.apache.lucene.util.Version;
 public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
  /** Runs several inflections of one noun through a single reused analyzer. */
  public void testReusableTokenStream() throws Exception {
    final Analyzer analyzer = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(analyzer, "Tisch", "tisch");
    checkOneTermReuse(analyzer, "Tische", "tisch");
    checkOneTermReuse(analyzer, "Tischen", "tisch");
  }
  /** The exclusion set installed via the setter must keep "fischen" unstemmed. */
  public void testExclusionTableBWCompat() throws IOException {
    final LowerCaseTokenizer tokens = new LowerCaseTokenizer(
        Version.LUCENE_CURRENT, new StringReader("Fischen Trinken"));
    final GermanStemFilter stemFilter = new GermanStemFilter(tokens);
    final CharArraySet excluded = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    excluded.add("fischen");
    stemFilter.setExclusionSet(excluded);
    final String[] expected = { "fischen", "trink" };
    assertTokenStreamContents(stemFilter, expected);
  }
  /** A term marked by KeywordMarkerTokenFilter must keep its unstemmed form. */
  public void testWithKeywordAttribute() throws IOException {
    final CharArraySet keywords = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    keywords.add("fischen");
    final LowerCaseTokenizer tokens = new LowerCaseTokenizer(
        Version.LUCENE_CURRENT, new StringReader("Fischen Trinken"));
    final GermanStemFilter stemFilter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(tokens, keywords));
    final String[] expected = { "fischen", "trink" };
    assertTokenStreamContents(stemFilter, expected);
  }
  /** Combines the keyword marker with an exclusion set installed via the setter. */
  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    final CharArraySet keywords = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    keywords.add("fischen");
    final CharArraySet excluded = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    excluded.add("trinken");
    excluded.add("fischen");
    final LowerCaseTokenizer tokens = new LowerCaseTokenizer(
        Version.LUCENE_CURRENT, new StringReader("Fischen Trinken"));
    final GermanStemFilter stemFilter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(tokens, keywords));
    stemFilter.setExclusionSet(excluded);
    final String[] expected = { "fischen", "trinken" };
    assertTokenStreamContents(stemFilter, expected);
  }
  /* 
   * Changing the exclusion table on an analyzer has to take effect
   * right away, even though token streams are being reused.
   */
  public void testExclusionTableReuse() throws Exception {
    final GermanAnalyzer analyzer = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(analyzer, "tischen", "tisch");
    final String[] excludedTerms = { "tischen" };
    analyzer.setStemExclusionTable(excludedTerms);
    checkOneTermReuse(analyzer, "tischen", "tischen");
  }
  /** exercises some features of the new snowball filter;
   * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
   */
  public void testGermanSpecials() throws Exception {
    final GermanAnalyzer current = new GermanAnalyzer(Version.LUCENE_CURRENT);
    // a/o/u + e is equivalent to the umlaut form
    checkOneTermReuse(current, "Schaltflächen", "schaltflach");
    checkOneTermReuse(current, "Schaltflaechen", "schaltflach");
    // the same inputs with the old stemmer
    final GermanAnalyzer legacy = new GermanAnalyzer(Version.LUCENE_30);
    checkOneTermReuse(legacy, "Schaltflächen", "schaltflach");
    checkOneTermReuse(legacy, "Schaltflaechen", "schaltflaech");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
@ -20,15 +20,14 @@ package org.apache.lucene.analysis.de;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordTokenizer;
-import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.Version;
 /**
@ -40,6 +39,8 @@ import org.apache.lucene.util.Version;
 public class TestGermanStemFilter extends BaseTokenStreamTestCase {
  public void testStemming() throws Exception {
    Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
    TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer));
    // read test cases from external file:
    File dataDir = new File(System.getProperty("dataDir", "./bin"));
    File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");
@ -55,68 +56,12 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
        continue;    // ignore comments and empty lines
      String[] parts = line.split(";");
      //System.out.println(parts[0] + " -- " + parts[1]);
-      check(parts[0], parts[1]);
+      tokenizer.reset(new StringReader(parts[0]));
      filter.reset();
      assertTokenStreamContents(filter, new String[] { parts[1] });
    }
    breader.close();
    isr.close();
    fis.close();
  }
  public void testReusableTokenStream() throws Exception {
    Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "Tisch", "tisch");
    checkReuse(a, "Tische", "tisch");
    checkReuse(a, "Tischen", "tisch");
  }
  public void testExclusionTableBWCompat() throws IOException {
    GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, 
        new StringReader("Fischen Trinken")));
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    filter.setExclusionSet(set);
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }
  public void testWithKeywordAttribute() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader( 
            "Fischen Trinken")), set));
    assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
  }
  public void testWithKeywordAttributeAndExclusionTable() throws IOException {
    CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set.add("fischen");
    CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
    set1.add("trinken");
    set1.add("fischen");
    GermanStemFilter filter = new GermanStemFilter(
        new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
            "Fischen Trinken")), set));
    filter.setExclusionSet(set1);
    assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
  }
  /* 
   * Test that changes to the exclusion table are applied immediately
   * when using reusable token streams.
   */
  public void testExclusionTableReuse() throws Exception {
    GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "tischen", "tisch");
    a.setStemExclusionTable(new String[] { "tischen" });
    checkReuse(a, "tischen", "tischen");
  }
  private void check(final String input, final String expected) throws Exception {
    checkOneTerm(new GermanAnalyzer(Version.LUCENE_CURRENT), input, expected);
  }
  private void checkReuse(Analyzer a, String input, String expected) throws Exception {
    checkOneTermReuse(a, input, expected);
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
@ -18,7 +18,6 @@ package org.apache.lucene.analysis.el;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.Version;
 /**
@ -63,4 +62,23 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
 	    assertAnalyzesToReuse(a, "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
 	            new String[] { "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3", "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3", "\u03b1\u03bb\u03bb\u03bf\u03b9" });
 	}
 	/**
 	 * Greek Analyzer didn't call standardFilter, so no normalization of acronyms.
 	 * check that this is preserved.
 	 * @deprecated remove this test in Lucene 4.0
 	 */
 	@Deprecated
 	public void testAcronymBWCompat() throws Exception {
 	  // with the 3.0 compatibility version the dotted acronym is expected unchanged apart from case
 	  assertAnalyzesTo(new GreekAnalyzer(Version.LUCENE_30), "Α.Π.Τ.", new String[] { "α.π.τ." });
 	}
  /**
   * test that acronym normalization works
   */
  public void testAcronym() throws Exception {
    // with the 3.1 compatibility version the dots are expected to be removed
    assertAnalyzesTo(new GreekAnalyzer(Version.LUCENE_31), "Α.Π.Τ.", new String[] { "απτ" });
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.en;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Merely constructs the analyzer; this fails with an NPE when the
   * stopwords file is missing in the classpath.
   */
  public void testResourcesAvailable() {
    new EnglishAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Verifies both stemming and stopword removal with the default setup. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT);
    // plural and singular are expected to come out as the same stem
    checkOneTermReuse(analyzer, "books", "book");
    checkOneTermReuse(analyzer, "book", "book");
    // a stopword must produce no tokens at all
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "the", noTokens);
  }
  /** Verifies that a term in the exclusion set is left unstemmed. */
  public void testExclude() throws IOException {
    final Set<String> unstemmed = new HashSet<String>();
    unstemmed.add("books");
    final Analyzer analyzer = new EnglishAnalyzer(
        Version.LUCENE_CURRENT, EnglishAnalyzer.getDefaultStopSet(), unstemmed);
    // the excluded plural passes through unchanged ...
    checkOneTermReuse(analyzer, "books", "books");
    // ... as does the singular, which was never in the exclusion set
    checkOneTermReuse(analyzer, "book", "book");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.es;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link SpanishAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new SpanishAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new SpanishAnalyzer(Version.LUCENE_CURRENT);
    // feminine and masculine forms are expected to share one stem
    checkOneTermReuse(analyzer, "chicana", "chican");
    checkOneTermReuse(analyzer, "chicano", "chican");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "los", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("chicano");
    final Analyzer analyzer = new SpanishAnalyzer(
        Version.LUCENE_CURRENT, SpanishAnalyzer.getDefaultStopSet(), excluded);
    // the non-excluded form is still stemmed; the excluded one is kept verbatim
    checkOneTermReuse(analyzer, "chicana", "chican");
    checkOneTermReuse(analyzer, "chicano", "chicano");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.fi;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link FinnishAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new FinnishAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new FinnishAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "edeltäjiinsä", "edeltäj");
    checkOneTermReuse(analyzer, "edeltäjistään", "edeltäj");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "olla", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("edeltäjistään");
    final Analyzer analyzer = new FinnishAnalyzer(
        Version.LUCENE_CURRENT, FinnishAnalyzer.getDefaultStopSet(), excluded);
    // the non-excluded form is still stemmed; the excluded one is kept verbatim
    checkOneTermReuse(analyzer, "edeltäjiinsä", "edeltäj");
    checkOneTermReuse(analyzer, "edeltäjistään", "edeltäjistään");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.fr;
 * limitations under the License.
 */
 import java.io.IOException;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.Version;
@ -113,6 +115,94 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
 	}
 	/**
 	 * Exercises the analyzer built for {@code Version.LUCENE_30} against a
 	 * series of French inputs and their expected terms.
 	 * @deprecated remove this test for Lucene 4.0
 	 */
 	@Deprecated
 	public void testAnalyzer30() throws Exception {
 	    final FrenchAnalyzer analyzer = new FrenchAnalyzer(Version.LUCENE_30);

 	    // empty input yields no terms
 	    assertAnalyzesTo(analyzer, "", new String[0]);

 	    // plain words, mixed case, and surrounding punctuation
 	    assertAnalyzesTo(analyzer, "chien chat cheval",
 	        new String[] { "chien", "chat", "cheval" });
 	    assertAnalyzesTo(analyzer, "chien CHAT CHEVAL",
 	        new String[] { "chien", "chat", "cheval" });
 	    assertAnalyzesTo(analyzer, "  chien  ,? + = -  CHAT /: > CHEVAL",
 	        new String[] { "chien", "chat", "cheval" });
 	    assertAnalyzesTo(analyzer, "chien++", new String[] { "chien" });
 	    assertAnalyzesTo(analyzer, "mot \"entreguillemet\"",
 	        new String[] { "mot", "entreguillemet" });

 	    // French-specific cases follow.
 	    // 1. Hyphenated name: asserted as two terms, although the original
 	    //    author would have expected a single term, since French often
 	    //    uses the minus sign to compose words.
 	    assertAnalyzesTo(analyzer, "Jean-François",
 	        new String[] { "jean", "françois" });

 	    // 2. stopwords
 	    assertAnalyzesTo(analyzer, "le la chien les aux chat du des à cheval",
 	        new String[] { "chien", "chat", "cheval" });

 	    // nouns and adjectives
 	    assertAnalyzesTo(analyzer,
 	        "lances chismes habitable chiste éléments captifs",
 	        new String[] { "lanc", "chism", "habit", "chist", "élément", "captif" });

 	    // verbs
 	    assertAnalyzesTo(analyzer, "finissions souffrirent rugissante",
 	        new String[] { "fin", "souffr", "rug" });

 	    // assorted input; "aujourd'hui" staying a single term is acceptable
 	    assertAnalyzesTo(analyzer,
 	        "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
 	        new String[] {
 	            "c3po", "aujourd'hui", "oeuf", "ïâöûàä", "anticonstitutionnel", "jav" });

 	    // more assorted input; 1940-1945 stays one term while 1940:1945 is split
 	    assertAnalyzesTo(analyzer, "33Bis 1940-1945 1940:1945 (---i+++)*",
 	        new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
 	}
 	public void testReusableTokenStream() throws Exception {
 	  FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
 	  // stopwords
@ -157,4 +247,28 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
    assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
        "chist" });
  }
  /** The elided article in "l'embrouille" is expected to be dropped and the noun stemmed. */
  public void testElision() throws Exception {
    final FrenchAnalyzer analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT);
    final String[] expected = { "voir", "embrouill" };
    assertAnalyzesTo(analyzer, "voir l'embrouille", expected);
  }
  /**
   * Back-compat check: before 3.1 this analyzer had no lowercase filter, so
   * stopword matching was case sensitive. Capitalized "Votre" is therefore
   * expected to survive (stemmed) under {@code Version.LUCENE_30}.
   * @deprecated Remove this test in Lucene 4.0
   */
  @Deprecated
  public void testBuggyStopwordsCasing() throws IOException {
    final FrenchAnalyzer legacy = new FrenchAnalyzer(Version.LUCENE_30);
    final String[] expected = { "votr" };
    assertAnalyzesTo(legacy, "Votre", expected);
  }
  /**
   * Stopword matching must ignore case: capitalized "Votre" is expected to
   * be removed under {@code Version.LUCENE_31}.
   */
  public void testStopwordsCasing() throws IOException {
    final FrenchAnalyzer analyzer = new FrenchAnalyzer(Version.LUCENE_31);
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "Votre", noTokens);
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.hu;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link HungarianAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new HungarianAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new HungarianAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "babakocsi", "babakocs");
    checkOneTermReuse(analyzer, "babakocsijáért", "babakocs");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "által", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("babakocsi");
    final Analyzer analyzer = new HungarianAnalyzer(
        Version.LUCENE_CURRENT, HungarianAnalyzer.getDefaultStopSet(), excluded);
    // the excluded form is kept verbatim; the other form is still stemmed
    checkOneTermReuse(analyzer, "babakocsi", "babakocsi");
    checkOneTermReuse(analyzer, "babakocsijáért", "babakocs");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.it;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link ItalianAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new ItalianAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new ItalianAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "abbandonata", "abbandon");
    checkOneTermReuse(analyzer, "abbandonati", "abbandon");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "dallo", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("abbandonata");
    final Analyzer analyzer = new ItalianAnalyzer(
        Version.LUCENE_CURRENT, ItalianAnalyzer.getDefaultStopSet(), excluded);
    // the excluded form is kept verbatim; the other form is still stemmed
    checkOneTermReuse(analyzer, "abbandonata", "abbandonata");
    checkOneTermReuse(analyzer, "abbandonati", "abbandon");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
@ -0,0 +1,44 @@
 package org.apache.lucene.analysis.miscellaneous;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.PorterStemFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.Version;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 /** Tests {@link StemmerOverrideFilter} placed in front of a Porter stemmer. */
 public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
  /**
   * Maps "booked" to "books" through the override dictionary and expects
   * "books" to come out of the downstream {@link PorterStemFilter} unchanged.
   */
  public void testOverride() throws IOException {
    // Override dictionary: "booked" is to be replaced by "books".
    final Map<String,String> overrides = new HashMap<String,String>();
    overrides.put("booked", "books");

    final Tokenizer source = new KeywordTokenizer(new StringReader("booked"));
    // The override filter rewrites the term and also marks it with
    // KeywordAttribute, so Porter will not change it afterwards.
    final TokenStream overridden =
        new StemmerOverrideFilter(Version.LUCENE_CURRENT, source, overrides);
    final TokenStream stemmed = new PorterStemFilter(overridden);

    assertTokenStreamContents(stemmed, new String[] { "books" });
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
@ -22,7 +22,6 @@ import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 import java.io.IOException;
 import java.io.StringReader;
 /**
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
@ -18,10 +18,8 @@ package org.apache.lucene.analysis.ngram;
 */
 import java.io.IOException;
 import java.io.StringReader;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 /**
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
@ -100,9 +100,6 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
 	 check("ophalend", "ophal");
 	 check("ophalers", "ophaler");
 	 check("ophef", "ophef");
 	 check("opheffen", "ophef"); // versus snowball 'opheff'
 	 check("opheffende", "ophef"); // versus snowball 'opheff'
 	 check("opheffing", "ophef"); // versus snowball 'opheff'
 	 check("opheldering", "ophelder");
 	 check("ophemelde", "ophemeld");
 	 check("ophemelen", "ophemel");
@ -118,6 +115,24 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
 	 check("ophouden", "ophoud");
  }
  /**
   * Pins the stems produced under {@code Version.LUCENE_30}, which differ
   * from Snowball's 'opheff' for these words.
   * @deprecated remove this test in Lucene 4.0
   */
  @Deprecated
  public void testOldBuggyStemmer() throws Exception {
    final Analyzer legacy = new DutchAnalyzer(Version.LUCENE_30);
    // Snowball would give 'opheff' for each of these
    checkOneTermReuse(legacy, "opheffen", "ophef");
    checkOneTermReuse(legacy, "opheffende", "ophef");
    checkOneTermReuse(legacy, "opheffing", "ophef");
  }
  /** Under {@code Version.LUCENE_CURRENT} all three forms are expected to stem to "opheff". */
  public void testSnowballCorrectness() throws Exception {
    final Analyzer analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT);
    checkOneTermReuse(analyzer, "opheffen", "opheff");
    checkOneTermReuse(analyzer, "opheffende", "opheff");
    checkOneTermReuse(analyzer, "opheffing", "opheff");
  }
  public void testReusableTokenStream() throws Exception {
    Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT); 
    checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
@ -161,6 +176,25 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
  }
  /**
   * Back-compat check: before 3.1 this analyzer had no lowercase filter, so
   * stopword matching was case sensitive. Capitalized "Zelf" is therefore
   * expected to survive under {@code Version.LUCENE_30}.
   * @deprecated Remove this test in Lucene 4.0
   */
  @Deprecated
  public void testBuggyStopwordsCasing() throws IOException {
    final DutchAnalyzer legacy = new DutchAnalyzer(Version.LUCENE_30);
    final String[] expected = { "zelf" };
    assertAnalyzesTo(legacy, "Zelf", expected);
  }
  /**
   * Stopword matching must ignore case: capitalized "Zelf" is expected to
   * be removed under {@code Version.LUCENE_31}.
   */
  public void testStopwordsCasing() throws IOException {
    final DutchAnalyzer analyzer = new DutchAnalyzer(Version.LUCENE_31);
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "Zelf", noTokens);
  }
  /**
   * Analyzes {@code input} with a fresh {@code Version.LUCENE_CURRENT}
   * DutchAnalyzer and expects the single term {@code expected}.
   */
  private void check(final String input, final String expected) throws Exception {
    final DutchAnalyzer analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT);
    checkOneTerm(analyzer, input, expected);
  }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.no;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link NorwegianAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new NorwegianAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "havnedistriktene", "havnedistrikt");
    checkOneTermReuse(analyzer, "havnedistrikter", "havnedistrikt");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "det", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("havnedistriktene");
    final Analyzer analyzer = new NorwegianAnalyzer(
        Version.LUCENE_CURRENT, NorwegianAnalyzer.getDefaultStopSet(), excluded);
    // the excluded form is kept verbatim; the other form is still stemmed
    checkOneTermReuse(analyzer, "havnedistriktene", "havnedistriktene");
    checkOneTermReuse(analyzer, "havnedistrikter", "havnedistrikt");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.pt;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link PortugueseAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new PortugueseAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "quilométricas", "quilométr");
    checkOneTermReuse(analyzer, "quilométricos", "quilométr");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "não", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("quilométricas");
    final Analyzer analyzer = new PortugueseAnalyzer(
        Version.LUCENE_CURRENT, PortugueseAnalyzer.getDefaultStopSet(), excluded);
    // the excluded form is kept verbatim; the other form is still stemmed
    checkOneTermReuse(analyzer, "quilométricas", "quilométricas");
    checkOneTermReuse(analyzer, "quilométricos", "quilométr");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.ro;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link RomanianAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new RomanianAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new RomanianAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "absenţa", "absenţ");
    checkOneTermReuse(analyzer, "absenţi", "absenţ");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "îl", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("absenţa");
    final Analyzer analyzer = new RomanianAnalyzer(
        Version.LUCENE_CURRENT, RomanianAnalyzer.getDefaultStopSet(), excluded);
    // the excluded form is kept verbatim; the other form is still stemmed
    checkOneTermReuse(analyzer, "absenţa", "absenţa");
    checkOneTermReuse(analyzer, "absenţi", "absenţ");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
@ -50,9 +50,14 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
      dataDir = new File(System.getProperty("dataDir", "./bin"));
    }
-    public void testUnicode() throws IOException
+    /**
     * @deprecated remove this test and its datafiles in Lucene 4.0
     * the Snowball version has its own data tests.
     */
    @Deprecated
    public void testUnicode30() throws IOException
    {
-        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
+        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
        inWords =
            new InputStreamReader(
                new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
@ -110,12 +115,22 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
        }
    }
    /**
     * Analyzes two sentences with one {@code Version.LUCENE_30} analyzer
     * instance and checks the resulting terms.
     * @deprecated remove this test in Lucene 4.0: stopwords changed
     */
    @Deprecated
    public void testReusableTokenStream30() throws Exception {
      final Analyzer legacy = new RussianAnalyzer(Version.LUCENE_30);

      final String[] firstExpected =
          { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" };
      assertAnalyzesToReuse(legacy,
          "Вместе с тем о силе электромагнитной энергии имели представление еще",
          firstExpected);

      final String[] secondExpected = { "знан", "хран", "тайн" };
      assertAnalyzesToReuse(legacy, "Но знание это хранилось в тайне", secondExpected);
    }
    /**
     * Analyzes two sentences with one {@code Version.LUCENE_CURRENT} analyzer
     * instance via assertAnalyzesToReuse and checks the resulting terms.
     */
    public void testReusableTokenStream() throws Exception {
      Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
      assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
          new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
      assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
-          new String[] { "знан", "хран", "тайн" });
+          new String[] { "знан", "эт", "хран", "тайн" });
    }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
@ -25,7 +25,9 @@ import org.apache.lucene.util.Version;
 /**
 * Testcase for {@link RussianLetterTokenizer}
 * @deprecated Remove this test class in Lucene 4.0
 */
@Deprecated
 public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {
  public void testRussianLetterTokenizer() throws IOException {
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
@ -24,6 +24,10 @@ import java.io.InputStreamReader;
 import java.io.FileInputStream;
 import java.util.ArrayList;
 /**
 * @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
 */
@Deprecated
 public class TestRussianStem extends LuceneTestCase
 {
    private ArrayList words = new ArrayList();
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
@ -22,11 +22,8 @@ import java.io.StringReader;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.HashSet;
 import java.util.Arrays;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
 import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.sv;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Sanity tests for {@link SwedishAnalyzer}: construction with defaults,
  * stemming, stopword removal, and the exclusion set.
  */
 public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
  /**
   * Constructs the analyzer with its defaults. An NPE here means the
   * stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new SwedishAnalyzer(Version.LUCENE_CURRENT);
  }
  /** Checks stemming of two word forms and removal of one stopword. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new SwedishAnalyzer(Version.LUCENE_CURRENT);
    // both inflected forms are expected to share one stem
    checkOneTermReuse(analyzer, "jaktkarlarne", "jaktkarl");
    checkOneTermReuse(analyzer, "jaktkarlens", "jaktkarl");
    // a stopword is expected to produce no tokens
    final String[] noTokens = new String[0];
    assertAnalyzesTo(analyzer, "och", noTokens);
  }
  /** A word placed in the exclusion set is expected to come out unchanged. */
  public void testExclude() throws IOException {
    final Set<String> excluded = new HashSet<String>();
    excluded.add("jaktkarlarne");
    final Analyzer analyzer = new SwedishAnalyzer(
        Version.LUCENE_CURRENT, SwedishAnalyzer.getDefaultStopSet(), excluded);
    // the excluded form is kept verbatim; the other form is still stemmed
    checkOneTermReuse(analyzer, "jaktkarlarne", "jaktkarlarne");
    checkOneTermReuse(analyzer, "jaktkarlens", "jaktkarl");
  }
 }
--- a/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
+++ b/contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
@ -0,0 +1,54 @@
 package org.apache.lucene.analysis.tr;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.Version;
 /**
  * Exercises {@link TurkishAnalyzer}: construction with its default resources,
  * stemming and stopword removal, and the stem exclusion set.
  */
 public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {

  /**
   * Constructing the analyzer with defaults is the whole test: it fails
   * with an NPE when the stopwords file is missing from the classpath.
   */
  public void testResourcesAvailable() {
    new TurkishAnalyzer(Version.LUCENE_CURRENT);
  }

  /** Verifies both stemming and stopword handling of the default analyzer. */
  public void testBasics() throws IOException {
    final Analyzer analyzer = new TurkishAnalyzer(Version.LUCENE_CURRENT);

    // the inflected form and the base form are both expected to yield "ağaç"
    checkOneTermReuse(analyzer, "ağacı", "ağaç");
    checkOneTermReuse(analyzer, "ağaç", "ağaç");

    // "dolayı" is expected to be dropped as a stopword, leaving no tokens
    assertAnalyzesTo(analyzer, "dolayı", new String[0]);
  }

  /** Verifies that a term placed in the exclusion set is left unstemmed. */
  public void testExclude() throws IOException {
    final Set<String> unstemmed = new HashSet<String>();
    unstemmed.add("ağacı");

    final Set<?> stopwords = TurkishAnalyzer.getDefaultStopSet();
    final Analyzer analyzer =
        new TurkishAnalyzer(Version.LUCENE_CURRENT, stopwords, unstemmed);

    // the excluded term passes through unchanged ...
    checkOneTermReuse(analyzer, "ağacı", "ağacı");
    // ... and the base form is still emitted as-is
    checkOneTermReuse(analyzer, "ağaç", "ağaç");
  }
 }