LUCENE-2503: add light stemmers for european languages

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@964019 13f79535-47bb-0310-9956-ffa450edef68
2010-07-14 12:10:34 +00:00 · 2010-07-14 12:10:34 +00:00 · 3241eb9291
parent d49603b939
commit 3241eb9291
95 changed files with 4686 additions and 367 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -187,6 +187,9 @@ New features
 * LUCENE-2464: FastVectorHighlighter: add SingleFragListBuilder to return
   entire field contents. (Koji Sekiguchi)

+ * LUCENE-2503: Added lighter stemming alternatives for European languages. 
+   (Robert Muir)
+
 Build

 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
--- a/modules/analysis/NOTICE.txt
+++ b/modules/analysis/NOTICE.txt
@ -17,30 +17,29 @@ were developed by Martin Porter and Richard Boulton.
 The full snowball package is available from
  http://snowball.tartarus.org/

-The Arabic stemmer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in 
-common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt.
+The Arabic,Persian,Romanian,Bulgarian, and Hindi analyzers (common) come with a default
+stopword list that is BSD-licensed created by Jacques Savoy.  These files reside in:
+common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
+common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
 See http://members.unine.ch/jacques.savoy/clef/index.html.

-The Persian analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in
-common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
-
-The Romanian analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in
-common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
-
-The Bulgarian analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in
-common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
-
-The Hindi analyzer (common) comes with a default
-stopword list that is BSD-licensed created by Jacques Savoy.  The file resides in
-common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt.
-See http://members.unine.ch/jacques.savoy/clef/index.html.
+The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers
+(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
+Ljiljana Dolamic. These files reside in:
+common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
+common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java

 The Stempel analyzer (stempel) includes BSD-licensed software developed 
 by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.ar;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 *  Normalizer for Arabic.
 *  <p>
@ -96,20 +98,4 @@ public class ArabicNormalizer {

    return len;
  }
-
-  /**
-   * Delete a character in-place
-   * 
-   * @param s Input Buffer
-   * @param pos Position of character to delete
-   * @param len length of input buffer
-   * @return length of input buffer after deletion
-   */
-  protected int delete(char s[], int pos, int len) {
-    if (pos < len) 
-      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-
-    return len - 1;
-  }
-
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
@ -1,4 +1,6 @@
 package org.apache.lucene.analysis.ar;
+
+
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -16,6 +18,8 @@ package org.apache.lucene.analysis.ar;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 *  Stemmer for Arabic.
 *  <p>
@ -86,7 +90,7 @@ public class ArabicStemmer {
   */
  public int stemPrefix(char s[], int len) {
    for (int i = 0; i < prefixes.length; i++) 
-      if (startsWith(s, len, prefixes[i]))
+      if (startsWithCheckLength(s, len, prefixes[i]))
        return deleteN(s, 0, len, prefixes[i].length);
    return len;
  }
@ -99,7 +103,7 @@ public class ArabicStemmer {
   */
  public int stemSuffix(char s[], int len) {
    for (int i = 0; i < suffixes.length; i++) 
-      if (endsWith(s, len, suffixes[i]))
+      if (endsWithCheckLength(s, len, suffixes[i]))
        len = deleteN(s, len - suffixes[i].length, len, suffixes[i].length);
    return len;
  }
@ -111,7 +115,7 @@ public class ArabicStemmer {
   * @param prefix prefix to check
   * @return true if the prefix matches and can be stemmed
   */
-  boolean startsWith(char s[], int len, char prefix[]) {
+  boolean startsWithCheckLength(char s[], int len, char prefix[]) {
    if (prefix.length == 1 && len < 4) { // wa- prefix requires at least 3 characters
      return false;
    } else if (len < prefix.length + 2) { // other prefixes require only 2.
@ -132,7 +136,7 @@ public class ArabicStemmer {
   * @param suffix suffix to check
   * @return true if the suffix matches and can be stemmed
   */
-  boolean endsWith(char s[], int len, char suffix[]) {
+  boolean endsWithCheckLength(char s[], int len, char suffix[]) {
    if (len < suffix.length + 2) { // all suffixes require at least 2 characters after stemming
      return false;
    } else {
@ -142,37 +146,5 @@ public class ArabicStemmer {
        
      return true;
    }
-  }
-  
-  
-  /**
-   * Delete n characters in-place
-   * 
-   * @param s Input Buffer
-   * @param pos Position of character to delete
-   * @param len Length of input buffer
-   * @param nChars number of characters to delete
-   * @return length of input buffer after deletion
-   */
-  protected int deleteN(char s[], int pos, int len, int nChars) {
-    for (int i = 0; i < nChars; i++)
-      len = delete(s, pos, len);
-    return len;
-  }
-  
-  /**
-   * Delete a character in-place
-   * 
-   * @param s Input Buffer
-   * @param pos Position of character to delete
-   * @param len length of input buffer
-   * @return length of input buffer after deletion
-   */
-  protected int delete(char s[], int pos, int len) {
-    if (pos < len) 
-      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-    
-    return len - 1;
-  }
-  
+  }  
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemmer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.bg;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 * Light Stemmer for Bulgarian.
 * <p>
@ -138,15 +140,4 @@ public class BulgarianStemmer {
    
    return len;
  }
-  
-  private boolean endsWith(final char s[], final int len, final String suffix) {
-    final int suffixLen = suffix.length();
-    if (suffixLen > len)
-      return false;
-    for (int i = suffixLen - 1; i >= 0; i--)
-      if (s[len -(suffixLen - i)] != suffix.charAt(i))
-        return false;
-    
-    return true;
-  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemmer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.cz;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 * Light Stemmer for Czech.
 * <p>
@ -166,16 +168,4 @@ public class CzechStemmer {

    return len;
  }
-  
-  private boolean endsWith(char s[], int len, String suffix) {
-    int suffixLen = suffix.length();
-    if (suffixLen > len)
-      return false;
-    
-    for (int i = suffixLen - 1; i >= 0; i--)
-      if (s[len - (suffixLen - i)] != suffix.charAt(i))
-        return false;
-    
-    return true;
-  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link GermanLightStemmer} to stem German
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class GermanLightStemFilter extends TokenFilter {
+  private final GermanLightStemmer stemmer = new GermanLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public GermanLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
@ -0,0 +1,138 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Light Stemmer for German.
+ * <p>
+ * This stemmer implements the "UniNE" algorithm in:
+ * <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+ * Jacques Savoy
+ */
+public class GermanLightStemmer {
+  
+  public int stem(char s[], int len) {   
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'ä':
+        case 'à':
+        case 'á':
+        case 'â': s[i] = 'a'; break;
+        case 'ö':
+        case 'ò':
+        case 'ó':
+        case 'ô': s[i] = 'o'; break;
+        case 'ï':
+        case 'ì':
+        case 'í':
+        case 'î': s[i] = 'i'; break;
+        case 'ü': 
+        case 'ù': 
+        case 'ú':
+        case 'û': s[i] = 'u'; break;
+      }
+    
+    len = step1(s, len);
+    return step2(s, len);
+  }
+  
+  private boolean stEnding(char ch) {
+    switch(ch) {
+      case 'b':
+      case 'd':
+      case 'f':
+      case 'g':
+      case 'h':
+      case 'k':
+      case 'l':
+      case 'm':
+      case 'n':
+      case 't': return true;
+      default: return false;
+    }
+  }
+  
+  private int step1(char s[], int len) {
+    if (len > 5 && s[len-3] == 'e' && s[len-2] == 'r' && s[len-1] == 'n')
+      return len - 3;
+    
+    if (len > 4 && s[len-2] == 'e')
+      switch(s[len-1]) {
+        case 'm':
+        case 'n':
+        case 'r':
+        case 's': return len - 2;
+      }
+    
+    if (len > 3 && s[len-1] == 'e')
+      return len - 1;
+    
+    if (len > 3 && s[len-1] == 's' && stEnding(s[len-2]))
+      return len - 1;
+    
+    return len;
+  }
+  
+  private int step2(char s[], int len) {
+    if (len > 5 && s[len-3] == 'e' && s[len-2] == 's' && s[len-1] == 't')
+      return len - 3;
+    
+    if (len > 4 && s[len-2] == 'e' && (s[len-1] == 'r' || s[len-1] == 'n'))
+      return len - 2;
+    
+    if (len > 4 && s[len-2] == 's' && s[len-1] == 't' && stEnding(s[len-3]))
+      return len - 2;
+    
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link GermanMinimalStemmer} to stem German
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class GermanMinimalStemFilter extends TokenFilter {
+  private final GermanMinimalStemmer stemmer = new GermanMinimalStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public GermanMinimalStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Minimal Stemmer for German.
+ * <p>
+ * This stemmer implements the following algorithm:
+ * <i>Morphologie et recherche d'information</i>
+ * Jacques Savoy.
+ */
+public class GermanMinimalStemmer {
+  
+  public int stem(char s[], int len) {
+    if (len < 5)
+      return len;
+    
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'ä': s[i] = 'a'; break;
+        case 'ö': s[i] = 'o'; break;
+        case 'ü': s[i] = 'u'; break;
+      }
+    
+    if (len > 6 && s[len-3] == 'n' && s[len-2] == 'e' && s[len-1] == 'n')
+        return len - 3;
+    
+    if (len > 5)
+      switch(s[len-1]) {
+        case 'n': if (s[len-2] == 'e') return len - 2; else break;
+        case 'e': if (s[len-2] == 's') return len - 2; else break;
+        case 's': if (s[len-2] == 'e') return len - 2; else break;
+        case 'r': if (s[len-2] == 'e') return len - 2; else break;
+      }
+    
+    switch(s[len-1]) {
+      case 'n': 
+      case 'e':
+      case 's':
+      case 'r': return len - 1;
+    }
+    
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.en;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link EnglishMinimalStemmer} to stem 
+ * English words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class EnglishMinimalStemFilter extends TokenFilter {
+  private final EnglishMinimalStemmer stemmer = new EnglishMinimalStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public EnglishMinimalStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java
@ -0,0 +1,45 @@
+package org.apache.lucene.analysis.en;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Minimal plural stemmer for English.
+ * <p>
+ * This stemmer implements the "S-Stemmer" from
+ * <i>How Effective Is Suffixing?</i>
+ * Donna Harman.
+ */
+public class EnglishMinimalStemmer {
+  public int stem(char s[], int len) {
+    if (len < 3 || s[len-1] != 's')
+      return len;
+    
+    switch(s[len-2]) {
+      case 'u':
+      case 's': return len;
+      case 'e':
+        if (len > 3 && s[len-3] == 'i' && s[len-4] != 'a' && s[len-4] != 'e') {
+          s[len - 3] = 'y';
+          return len - 2;
+        }
+        if (s[len-3] == 'i' || s[len-3] == 'a' || s[len-3] == 'o' || s[len-3] == 'e')
+          return len;
+      default: return len - 1;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.es;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link SpanishLightStemmer} to stem Spanish
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class SpanishLightStemFilter extends TokenFilter {
+  private final SpanishLightStemmer stemmer = new SpanishLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public SpanishLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
@ -0,0 +1,109 @@
+package org.apache.lucene.analysis.es;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Light Stemmer for Spanish
+ * <p>
+ * This stemmer implements the algorithm described in:
+ * <i>Report on CLEF-2001 Experiments</i>
+ * Jacques Savoy
+ */
+public class SpanishLightStemmer {
+  
+  public int stem(char s[], int len) {
+    if (len < 5)
+      return len;
+    
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'à': 
+        case 'á':
+        case 'â':
+        case 'ä': s[i] = 'a'; break;
+        case 'ò':
+        case 'ó':
+        case 'ô':
+        case 'ö': s[i] = 'o'; break;
+        case 'è':
+        case 'é':
+        case 'ê':
+        case 'ë': s[i] = 'e'; break;
+        case 'ù':
+        case 'ú':
+        case 'û':
+        case 'ü': s[i] = 'u'; break;
+        case 'ì':
+        case 'í':
+        case 'î':
+        case 'ï': s[i] = 'i'; break;
+      }
+    
+    switch(s[len-1]) {
+      case 'o':
+      case 'a':
+      case 'e': return len - 1;
+      case 's':
+        if (s[len-2] == 'e' && s[len-3] == 's' && s[len-4] == 'e')
+          return len-2;
+        if (s[len-2] == 'e' && s[len-3] == 'c') {
+          s[len-3] = 'z';
+          return len - 2;
+        }
+        if (s[len-2] == 'o' || s[len-2] == 'a' || s[len-2] == 'e')
+          return len - 2;
+    }
+    
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.fa;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 * Normalizer for Persian.
 * <p>
@ -82,20 +84,4 @@ public class PersianNormalizer {

    return len;
  }
-
-  /**
-   * Delete a character in-place
-   * 
-   * @param s Input Buffer
-   * @param pos Position of character to delete
-   * @param len length of input buffer
-   * @return length of input buffer after deletion
-   */
-  protected int delete(char s[], int pos, int len) {
-    if (pos < len)
-      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-    
-    return len - 1;
-  }
-
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.fi;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link FinnishLightStemmer} to stem Finnish
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class FinnishLightStemFilter extends TokenFilter {
+  private final FinnishLightStemmer stemmer = new FinnishLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public FinnishLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
@ -0,0 +1,259 @@
+package org.apache.lucene.analysis.fi;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Light Stemmer for Finnish.
+ * <p>
+ * This stemmer implements the algorithm described in:
+ * <i>Report on CLEF-2003 Monolingual Tracks</i>
+ * Jacques Savoy
+ */
+public class FinnishLightStemmer {
+  
+  public int stem(char s[], int len) {
+    if (len < 4)
+      return len;
+    
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'ä':
+        case 'å': s[i] = 'a'; break;
+        case 'ö': s[i] = 'o'; break;
+      }
+    
+    len = step1(s, len);
+    len = step2(s, len);
+    len = step3(s, len);
+    len = norm1(s, len);
+    len = norm2(s, len);
+    return len;
+  }
+  
+  private int step1(char s[], int len) {
+    if (len > 8) {
+      if (endsWith(s, len, "kin"))
+        return step1(s, len-3);
+      if (endsWith(s, len, "ko"))
+        return step1(s, len-2);
+    }
+    
+    if (len > 11) {
+      if (endsWith(s, len, "dellinen"))
+        return len-8;
+      if (endsWith(s, len, "dellisuus"))
+        return len-9;
+    }
+    return len;
+  }
+  
+  private int step2(char s[], int len) {
+    if (len > 5) {
+      if (endsWith(s, len, "lla")
+          || endsWith(s, len, "tse")
+          || endsWith(s, len, "sti"))
+        return len-3;
+      
+      if (endsWith(s, len, "ni"))
+        return len-2;
+      
+      if (endsWith(s, len, "aa"))
+        return len-1; // aa -> a
+    }
+    
+    return len;
+  }
+  
+  private int step3(char s[], int len) {
+    if (len > 8) {
+      if (endsWith(s, len, "nnen")) {
+        s[len-4] = 's';
+        return len-3;
+      }
+      
+      if (endsWith(s, len, "ntena")) {
+        s[len-5] = 's';
+        return len-4;
+      }
+      
+      if (endsWith(s, len, "tten"))
+        return len-4;
+      
+      if (endsWith(s, len, "eiden"))
+        return len-5;
+    }
+    
+    if (len > 6) {
+      if (endsWith(s, len, "neen")
+          || endsWith(s, len, "niin")
+          || endsWith(s, len, "seen")
+          || endsWith(s, len, "teen")
+          || endsWith(s, len, "inen"))
+          return len-4;
+      
+      if (s[len-3] == 'h' && isVowel(s[len-2]) && s[len-1] == 'n')
+        return len-3;
+      
+      if (endsWith(s, len, "den")) {
+        s[len-3] = 's';
+        return len-2;
+      }
+      
+      if (endsWith(s, len, "ksen")) {
+        s[len-4] = 's';
+        return len-3;
+      }
+      
+      if (endsWith(s, len, "ssa")
+          || endsWith(s, len, "sta")
+          || endsWith(s, len, "lla")
+          || endsWith(s, len, "lta")
+          || endsWith(s, len, "tta")
+          || endsWith(s, len, "ksi")
+          || endsWith(s, len, "lle"))
+        return len-3; 
+    }
+    
+    if (len > 5) {
+      if (endsWith(s, len, "na")
+          || endsWith(s, len, "ne"))
+        return len-2;
+      
+      if (endsWith(s, len, "nei"))
+        return len-3;
+    }
+    
+    if (len > 4) {
+      if (endsWith(s, len, "ja")
+          || endsWith(s, len, "ta"))
+        return len-2;
+      
+      if (s[len-1] == 'a')
+        return len-1;
+      
+      if (s[len-1] == 'n' && isVowel(s[len-2]))
+        return len-2;
+      
+      if (s[len-1] == 'n')
+        return len-1;
+    }
+    
+    return len;
+  }
+  
+  private int norm1(char s[], int len) {
+    if (len > 5 && endsWith(s, len, "hde")) {
+        s[len-3] = 'k';
+        s[len-2] = 's';
+        s[len-1] = 'i';
+    }
+    
+    if (len > 4) {
+      if (endsWith(s, len, "ei") || endsWith(s, len, "at"))
+        return len-2;
+    }
+    
+    if (len > 3)
+      switch(s[len-1]) {
+        case 't':
+        case 's':
+        case 'j':
+        case 'e':
+        case 'a':
+        case 'i': return len-1;
+      }
+    
+    return len;
+  }
+  
+  private int norm2(char s[], int len) {
+    if (len > 8) {
+      if (s[len-1] == 'e' 
+          || s[len-1] == 'o' 
+          || s[len-1] == 'u')
+        len--;
+    }
+    
+    if (len > 4) {
+      if (s[len-1] == 'i')
+        len--;
+      
+      if (len > 4) {
+        char ch = s[0];
+        for (int i = 1; i < len; i++) {
+          if (s[i] == ch &&
+              (ch == 'k' || ch == 'p' || ch == 't'))
+            len = delete(s, i--, len);
+          else
+            ch = s[i];
+        }
+      }
+    }
+    
+    return len;
+  }
+  
+  private boolean isVowel(char ch) {
+    switch(ch) {
+      case 'a':
+      case 'e':
+      case 'i':
+      case 'o':
+      case 'u':
+      case 'y': return true;
+      default: return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.fr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link FrenchLightStemmer} to stem French
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class FrenchLightStemFilter extends TokenFilter {
+  private final FrenchLightStemmer stemmer = new FrenchLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public FrenchLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
@ -0,0 +1,267 @@
+package org.apache.lucene.analysis.fr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Light Stemmer for French.
+ * <p>
+ * This stemmer implements the "UniNE" algorithm in:
+ * <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+ * Jacques Savoy
+ */
+public class FrenchLightStemmer {
+ 
+  public int stem(char s[], int len) {
+    if (len > 5 && s[len-1] == 'x') {
+      if (s[len-3] == 'a' && s[len-2] == 'u' && s[len-4] != 'e')
+        s[len-2] = 'l';
+      len--;
+    }
+    
+    if (len > 3 && s[len-1] == 'x')
+      len--;
+    
+    if (len > 3 && s[len-1] == 's')
+      len--;
+    
+    if (len > 9 && endsWith(s, len, "issement")) {
+      len -= 6;
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 8 && endsWith(s, len, "issant")) {
+      len -= 4;
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 6 && endsWith(s, len, "ement")) {
+      len -= 4;
+      if (len > 3 && endsWith(s, len, "ive")) {
+        len--;
+        s[len-1] = 'f';
+      }
+      return norm(s, len);
+    }
+    
+    if (len > 11 && endsWith(s, len, "ficatrice")) {
+      len -= 5;
+      s[len-2] = 'e';
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 10 && endsWith(s, len, "ficateur")) {
+      len -= 4;
+      s[len-2] = 'e';
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 9 && endsWith(s, len, "catrice")) {
+      len -= 3;
+      s[len-4] = 'q';
+      s[len-3] = 'u';
+      s[len-2] = 'e';
+      //s[len-1] = 'r' <-- unnecessary, already 'r'.
+      return norm(s, len);
+    }
+    
+    if (len > 8 && endsWith(s, len, "cateur")) {
+      len -= 2;
+      s[len-4] = 'q';
+      s[len-3] = 'u';
+      s[len-2] = 'e';
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 8 && endsWith(s, len, "atrice")) {
+      len -= 4;
+      s[len-2] = 'e';
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 7 && endsWith(s, len, "ateur")) {
+      len -= 3;
+      s[len-2] = 'e';
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 6 && endsWith(s, len, "trice")) {
+      len--;
+      s[len-3] = 'e';
+      s[len-2] = 'u';
+      s[len-1] = 'r';
+    }
+    
+    if (len > 5 && endsWith(s, len, "ième"))
+      return norm(s, len-4);
+    
+    if (len > 7 && endsWith(s, len, "teuse")) {
+      len -= 2;
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 6 && endsWith(s, len, "teur")) {
+      len--;
+      s[len-1] = 'r';
+      return norm(s, len);
+    }
+    
+    if (len > 5 && endsWith(s, len, "euse"))
+      return norm(s, len-2);
+    
+    if (len > 8 && endsWith(s, len, "ère")) {
+      len--;
+      s[len-2] = 'e';
+      return norm(s, len);
+    }
+    
+    if (len > 7 && endsWith(s, len, "ive")) {
+      len--;
+      s[len-1] = 'f';
+      return norm(s, len);
+    }
+    
+    if (len > 4 && 
+        (endsWith(s, len, "folle") ||
+         endsWith(s, len, "molle"))) {
+      len -= 2;
+      s[len-1] = 'u';
+      return norm(s, len);
+    }
+    
+    if (len > 9 && endsWith(s, len, "nnelle"))
+      return norm(s, len-5);
+    
+    if (len > 9 && endsWith(s, len, "nnel"))
+      return norm(s, len-3);
+    
+    if (len > 4 && endsWith(s, len, "ète")) {
+      len--;
+      s[len-2] = 'e';
+    }
+    
+    if (len > 8 && endsWith(s, len, "ique"))
+      len -= 4;
+    
+    if (len > 8 && endsWith(s, len, "esse"))
+      return norm(s, len-3);
+    
+    if (len > 7 && endsWith(s, len, "inage"))
+      return norm(s, len-3);
+    
+    if (len > 9 && endsWith(s, len, "isation")) {
+      len -= 7;
+      if (len > 5 && endsWith(s, len, "ual"))
+        s[len-2] = 'e';
+      return norm(s, len);
+    }
+    
+    if (len > 9 && endsWith(s, len, "isateur"))
+      return norm(s, len-7);
+    
+    if (len > 8 && endsWith(s, len, "ation"))
+      return norm(s, len-5);
+
+    if (len > 8 && endsWith(s, len, "ition"))
+      return norm(s, len-5);
+    
+    return norm(s, len);
+  }
+
+  private int norm(char s[], int len) {
+    if (len > 4) {
+      for (int i = 0; i < len; i++)
+        switch(s[i]) {
+          case 'à': 
+          case 'á':
+          case 'â': s[i] = 'a'; break;
+          case 'ô': s[i] = 'o'; break;
+          case 'è':
+          case 'é':
+          case 'ê': s[i] = 'e'; break;
+          case 'ù':
+          case 'û': s[i] = 'u'; break;
+          case 'î': s[i] = 'i'; break;
+          case 'ç': s[i] = 'c'; break;
+        }
+      
+      char ch = s[0];
+      for (int i = 1; i < len; i++) {
+        if (s[i] == ch)
+          len = delete(s, i--, len);
+        else
+          ch = s[i];
+      }
+    }
+    
+    if (len > 4 && endsWith(s, len, "ie"))
+      len -= 2;
+    
+    if (len > 4) {
+        if (s[len-1] == 'r') len--;
+        if (s[len-1] == 'e') len--;
+        if (s[len-1] == 'e') len--;
+        if (s[len-1] == s[len-2]) len--;
+    }
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.fr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link FrenchMinimalStemmer} to stem French
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class FrenchMinimalStemFilter extends TokenFilter {
+  private final FrenchMinimalStemmer stemmer = new FrenchMinimalStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public FrenchMinimalStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
@ -0,0 +1,80 @@
+package org.apache.lucene.analysis.fr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Light Stemmer for French.
+ * <p>
+ * This stemmer implements the following algorithm:
+ * <i>A Stemming procedure and stopword list for general French corpora.</i>
+ * Jacques Savoy.
+ */
+public class FrenchMinimalStemmer {
+  public int stem(char s[], int len) {
+    if (len < 6)
+      return len;
+    
+    if (s[len-1] == 'x') {
+      if (s[len-3] == 'a' && s[len-2] == 'u')
+        s[len-2] = 'l';
+      return len - 1;
+    }
+    
+    if (s[len-1] == 's') len--;
+    if (s[len-1] == 'r') len--;
+    if (s[len-1] == 'e') len--;
+    if (s[len-1] == 'é') len--;
+    if (s[len-1] == s[len-2]) len--;
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hi;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 * Normalizer for Hindi.
 * <p>
@ -176,19 +178,4 @@ public class HindiNormalizer {

    return len;
  }
-
-  /**
-   * Delete a character in-place
-   * 
-   * @param s Input Buffer
-   * @param pos Position of character to delete
-   * @param len length of input buffer
-   * @return length of input buffer after deletion
-   */
-  protected int delete(char s[], int pos, int len) {
-    if (pos < len)
-      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-    
-    return len - 1;
-  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemmer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hi;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 * Light Stemmer for Hindi.
 * <p>
@ -116,15 +118,4 @@ public class HindiStemmer {
      return len - 1;
    return len;
  }
-  
-  private boolean endsWith(final char s[], final int len, final String suffix) {
-    final int suffixLen = suffix.length();
-    if (suffixLen > len)
-      return false;
-    for (int i = suffixLen - 1; i >= 0; i--)
-      if (s[len -(suffixLen - i)] != suffix.charAt(i))
-        return false;
-    
-    return true;
-  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link HungarianLightStemmer} to stem
+ * Hungarian words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class HungarianLightStemFilter extends TokenFilter {
+  private final HungarianLightStemmer stemmer = new HungarianLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public HungarianLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
@ -0,0 +1,238 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Light Stemmer for Hungarian.
+ * <p>
+ * This stemmer implements the "UniNE" algorithm in:
+ * <i>Light Stemming Approaches for the French, Portuguese, German and Hungarian Languages</i>
+ * Jacques Savoy
+ */
+public class HungarianLightStemmer {
+  public int stem(char s[], int len) {
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'á': s[i] = 'a'; break;
+        case 'ë':
+        case 'é': s[i] = 'e'; break;
+        case 'í': s[i] = 'i'; break;
+        case 'ó':
+        case 'ő':
+        case 'õ':
+        case 'ö': s[i] = 'o'; break;
+        case 'ú':
+        case 'ű':
+        case 'ũ':
+        case 'û':
+        case 'ü': s[i] = 'u'; break;
+      }
+    
+    len = removeCase(s, len);
+    len = removePossessive(s, len);
+    len = removePlural(s, len);
+    return normalize(s, len);
+  }
+  
+  private int removeCase(char s[], int len) {
+    if (len > 6 && endsWith(s, len, "kent"))
+      return len - 4;
+    
+    if (len > 5) {
+      if (endsWith(s, len, "nak") ||
+          endsWith(s, len, "nek") ||
+          endsWith(s, len, "val") ||
+          endsWith(s, len, "vel") ||
+          endsWith(s, len, "ert") ||
+          endsWith(s, len, "rol") ||
+          endsWith(s, len, "ban") ||
+          endsWith(s, len, "ben") ||
+          endsWith(s, len, "bol") ||
+          endsWith(s, len, "nal") ||
+          endsWith(s, len, "nel") ||
+          endsWith(s, len, "hoz") ||
+          endsWith(s, len, "hez") ||
+          endsWith(s, len, "tol"))
+        return len - 3;
+      
+      if (endsWith(s, len, "al") || endsWith(s, len, "el")) {
+        if (!isVowel(s[len-3]) && s[len-3] == s[len-4])
+          return len - 3;
+      }
+    }
+    
+    if (len > 4) {
+      if (endsWith(s, len, "at") ||
+          endsWith(s, len, "et") ||
+          endsWith(s, len, "ot") ||
+          endsWith(s, len, "va") ||
+          endsWith(s, len, "ve") ||
+          endsWith(s, len, "ra") ||
+          endsWith(s, len, "re") ||
+          endsWith(s, len, "ba") ||
+          endsWith(s, len, "be") ||
+          endsWith(s, len, "ul") ||
+          endsWith(s, len, "ig"))
+        return len - 2;
+      
+      if ((endsWith(s, len, "on") || endsWith(s, len, "en")) && !isVowel(s[len-3]))
+          return len - 2;
+      
+      switch(s[len-1]) {
+        case 't':
+        case 'n': return len - 1;
+        case 'a':
+        case 'e': if (s[len-2] == s[len-3] && !isVowel(s[len-2])) return len - 2;
+      }
+    }
+    
+    return len;
+  }
+
+  private int removePossessive(char s[], int len) {
+    if (len > 6) {
+      if (!isVowel(s[len-5]) && 
+         (endsWith(s, len, "atok") || 
+          endsWith(s, len, "otok") || 
+          endsWith(s, len, "etek")))
+        return len - 4;
+      
+      if (endsWith(s, len, "itek") || endsWith(s, len, "itok"))
+        return len - 4;
+    }
+    
+    if (len > 5) {
+      if (!isVowel(s[len-4]) &&
+        (endsWith(s, len, "unk") ||
+         endsWith(s, len, "tok") ||
+         endsWith(s, len, "tek")))
+        return len - 3;
+      
+      if (isVowel(s[len-4]) && endsWith(s, len, "juk"))
+        return len - 3;
+      
+      if (endsWith(s, len, "ink"))
+        return len - 3;
+    }
+    
+    if (len > 4) {
+      if (!isVowel(s[len-3]) &&
+         (endsWith(s, len, "am") ||
+          endsWith(s, len, "em") ||
+          endsWith(s, len, "om") ||
+          endsWith(s, len, "ad") ||
+          endsWith(s, len, "ed") ||
+          endsWith(s, len, "od") ||
+          endsWith(s, len, "uk")))
+        return len - 2;
+      
+      if (isVowel(s[len-3]) &&
+         (endsWith(s, len, "nk") ||
+          endsWith(s, len, "ja") ||
+          endsWith(s, len, "je")))
+        return len - 2;
+      
+      if (endsWith(s, len, "im") ||
+          endsWith(s, len, "id") ||
+          endsWith(s, len, "ik"))
+        return len - 2;
+    }
+    
+    if (len > 3)
+      switch(s[len-1]) {
+        case 'a':
+        case 'e': if (!isVowel(s[len-2])) return len - 1; break;
+        case 'm':
+        case 'd': if (isVowel(s[len-2])) return len - 1; break;
+        case 'i': return len - 1;
+      }
+    
+    return len;
+  }
+
+  private int removePlural(char s[], int len) {
+    if (len > 3 && s[len-1] == 'k')
+      switch(s[len-2]) {
+        case 'a':
+        case 'o':
+        case 'e': if (len > 4) return len - 2; /* intentional fallthru */
+        default: return len - 1;
+      }
+    return len;
+  }
+
+  private int normalize(char s[], int len) {
+    if (len > 3)
+      switch(s[len-1]) {
+        case 'a':
+        case 'e':
+        case 'i':
+        case 'o': return len - 1;
+      }
+    return len;
+  }
+
+  private boolean isVowel(char ch) {
+    switch(ch) {
+      case 'a':
+      case 'e':
+      case 'i':
+      case 'o':
+      case 'u':
+      case 'y': return true;
+      default: return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemmer.java
@ -17,6 +17,8 @@ package org.apache.lucene.analysis.id;
 * limitations under the License.
 */

+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
 /**
 * Stemmer for Indonesian.
 * <p>
@ -266,39 +268,5 @@ public class IndonesianStemmer {
      return length - 1;
    }
    return length;
-  }
-  
-  private boolean startsWith(char s[], int len, String prefix) {
-    final int prefixLen = prefix.length();
-    if (prefixLen > len)
-      return false;
-    for (int i = 0; i < prefixLen; i++)
-      if (s[i] != prefix.charAt(i)) 
-        return false;
-    return true;
-  }
-  
-  private boolean endsWith(char s[], int len, String suffix) {
-    final int suffixLen = suffix.length();
-    if (suffixLen > len)
-      return false;
-    for (int i = suffixLen - 1; i >= 0; i--)
-      if (s[len -(suffixLen - i)] != suffix.charAt(i))
-        return false;
-    
-    return true;
-  }
-  
-  private int deleteN(char s[], int pos, int len, int nChars) {
-    for (int i = 0; i < nChars; i++)
-      len = delete(s, pos, len);
-    return len;
-  }
-  
-  private int delete(char s[], int pos, int len) {
-    if (pos < len) 
-      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-    
-    return len - 1;
-  }
+  }  
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizer.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.in;
 import java.util.BitSet;
 import java.util.IdentityHashMap;
 import static java.lang.Character.UnicodeBlock.*;
+import static org.apache.lucene.analysis.util.StemmerUtil.*;

 /**
 * Normalizes the Unicode representation of text in Indian languages.
@ -290,14 +291,4 @@ public class IndicNormalizer {
    
    return len;
  }
-  
-  /**
-   * Delete a character in-place
-   */
-  private int delete(char s[], int pos, int len) {
-    if (pos < len) 
-      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
-    
-    return len - 1;
-  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link ItalianLightStemmer} to stem Italian
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class ItalianLightStemFilter extends TokenFilter {
+  private final ItalianLightStemmer stemmer = new ItalianLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public ItalianLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
@ -0,0 +1,117 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * Light Stemmer for Italian.
+ * <p>
+ * This stemmer implements the algorithm described in:
+ * <i>Report on CLEF-2001 Experiments</i>
+ * Jacques Savoy
+ */
+public class ItalianLightStemmer {
+  
+  public int stem(char s[], int len) {
+    if (len < 6)
+      return len;
+    
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'à': 
+        case 'á':
+        case 'â':
+        case 'ä': s[i] = 'a'; break;
+        case 'ò':
+        case 'ó':
+        case 'ô':
+        case 'ö': s[i] = 'o'; break;
+        case 'è':
+        case 'é':
+        case 'ê':
+        case 'ë': s[i] = 'e'; break;
+        case 'ù':
+        case 'ú':
+        case 'û':
+        case 'ü': s[i] = 'u'; break;
+        case 'ì':
+        case 'í':
+        case 'î':
+        case 'ï': s[i] = 'i'; break;
+      }
+    
+    switch(s[len-1]) {
+      case 'e':
+        if (s[len-2] == 'i' || s[len-2] == 'h')
+          return len - 2;
+        else
+          return len - 1;
+      case 'i':
+        if (s[len-2] == 'h' || s[len-2] == 'i')
+          return len - 2;
+        else
+          return len - 1;
+      case 'a':
+        if (s[len-2] == 'i')
+          return len - 2;
+        else
+          return len - 1;
+      case 'o':
+        if (s[len-2] == 'i')
+          return len - 2;
+        else
+          return len - 1;
+    }
+    
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link PortugueseLightStemmer} to stem 
+ * Portuguese words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class PortugueseLightStemFilter extends TokenFilter {
+  private final PortugueseLightStemmer stemmer = new PortugueseLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public PortugueseLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
@ -0,0 +1,202 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Light Stemmer for Portuguese
+ */
+public class PortugueseLightStemmer {
+  
+  public int stem(char s[], int len) {
+    if (len < 4)
+      return len;
+    
+    len = removeSuffix(s, len);
+    
+    if (len > 3 && s[len-1] == 'a')
+      len = normFeminine(s, len);
+    
+    if (len > 4)
+      switch(s[len-1]) {
+        case 'e':
+        case 'a':
+        case 'o': len--; break;
+      }
+    
+    for (int i = 0; i < len; i++)
+      switch(s[i]) {
+        case 'à': 
+        case 'á':
+        case 'â':
+        case 'ä': 
+        case 'ã': s[i] = 'a'; break;
+        case 'ò':
+        case 'ó':
+        case 'ô':
+        case 'ö': 
+        case 'õ': s[i] = 'o'; break;
+        case 'è':
+        case 'é':
+        case 'ê':
+        case 'ë': s[i] = 'e'; break;
+        case 'ù':
+        case 'ú':
+        case 'û':
+        case 'ü': s[i] = 'u'; break;
+        case 'ì':
+        case 'í':
+        case 'î':
+        case 'ï': s[i] = 'i'; break;
+        case 'ç': s[i] = 'c'; break;
+      }
+
+    return len;
+  }
+  
+  private int removeSuffix(char s[], int len) {
+    if (len > 4 && endsWith(s, len, "es"))
+      switch(s[len-3]) {
+        case 'r':
+        case 's':
+        case 'l':
+        case 'z': return len - 2;
+      }
+    
+    if (len > 3 && endsWith(s, len, "ns")) {
+      s[len - 2] = 'm';
+      return len - 1;
+    }
+    
+    if (len > 4 && (endsWith(s, len, "eis") || endsWith(s, len, "éis"))) {
+      s[len - 3] = 'e';
+      s[len - 2] = 'l';
+      return len - 1;
+    }
+    
+    if (len > 4 && endsWith(s, len, "ais")) {
+      s[len - 2] = 'l';
+      return len - 1;
+    }
+    
+    if (len > 4 && endsWith(s, len, "óis")) {
+      s[len - 3] = 'o';
+      s[len - 2] = 'l';
+      return len - 1;
+    }
+    
+    if (len > 4 && endsWith(s, len, "is")) {
+      s[len - 1] = 'l';
+      return len;
+    }
+    
+    if (len > 3 &&
+        (endsWith(s, len, "ões") ||
+         endsWith(s, len, "ães"))) {
+      len--;
+      s[len - 2] = 'ã';
+      s[len - 1] = 'o';
+      return len;
+    }
+    
+    if (len > 6 && endsWith(s, len, "mente"))
+      return len - 5;
+    
+    if (len > 3 && s[len-1] == 's')
+      return len - 1;
+    return len;
+  }
+
+  private int normFeminine(char s[], int len) {
+    if (len > 7 && 
+        (endsWith(s, len, "inha") ||
+         endsWith(s, len, "iaca") ||
+         endsWith(s, len, "eira"))) {
+      s[len - 1] = 'o';
+      return len;
+    }
+    
+    if (len > 6) {
+      if (endsWith(s, len, "osa") ||
+          endsWith(s, len, "ica") ||
+          endsWith(s, len, "ida") ||
+          endsWith(s, len, "ada") ||
+          endsWith(s, len, "iva") ||
+          endsWith(s, len, "ama")) {
+        s[len - 1] = 'o';
+        return len;
+      }
+      
+      if (endsWith(s, len, "ona")) {
+        s[len - 3] = 'ã';
+        s[len - 2] = 'o';
+        return len - 1;
+      }
+      
+      if (endsWith(s, len, "ora"))
+        return len - 1;
+      
+      if (endsWith(s, len, "esa")) {
+        s[len - 3] = 'ê';
+        return len - 1;
+      }
+      
+      if (endsWith(s, len, "na")) {
+        s[len - 1] = 'o';
+        return len;
+      }
+    }
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link PortugueseMinimalStemmer} to stem 
+ * Portuguese words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class PortugueseMinimalStemFilter extends TokenFilter {
+  private final PortugueseMinimalStemmer stemmer = new PortugueseMinimalStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public PortugueseMinimalStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
@ -0,0 +1,119 @@
+package org.apache.lucene.analysis.pt;
+
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Minimal Stemmer for Portuguese
+ * <p>
+ * This follows the "RSLP-S" algorithm presented in:
+ * <i>A study on the Use of Stemming for Monolingual Ad-Hoc Portuguese
+ * Information Retrieval</i> (Orengo, et al)
+ * which is just the plural reduction step of the RSLP
+ * algorithm from <i>A Stemming Algorithmm for the Portuguese Language</i>,
+ * Orengo et al.
+ */
+public class PortugueseMinimalStemmer {
+  
+  private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois", 
+          "depois","dois","leis"),
+      false);
+  
+  private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos",
+          "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés",
+          "através", "convés", "ês", "país", "após", "ambas", "ambos",
+          "messias", "depois"), 
+      false);
+  
+  public int stem(char s[], int len) {
+    if (len < 3 || s[len-1] != 's')
+      return len;
+    
+    if (s[len-2] == 'n') {
+      len--;
+      s[len-1] = 'm';
+      return len;
+    }
+    
+    if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') {
+      len--;
+      s[len-2] = 'ã';
+      s[len-1] = 'o';
+      return len;
+    }
+      
+    if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e')
+      if (!(len == 4 && s[0] == 'm')) {
+        len--;
+        s[len-1] = 'o';
+        return len;
+      }
+    
+    if (len >= 4 && s[len-2] == 'i') {
+      if (s[len-3] == 'a')
+        if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) {
+          len--;
+          s[len-1] = 'l';
+          return len;
+        }
+   
+      if (len >= 5 && s[len-3] == 'é') {
+        len--;
+        s[len-2] = 'e';
+        s[len-1] = 'l';
+        return len;
+      }
+    
+      if (len >= 5 && s[len-3] == 'e') {
+        len--;
+        s[len-1] = 'l';
+        return len;
+      }
+    
+      if (len >= 5 && s[len-3] == 'ó') {
+        len--;
+        s[len-2] = 'o';
+        s[len-1] = 'l';
+        return len;
+      }
+  
+      if (!excIS.contains(s, 0, len)) {
+        s[len-1] = 'l';
+        return len;
+      }
+    }
+    
+    if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e')
+      return len - 2;
+    
+    if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e')
+      if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o'))
+        return len - 2;
+      
+    if (excS.contains(s, 0, len))
+      return len;
+    else
+      return len-1;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.ru;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link RussianLightStemmer} to stem Russian
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class RussianLightStemFilter extends TokenFilter {
+  private final RussianLightStemmer stemmer = new RussianLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public RussianLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
@ -0,0 +1,153 @@
+package org.apache.lucene.analysis.ru;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Light Stemmer for Russian.
+ * <p>
+ * This stemmer implements the following algorithm:
+ * <i>Indexing and Searching Strategies for the Russian Language.</i>
+ * Ljiljana Dolamic and Jacques Savoy.
+ */
+public class RussianLightStemmer {
+
+  public int stem(char s[], int len) {
+    len = removeCase(s, len);
+    return normalize(s, len);
+  }
+  
+  private int normalize(char s[], int len) {
+    if (len > 3)
+      switch(s[len-1]) { 
+        case 'ь':
+        case 'и': return len - 1;
+        case 'н': if (s[len-2] == 'н') return len - 1;
+      }
+    return len;
+  }
+
+  private int removeCase(char s[], int len) {
+    if (len > 6 && 
+        (endsWith(s, len, "иями") ||
+         endsWith(s, len, "оями")))
+      return len - 4;
+    
+    if (len > 5 && 
+        (endsWith(s, len, "иям") ||
+         endsWith(s, len, "иях") ||
+         endsWith(s, len, "оях") ||
+         endsWith(s, len, "ями") ||
+         endsWith(s, len, "оям") ||
+         endsWith(s, len, "оьв") ||
+         endsWith(s, len, "ами") ||
+         endsWith(s, len, "его") ||
+         endsWith(s, len, "ему") ||
+         endsWith(s, len, "ери") ||
+         endsWith(s, len, "ими") ||
+         endsWith(s, len, "ого") ||
+         endsWith(s, len, "ому") ||
+         endsWith(s, len, "ыми") ||
+         endsWith(s, len, "оев")))
+      return len - 3;
+    
+    if (len > 4 &&
+        (endsWith(s, len, "ая") ||
+         endsWith(s, len, "яя") ||
+         endsWith(s, len, "ях") ||
+         endsWith(s, len, "юю") ||
+         endsWith(s, len, "ах") ||
+         endsWith(s, len, "ею") ||
+         endsWith(s, len, "их") ||
+         endsWith(s, len, "ия") ||
+         endsWith(s, len, "ию") ||
+         endsWith(s, len, "ьв") ||
+         endsWith(s, len, "ою") ||
+         endsWith(s, len, "ую") ||
+         endsWith(s, len, "ям") ||
+         endsWith(s, len, "ых") ||
+         endsWith(s, len, "ея") ||
+         endsWith(s, len, "ам") ||
+         endsWith(s, len, "ем") ||
+         endsWith(s, len, "ей") ||
+         endsWith(s, len, "ём") ||
+         endsWith(s, len, "ев") ||
+         endsWith(s, len, "ий") ||
+         endsWith(s, len, "им") ||
+         endsWith(s, len, "ое") ||
+         endsWith(s, len, "ой") ||
+         endsWith(s, len, "ом") ||
+         endsWith(s, len, "ов") ||
+         endsWith(s, len, "ые") ||
+         endsWith(s, len, "ый") ||
+         endsWith(s, len, "ым") ||
+         endsWith(s, len, "ми")))
+      return len - 2;
+    
+    if (len > 3)
+      switch(s[len-1]) {
+        case 'а':
+        case 'е':
+        case 'и':
+        case 'о':
+        case 'у':
+        case 'й':
+        case 'ы':
+        case 'я':
+        case 'ь': return len - 1;
+      }
+    
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
@ -0,0 +1,58 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link SwedishLightStemmer} to stem Swedish
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class SwedishLightStemFilter extends TokenFilter {
+  private final SwedishLightStemmer stemmer = new SwedishLightStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  public SwedishLightStemFilter(TokenStream input) {
+    super(input);
+  }
+  
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      if (!keywordAttr.isKeyword()) {
+        final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+        termAtt.setLength(newlen);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
@ -0,0 +1,111 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+ * This algorithm is updated based on code located at:
+ * http://members.unine.ch/jacques.savoy/clef/
+ * 
+ * Full copyright for that code follows:
+ */
+
+/*
+ * Copyright (c) 2005, Jacques Savoy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this 
+ * list of conditions and the following disclaimer. Redistributions in binary 
+ * form must reproduce the above copyright notice, this list of conditions and
+ * the following disclaimer in the documentation and/or other materials 
+ * provided with the distribution. Neither the name of the author nor the names 
+ * of its contributors may be used to endorse or promote products derived from 
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Light Stemmer for Swedish.
+ * <p>
+ * This stemmer implements the algorithm described in:
+ * <i>Report on CLEF-2003 Monolingual Tracks</i>
+ * Jacques Savoy
+ */
+public class SwedishLightStemmer {
+  
+  public int stem(char s[], int len) {   
+    if (len > 4 && s[len-1] == 's')
+      len--;
+    
+    if (len > 7 && 
+        (endsWith(s, len, "elser") || 
+         endsWith(s, len, "heten")))
+      return len - 5;
+    
+    if (len > 6 &&
+        (endsWith(s, len, "arne") ||
+         endsWith(s, len, "erna") ||
+         endsWith(s, len, "ande") ||
+         endsWith(s, len, "else") ||
+         endsWith(s, len, "aste") ||
+         endsWith(s, len, "orna") ||
+         endsWith(s, len, "aren")))
+      return len - 4;
+    
+    if (len > 5 &&
+        (endsWith(s, len, "are") ||
+         endsWith(s, len, "ast") ||
+         endsWith(s, len, "het")))
+      return len - 3;
+    
+    if (len > 4 &&
+        (endsWith(s, len, "ar") ||
+         endsWith(s, len, "er") ||
+         endsWith(s, len, "or") ||
+         endsWith(s, len, "en") ||
+         endsWith(s, len, "at") ||
+         endsWith(s, len, "te") ||
+         endsWith(s, len, "et")))
+      return len - 2;
+    
+    if (len > 3)
+      switch(s[len-1]) {
+        case 't':
+        case 'a':
+        case 'e':
+        case 'n': return len - 1;
+      }
+    
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
@ -0,0 +1,89 @@
+package org.apache.lucene.analysis.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Some commonly-used stemming functions */
+public class StemmerUtil {
+  /**
+   * Returns true if the character array starts with the suffix.
+   * 
+   * @param s Input Buffer
+   * @param len length of input buffer
+   * @param suffix Suffix string to test
+   * @return true if <code>s</code> starts with <code>suffix</code>
+   */
+  public static boolean startsWith(char s[], int len, String prefix) {
+    final int prefixLen = prefix.length();
+    if (prefixLen > len)
+      return false;
+    for (int i = 0; i < prefixLen; i++)
+      if (s[i] != prefix.charAt(i)) 
+        return false;
+    return true;
+  }
+  
+  /**
+   * Returns true if the character array ends with the suffix.
+   * 
+   * @param s Input Buffer
+   * @param len length of input buffer
+   * @param suffix Suffix string to test
+   * @return true if <code>s</code> ends with <code>suffix</code>
+   */
+  public static boolean endsWith(char s[], int len, String suffix) {
+    final int suffixLen = suffix.length();
+    if (suffixLen > len)
+      return false;
+    for (int i = suffixLen - 1; i >= 0; i--)
+      if (s[len -(suffixLen - i)] != suffix.charAt(i))
+        return false;
+    
+    return true;
+  }
+  
+  /**
+   * Delete a character in-place
+   * 
+   * @param s Input Buffer
+   * @param pos Position of character to delete
+   * @param len length of input buffer
+   * @return length of input buffer after deletion
+   */
+  public static int delete(char s[], int pos, int len) {
+    if (pos < len) 
+      System.arraycopy(s, pos + 1, s, pos, len - pos - 1);
+    
+    return len - 1;
+  }
+  
+  /**
+   * Delete n characters in-place
+   * 
+   * @param s Input Buffer
+   * @param pos Position of character to delete
+   * @param len Length of input buffer
+   * @param nChars number of characters to delete
+   * @return length of input buffer after deletion
+   */
+  public static int deleteN(char s[], int pos, int len, int nChars) {
+    // TODO: speed up, this is silly
+    for (int i = 0; i < nChars; i++)
+      len = delete(s, pos, len);
+    return len;
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link GermanLightStemFilter}
+ */
+public class TestGermanLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new GermanLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link GermanMinimalStemFilter}
+ */
+public class TestGermanMinimalStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new GermanMinimalStemFilter(source));
+    }
+  };
+  
+  /** Test some examples from the paper */
+  public void testExamples() throws IOException {
+    checkOneTerm(analyzer, "sängerinnen", "sangerin");
+    checkOneTerm(analyzer, "frauen", "frau");
+    checkOneTerm(analyzer, "kenntnisse", "kenntnis");
+    checkOneTerm(analyzer, "staates", "staat");
+    checkOneTerm(analyzer, "bilder", "bild");
+    checkOneTerm(analyzer, "boote", "boot");
+    checkOneTerm(analyzer, "götter", "gott");
+    checkOneTerm(analyzer, "äpfel", "apfel");
+  }
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
@ -17,17 +17,17 @@ package org.apache.lucene.analysis.de;
 * limitations under the License.
 */

-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
-import java.io.StringReader;
+import java.io.InputStream;
+import java.io.Reader;

+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;

 /**
 * Test the German stemmer. The stemming algorithm is known to work less 
@ -38,25 +38,18 @@ import org.apache.lucene.analysis.core.LowerCaseFilter;
 public class TestGermanStemFilter extends BaseTokenStreamTestCase {

  public void testStemming() throws Exception {
-    Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
-    TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
-    // read test cases from external file:
-    InputStreamReader isr = new InputStreamReader(getClass().getResourceAsStream("data.txt"), "iso-8859-1");
-    BufferedReader breader = new BufferedReader(isr);
-    while(true) {
-      String line = breader.readLine();
-      if (line == null)
-        break;
-      line = line.trim();
-      if (line.startsWith("#") || line.equals(""))
-        continue;    // ignore comments and empty lines
-      String[] parts = line.split(";");
-      //System.out.println(parts[0] + " -- " + parts[1]);
-      tokenizer.reset(new StringReader(parts[0]));
-      filter.reset();
-      assertTokenStreamContents(filter, new String[] { parts[1] });
-    }
-    breader.close();
-    isr.close();
+    Analyzer analyzer = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+          Reader reader) {
+        Tokenizer t = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(t,
+            new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t)));
+      }
+    };
+    
+    InputStream vocOut = getClass().getResourceAsStream("data.txt");
+    assertVocabulary(analyzer, vocOut);
+    vocOut.close();
  }
 }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/data.txt
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/data.txt
@ -1,48 +1,48 @@
 # German special characters are replaced:
-häufig;haufig
+häufig	haufig

 # here the stemmer works okay, it maps related words to the same stem:
-abschließen;abschliess
-abschließender;abschliess
-abschließendes;abschliess
-abschließenden;abschliess
+abschließen	abschliess
+abschließender	abschliess
+abschließendes	abschliess
+abschließenden	abschliess

-Tisch;tisch
-Tische;tisch
-Tischen;tisch
+Tisch	tisch
+Tische	tisch
+Tischen	tisch

-Haus;hau
-Hauses;hau
-Häuser;hau
-Häusern;hau
+Haus	hau
+Hauses	hau
+Häuser	hau
+Häusern	hau
 # here's a case where overstemming occurs, i.e. a word is 
 # mapped to the same stem as unrelated words:
-hauen;hau
+hauen	hau

 # here's a case where understemming occurs, i.e. two related words
 # are not mapped to the same stem. This is the case with basically
 # all irregular forms:
-Drama;drama
-Dramen;dram
+Drama	drama
+Dramen	dram

-# replace "ß" with 'ss':
-Ausmaß;ausmass
+# replace "ß" with 'ss':
+Ausmaß	ausmass

 # fake words to test if suffixes are cut off:
-xxxxxe;xxxxx
-xxxxxs;xxxxx
-xxxxxn;xxxxx
-xxxxxt;xxxxx
-xxxxxem;xxxxx
-xxxxxer;xxxxx
-xxxxxnd;xxxxx
+xxxxxe	xxxxx
+xxxxxs	xxxxx
+xxxxxn	xxxxx
+xxxxxt	xxxxx
+xxxxxem	xxxxx
+xxxxxer	xxxxx
+xxxxxnd	xxxxx
 # the suffixes are also removed when combined:
-xxxxxetende;xxxxx
+xxxxxetende	xxxxx

 # words that are shorter than four charcters are not changed:
-xxe;xxe
+xxe	xxe
 # -em and -er are not removed from words shorter than five characters:
-xxem;xxem
-xxer;xxer
+xxem	xxem
+xxer	xxer
 # -nd is not removed from words shorter than six characters:
-xxxnd;xxxnd
+xxxnd	xxxnd
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/delighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/delighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/de/deminimaltestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/de/deminimaltestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.en;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+/**
+ * Simple tests for {@link EnglishMinimalStemFilter}
+ */
+public class TestEnglishMinimalStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new EnglishMinimalStemFilter(source));
+    }
+  };
+  
+  /** Test some examples from various papers about this technique */
+  public void testExamples() throws IOException {
+    checkOneTerm(analyzer, "queries", "query");
+    checkOneTerm(analyzer, "phrases", "phrase");
+    checkOneTerm(analyzer, "corpus", "corpus");
+    checkOneTerm(analyzer, "stress", "stress");
+    checkOneTerm(analyzer, "kings", "king");
+    checkOneTerm(analyzer, "panels", "panel");
+    checkOneTerm(analyzer, "aerodynamics", "aerodynamic");
+    checkOneTerm(analyzer, "congress", "congress");
+    checkOneTerm(analyzer, "serious", "serious");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
@ -17,21 +17,22 @@ package org.apache.lucene.analysis.en;
 * limitations under the License.
 */

-import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.Reader;
 import java.io.StringReader;
-import java.util.zip.ZipFile;

 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;

+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
 /**
 * Test the PorterStemFilter with Martin Porter's test data.
 */
@ -41,26 +42,16 @@ public class TestPorterStemFilter extends BaseTokenStreamTestCase {
   * The output should be the same as the string in output.txt
   */
  public void testPorterStemFilter() throws Exception {
-    Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
-    TokenStream filter = new PorterStemFilter(tokenizer);   
-    ZipFile zipFile = new ZipFile(getDataFile("porterTestData.zip"));
-    InputStream voc = zipFile.getInputStream(zipFile.getEntry("voc.txt"));
-    InputStream out = zipFile.getInputStream(zipFile.getEntry("output.txt"));
-    BufferedReader vocReader = new BufferedReader(new InputStreamReader(
-        voc, "UTF-8"));
-    BufferedReader outputReader = new BufferedReader(new InputStreamReader(
-        out, "UTF-8"));
-    String inputWord = null;
-    while ((inputWord = vocReader.readLine()) != null) {
-      String expectedWord = outputReader.readLine();
-      assertNotNull(expectedWord);
-      tokenizer.reset(new StringReader(inputWord));
-      filter.reset();
-      assertTokenStreamContents(filter, new String[] { expectedWord });
-    }
-    vocReader.close();
-    outputReader.close();
-    zipFile.close();
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+          Reader reader) {
+        Tokenizer t = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(t, new PorterStemFilter(t));
+      }
+    };
+
+    assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt");
  }
  
  public void testWithKeywordAttribute() throws IOException {
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.es;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link SpanishLightStemFilter}
+ */
+public class TestSpanishLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new SpanishLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/es/eslighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/es/eslighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.fi;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link FinnishLightStemFilter}
+ */
+public class TestFinnishLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new FinnishLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/filighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fi/filighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
@ -0,0 +1,162 @@
+package org.apache.lucene.analysis.fr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link FrenchLightStemFilter}
+ */
+public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new FrenchLightStemFilter(source));
+    }
+  };
+  
+  /** Test some examples from the paper */
+  public void testExamples() throws IOException {
+    checkOneTerm(analyzer, "chevaux", "cheval");
+    checkOneTerm(analyzer, "cheval", "cheval");
+    
+    checkOneTerm(analyzer, "hiboux", "hibou");
+    checkOneTerm(analyzer, "hibou", "hibou");
+    
+    checkOneTerm(analyzer, "chantés", "chant");
+    checkOneTerm(analyzer, "chanter", "chant");
+    checkOneTerm(analyzer, "chante", "chant");
+    checkOneTerm(analyzer, "chant", "chant");
+    
+    checkOneTerm(analyzer, "baronnes", "baron");
+    checkOneTerm(analyzer, "barons", "baron");
+    checkOneTerm(analyzer, "baron", "baron");
+    
+    checkOneTerm(analyzer, "peaux", "peau");
+    checkOneTerm(analyzer, "peau", "peau");
+    
+    checkOneTerm(analyzer, "anneaux", "aneau");
+    checkOneTerm(analyzer, "anneau", "aneau");
+    
+    checkOneTerm(analyzer, "neveux", "neveu");
+    checkOneTerm(analyzer, "neveu", "neveu");
+    
+    checkOneTerm(analyzer, "affreux", "afreu");
+    checkOneTerm(analyzer, "affreuse", "afreu");
+    
+    checkOneTerm(analyzer, "investissement", "investi");
+    checkOneTerm(analyzer, "investir", "investi");
+    
+    checkOneTerm(analyzer, "assourdissant", "asourdi");
+    checkOneTerm(analyzer, "assourdir", "asourdi");
+    
+    checkOneTerm(analyzer, "pratiquement", "pratiqu");
+    checkOneTerm(analyzer, "pratique", "pratiqu");
+    
+    checkOneTerm(analyzer, "administrativement", "administratif");
+    checkOneTerm(analyzer, "administratif", "administratif");
+    
+    checkOneTerm(analyzer, "justificatrice", "justifi");
+    checkOneTerm(analyzer, "justificateur", "justifi");
+    checkOneTerm(analyzer, "justifier", "justifi");
+    
+    checkOneTerm(analyzer, "educatrice", "eduqu");
+    checkOneTerm(analyzer, "eduquer", "eduqu");
+    
+    checkOneTerm(analyzer, "communicateur", "comuniqu");
+    checkOneTerm(analyzer, "communiquer", "comuniqu");
+    
+    checkOneTerm(analyzer, "accompagnatrice", "acompagn");
+    checkOneTerm(analyzer, "accompagnateur", "acompagn");
+    
+    checkOneTerm(analyzer, "administrateur", "administr");
+    checkOneTerm(analyzer, "administrer", "administr");
+    
+    checkOneTerm(analyzer, "productrice", "product");
+    checkOneTerm(analyzer, "producteur", "product");
+    
+    checkOneTerm(analyzer, "acheteuse", "achet");
+    checkOneTerm(analyzer, "acheteur", "achet");
+    
+    checkOneTerm(analyzer, "planteur", "plant");
+    checkOneTerm(analyzer, "plante", "plant");
+    
+    checkOneTerm(analyzer, "poreuse", "poreu");
+    checkOneTerm(analyzer, "poreux", "poreu");
+    
+    checkOneTerm(analyzer, "plieuse", "plieu");
+    
+    checkOneTerm(analyzer, "bijoutière", "bijouti");
+    checkOneTerm(analyzer, "bijoutier", "bijouti");
+    
+    checkOneTerm(analyzer, "caissière", "caisi");
+    checkOneTerm(analyzer, "caissier", "caisi");
+    
+    checkOneTerm(analyzer, "abrasive", "abrasif");
+    checkOneTerm(analyzer, "abrasif", "abrasif");
+    
+    checkOneTerm(analyzer, "folle", "fou");
+    checkOneTerm(analyzer, "fou", "fou");
+    
+    checkOneTerm(analyzer, "personnelle", "person");
+    checkOneTerm(analyzer, "personne", "person");
+    
+    // algo bug: too short length
+    //checkOneTerm(analyzer, "personnel", "person");
+    
+    checkOneTerm(analyzer, "complète", "complet");
+    checkOneTerm(analyzer, "complet", "complet");
+    
+    checkOneTerm(analyzer, "aromatique", "aromat");
+    
+    checkOneTerm(analyzer, "faiblesse", "faibl");
+    checkOneTerm(analyzer, "faible", "faibl");
+    
+    checkOneTerm(analyzer, "patinage", "patin");
+    checkOneTerm(analyzer, "patin", "patin");
+    
+    checkOneTerm(analyzer, "sonorisation", "sono");
+    
+    checkOneTerm(analyzer, "ritualisation", "rituel");
+    checkOneTerm(analyzer, "rituel", "rituel");
+    
+    // algo bug: masked by rules above
+    //checkOneTerm(analyzer, "colonisateur", "colon");
+    
+    checkOneTerm(analyzer, "nomination", "nomin");
+    
+    checkOneTerm(analyzer, "disposition", "dispos");
+    checkOneTerm(analyzer, "dispose", "dispos");
+  }
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
@ -0,0 +1,62 @@
+package org.apache.lucene.analysis.fr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link FrenchMinimalStemFilter}
+ */
+public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new FrenchMinimalStemFilter(source));
+    }
+  };
+  
+  /** Test some examples from the paper */
+  public void testExamples() throws IOException {
+    checkOneTerm(analyzer, "chevaux", "cheval");
+    checkOneTerm(analyzer, "hiboux", "hibou");
+    
+    checkOneTerm(analyzer, "chantés", "chant");
+    checkOneTerm(analyzer, "chanter", "chant");
+    checkOneTerm(analyzer, "chante", "chant");
+    
+    checkOneTerm(analyzer, "baronnes", "baron");
+    checkOneTerm(analyzer, "barons", "baron");
+    checkOneTerm(analyzer, "baron", "baron");
+  }
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/frlighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/frlighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/frminimaltestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/frminimaltestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link HungarianLightStemFilter}
+ */
+public class TestHungarianLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new HungarianLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("hulighttestdata.zip"), "hulight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/hulighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/hu/hulighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link ItalianLightStemFilter}
+ */
+public class TestItalianLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new ItalianLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/it/itlighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/it/itlighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link PortugueseLightStemFilter}
+ */
+public class TestPortugueseLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
+      return new TokenStreamComponents(source, new PortugueseLightStemFilter(result));
+    }
+  };
+  
+  /**
+   * Test the example from the paper "Assessing the impact of stemming accuracy
+   * on information retrieval"
+   */
+  public void testExamples() throws IOException {
+    assertAnalyzesTo(
+        analyzer,
+    "O debate político, pelo menos o que vem a público, parece, de modo nada "
+    + "surpreendente, restrito a temas menores. Mas há, evidentemente, "
+    + "grandes questões em jogo nas eleições que se aproximam.",
+    new String[] { 
+      "o", "debat", "politic", "pelo", "meno", "o", "que", "vem", "a", 
+      "public", "parec", "de", "modo", "nada", "surpreendent", "restrit",
+      "a", "tema", "menor", "mas", "há", "evident", "grand", "questa",
+      "em", "jogo", "nas", "eleica", "que", "se", "aproximam"
+    });
+  }
+  
+  /**
+   * Test examples from the c implementation
+   */
+  public void testMoreExamples() throws IOException {
+     checkOneTerm(analyzer, "doutores", "doutor");
+     checkOneTerm(analyzer, "doutor", "doutor");
+     
+     checkOneTerm(analyzer, "homens", "homem");
+     checkOneTerm(analyzer, "homem", "homem");
+     
+     checkOneTerm(analyzer, "papéis", "papel");
+     checkOneTerm(analyzer, "papel", "papel");
+     
+     checkOneTerm(analyzer, "normais", "normal");
+     checkOneTerm(analyzer, "normal", "normal");
+     
+     checkOneTerm(analyzer, "lencóis", "lencol");
+     checkOneTerm(analyzer, "lencol", "lencol");
+     
+     checkOneTerm(analyzer, "barris", "barril");
+     checkOneTerm(analyzer, "barril", "barril");
+     
+     checkOneTerm(analyzer, "botões", "bota");
+     checkOneTerm(analyzer, "botão", "bota");
+  }
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
@ -0,0 +1,69 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link PortugueseMinimalStemFilter}
+ */
+public class TestPortugueseMinimalStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
+      return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(result));
+    }
+  };
+  
+  /**
+   * Test the example from the paper "Assessing the impact of stemming accuracy
+   * on information retrieval"
+   */
+  public void testExamples() throws IOException {
+    assertAnalyzesTo(
+        analyzer,
+    "O debate político, pelo menos o que vem a público, parece, de modo nada "
+    + "surpreendente, restrito a temas menores. Mas há, evidentemente, "
+    + "grandes questões em jogo nas eleições que se aproximam.",
+    new String[] { 
+      "o", "debate", "político", "pelo", "menos", "o", "que", "vem", "a", 
+      "público", "parece", "de", "modo", "nada", "surpreendente", "restrito",
+      "a", "tema", "menor", "mas", "há", "evidentemente", "grande", "questão",
+      "em", "jogo", "na", "eleição", "que", "se", "aproximam"
+    });
+  }
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), "ptminimal.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptlighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptlighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptminimaltestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptminimaltestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.ru;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link RussianLightStemFilter}
+ */
+public class TestRussianLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new RussianLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
@ -17,71 +17,35 @@ package org.apache.lucene.analysis.ru;
 * limitations under the License.
 */

+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.util.LuceneTestCase;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStreamReader;
-import java.io.FileInputStream;
-import java.util.ArrayList;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;

 /**
 * @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
 */
@Deprecated
-public class TestRussianStem extends LuceneTestCase
-{
-    private ArrayList<String> words = new ArrayList<String>();
-    private ArrayList<String> stems = new ArrayList<String>();
-
-    public TestRussianStem(String name)
-    {
-        super(name);
-    }
-
-    /**
-     * @see TestCase#setUp()
-     */
-    @Override
-    protected void setUp() throws Exception {
-        super.setUp();
-        //System.out.println(new java.util.Date());
-        String str;
-        
-        // open and read words into an array list
-        BufferedReader inWords =
-            new BufferedReader(
-                new InputStreamReader(
-                    getClass().getResourceAsStream("wordsUTF8.txt"),
-                    "UTF-8"));
-        while ((str = inWords.readLine()) != null)
-        {
-            words.add(str);
-        }
-        inWords.close();
-
-        // open and read stems into an array list
-        BufferedReader inStems =
-            new BufferedReader(
-                new InputStreamReader(
-                    getClass().getResourceAsStream("stemsUTF8.txt"),
-                    "UTF-8"));
-        while ((str = inStems.readLine()) != null)
-        {
-            stems.add(str);
-        }
-        inStems.close();
-    }
-
-    public void testStem()
-    {
-        for (int i = 0; i < words.size(); i++)
-        {
-            //if ( (i % 100) == 0 ) System.err.println(i);
-            String realStem =
-                RussianStemmer.stemWord(
-                    words.get(i));
-            assertEquals("unicode", stems.get(i), realStem);
-        }
-    }
-
+public class TestRussianStem extends LuceneTestCase {
+  public void testStem() throws IOException {
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+          Reader reader) {
+        Tokenizer t = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(t, new RussianStemFilter(t));
+      }
+    };
+    InputStream voc = getClass().getResourceAsStream("wordsUTF8.txt");
+    InputStream out = getClass().getResourceAsStream("stemsUTF8.txt");
+    assertVocabulary(a, voc, out);
+    voc.close();
+    out.close();
+  }
 }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/rulighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/rulighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowballVocab.java
@ -17,38 +17,21 @@ package org.apache.lucene.analysis.snowball;
 * limitations under the License.
 */

-import java.io.BufferedReader;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.StringReader;
-import java.util.zip.ZipFile;
+import java.io.Reader;

-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.util.LuceneTestCase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;

 /**
 * Test the snowball filters against the snowball data tests
 */
-public class TestSnowballVocab extends BaseTokenStreamTestCase {
-  private Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
-  ZipFile zipFile = null;
-  
-  @Override
-  protected void setUp() throws Exception {
-    super.setUp();
-    this.zipFile = new ZipFile(getDataFile("TestSnowballVocabData.zip"));
-  }
-  
-  @Override
-  protected void tearDown() throws Exception {
-    this.zipFile.close();
-    this.zipFile = null;
-    super.tearDown();
-  }
-
+public class TestSnowballVocab extends LuceneTestCase {
  /**
   * Run all languages against their snowball vocabulary tests.
   */
@ -82,25 +65,20 @@ public class TestSnowballVocab extends BaseTokenStreamTestCase {
   * For the supplied language, run the stemmer against all strings in voc.txt
   * The output should be the same as the string in output.txt
   */
-  private void assertCorrectOutput(String snowballLanguage, String dataDirectory)
+  private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
      throws IOException {
    if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
-    TokenStream filter = new SnowballFilter(tokenizer, snowballLanguage);
-    InputStream voc = zipFile.getInputStream(zipFile.getEntry(dataDirectory + "/voc.txt"));
-    InputStream out = zipFile.getInputStream(zipFile.getEntry(dataDirectory + "/output.txt"));
-    BufferedReader vocReader = new BufferedReader(new InputStreamReader(
-        voc, "UTF-8"));
-    BufferedReader outputReader = new BufferedReader(new InputStreamReader(
-        out, "UTF-8"));
-    String inputWord = null;
-    while ((inputWord = vocReader.readLine()) != null) {
-      String expectedWord = outputReader.readLine();
-      assertNotNull(expectedWord);
-      tokenizer.reset(new StringReader(inputWord));
-      filter.reset();
-      assertTokenStreamContents(filter, new String[] {expectedWord});
-    }
-    vocReader.close();
-    outputReader.close();
+    
+    Analyzer a = new ReusableAnalyzerBase() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName,
+          Reader reader) {
+        Tokenizer t = new KeywordTokenizer(reader);
+        return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
+      }  
+    };
+    
+    assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"), 
+        dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
  }
 }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
@ -0,0 +1,48 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.*;
+
+/**
+ * Simple tests for {@link SwedishLightStemFilter}
+ */
+public class TestSwedishLightStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(source, new SwedishLightStemFilter(source));
+    }
+  };
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/svlighttestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sv/svlighttestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/util/VocabularyAssert.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/util/VocabularyAssert.java
@ -0,0 +1,83 @@
+package org.apache.lucene.analysis.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.zip.ZipFile;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Assert;
+
+/** Utility class for doing vocabulary-based stemming tests */
+public class VocabularyAssert {
+  /** Run a vocabulary test against two data files. */
+  public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out)
+  throws IOException {
+    BufferedReader vocReader = new BufferedReader(
+        new InputStreamReader(voc, "UTF-8"));
+    BufferedReader outputReader = new BufferedReader(
+        new InputStreamReader(out, "UTF-8"));
+    String inputWord = null;
+    while ((inputWord = vocReader.readLine()) != null) {
+      String expectedWord = outputReader.readLine();
+      Assert.assertNotNull(expectedWord);
+      BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
+    }
+  }
+  
+  /** Run a vocabulary test against one file: tab separated. */
+  public static void assertVocabulary(Analyzer a, InputStream vocOut)
+  throws IOException {
+    BufferedReader vocReader = new BufferedReader(
+        new InputStreamReader(vocOut, "UTF-8"));
+    String inputLine = null;
+    while ((inputLine = vocReader.readLine()) != null) {
+      if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
+        continue; /* comment */
+      String words[] = inputLine.split("\t");
+      BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
+    }
+  }
+  
+  /** Run a vocabulary test against two data files inside a zip file */
+  public static void assertVocabulary(Analyzer a, File zipFile, String voc, String out)
+  throws IOException {
+    ZipFile zip = new ZipFile(zipFile);
+    InputStream v = zip.getInputStream(zip.getEntry(voc));
+    InputStream o = zip.getInputStream(zip.getEntry(out));
+    assertVocabulary(a, v, o);
+    v.close();
+    o.close();
+    zip.close();
+  }
+  
+  /** Run a vocabulary test against a tab-separated data file inside a zip file */
+  public static void assertVocabulary(Analyzer a, File zipFile, String vocOut)
+  throws IOException {
+    ZipFile zip = new ZipFile(zipFile);
+    InputStream vo = zip.getInputStream(zip.getEntry(vocOut));
+    assertVocabulary(a, vo);
+    vo.close();
+    zip.close();
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
+
+/** Factory for {@link EnglishMinimalStemFilter} */
+public class EnglishMinimalStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new EnglishMinimalStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
+
+/** Factory for {@link FinnishLightStemFilter} */
+public class FinnishLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new FinnishLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
+
+/** Factory for {@link FrenchLightStemFilter} */
+public class FrenchLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new FrenchLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
+
+/** Factory for {@link FrenchMinimalStemFilter} */
+public class FrenchMinimalStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new FrenchMinimalStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanLightStemFilter;
+
+/** Factory for {@link GermanLightStemFilter} */
+public class GermanLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new GermanLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
+
+/** Factory for {@link GermanMinimalStemFilter} */
+public class GermanMinimalStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new GermanMinimalStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
+
+/** Factory for {@link HungarianLightStemFilter} */
+public class HungarianLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new HungarianLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.it.ItalianLightStemFilter;
+
+/** Factory for {@link ItalianLightStemFilter} */
+public class ItalianLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new ItalianLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
+
+/** Factory for {@link PortugueseLightStemFilter} */
+public class PortugueseLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new PortugueseLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
+
+/** Factory for {@link PortugueseMinimalStemFilter} */
+public class PortugueseMinimalStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new PortugueseMinimalStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ru.RussianLightStemFilter;
+
+/** Factory for {@link RussianLightStemFilter} */
+public class RussianLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new RussianLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.es.SpanishLightStemFilter;
+
+/** Factory for {@link SpanishLightStemFilter} */
+public class SpanishLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new SpanishLightStemFilter(input);
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
+
+/** Factory for {@link SwedishLightStemFilter} */
+public class SwedishLightStemFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new SwedishLightStemFilter(input);
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestEnglishMinimalStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestEnglishMinimalStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the English minimal stem factory is working.
+ */
+public class TestEnglishMinimalStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("bricks");
+    EnglishMinimalStemFilterFactory factory = new EnglishMinimalStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "brick" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestFinnishLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestFinnishLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Finnish light stem factory is working.
+ */
+public class TestFinnishLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("aseistettujen");
+    FinnishLightStemFilterFactory factory = new FinnishLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "aseistet" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestFrenchLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestFrenchLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the French light stem factory is working.
+ */
+public class TestFrenchLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("administrativement");
+    FrenchLightStemFilterFactory factory = new FrenchLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "administratif" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestFrenchMinimalStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestFrenchMinimalStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the French minimal stem factory is working.
+ */
+public class TestFrenchMinimalStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("chevaux");
+    FrenchMinimalStemFilterFactory factory = new FrenchMinimalStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "cheval" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestGermanLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestGermanLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the German light stem factory is working.
+ */
+public class TestGermanLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("häuser");
+    GermanLightStemFilterFactory factory = new GermanLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "haus" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestGermanMinimalStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestGermanMinimalStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the German minimal stem factory is working.
+ */
+public class TestGermanMinimalStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("bilder");
+    GermanMinimalStemFilterFactory factory = new GermanMinimalStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "bild" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestHungarianLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestHungarianLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Hungarian light stem factory is working.
+ */
+public class TestHungarianLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("házakat");
+    HungarianLightStemFilterFactory factory = new HungarianLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "haz" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestItalianLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestItalianLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Italian light stem factory is working.
+ */
+public class TestItalianLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("ragazzo ragazzi");
+    ItalianLightStemFilterFactory factory = new ItalianLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "ragazz", "ragazz" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestPortugueseLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestPortugueseLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Portuguese Light stem factory is working.
+ */
+public class TestPortugueseLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("evidentemente");
+    PortugueseLightStemFilterFactory factory = new PortugueseLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "evident" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestPortugueseMinimalStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestPortugueseMinimalStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Portuguese Minimal stem factory is working.
+ */
+public class TestPortugueseMinimalStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("questões");
+    PortugueseMinimalStemFilterFactory factory = new PortugueseMinimalStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "questão" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestRussianLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestRussianLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Russian light stem factory is working.
+ */
+public class TestRussianLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("журналы");
+    RussianLightStemFilterFactory factory = new RussianLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "журнал" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestSpanishLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestSpanishLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Spanish Light stem factory is working.
+ */
+public class TestSpanishLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("sociedades");
+    SpanishLightStemFilterFactory factory = new SpanishLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "sociedad" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestSwedishLightStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestSwedishLightStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Simple tests to ensure the Swedish Light stem factory is working.
+ */
+public class TestSwedishLightStemFilterFactory extends BaseTokenTestCase {
+  public void testStemming() throws Exception {
+    Reader reader = new StringReader("äpplen äpple");
+    SwedishLightStemFilterFactory factory = new SwedishLightStemFilterFactory();
+    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
+    assertTokenStreamContents(stream, new String[] { "äppl", "äppl" });
+  }
+}