LUCENE-2842: add Galician analyzer, Portuguese RSLP

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1055892 13f79535-47bb-0310-9956-ffa450edef68
2011-01-06 14:30:37 +00:00 · 2011-01-06 14:30:37 +00:00 · 61872be09d
parent 1b22e86417
commit 61872be09d
22 changed files with 2394 additions and 85 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -281,6 +281,9 @@ New features
   BooleanModifiersQueryNodeProcessor, for example instead of GroupQueryNodeProcessor.
   (Adriano Crestani via Robert Muir)

+ * LUCENE-2842: Add analyzer for Galician. Also adds the RSLP (Orengo) stemmer
+   for Portuguese.  (Robert Muir)
+
 Build

 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.gl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
+
+/**
+ * {@link Analyzer} for Galician.
+ */
+public final class GalicianAnalyzer extends StopwordAnalyzerBase {
+  /** Terms that bypass stemming; holds an unmodifiable copy of the set given to the constructor. */
+  private final Set<?> stemExclusionSet;
+
+  /** File containing default Galician stopwords. */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+
+  /**
+   * Returns an unmodifiable instance of the default stop words set.
+   * @return default stop words set.
+   */
+  public static Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_STOP_SET;
+  }
+
+  /**
+   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+   * accesses the static final set the first time.
+   */
+  private static class DefaultSetHolder {
+    static final Set<?> DEFAULT_STOP_SET;
+
+    static {
+      try {
+        DEFAULT_STOP_SET = WordlistLoader.getWordSet(GalicianAnalyzer.class,
+            DEFAULT_STOPWORD_FILE);
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR); keep the IOException as the cause so the
+        // underlying failure is still visible in the stack trace
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
+    }
+  }
+
+  /**
+   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+   *
+   * @param matchVersion lucene compatibility version
+   */
+  public GalicianAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   *
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   */
+  public GalicianAnalyzer(Version matchVersion, Set<?> stopwords) {
+    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+   * provided this analyzer will add a {@link KeywordMarkerFilter} before
+   * stemming.
+   *
+   * @param matchVersion lucene compatibility version
+   * @param stopwords a stopword set
+   * @param stemExclusionSet a set of terms not to be stemmed
+   */
+  public GalicianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+    super(matchVersion, stopwords);
+    // copy first, then wrap, so later changes to the caller's set cannot affect this analyzer
+    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+        matchVersion, stemExclusionSet));
+  }
+
+  /**
+   * Creates a
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
+   * which tokenizes all the text in the provided {@link Reader}.
+   *
+   * @return A
+   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
+   *         built from an {@link StandardTokenizer} filtered with
+   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
+   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
+   *         provided and {@link GalicianStemFilter}.
+   */
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+    TokenStream result = new StandardFilter(matchVersion, source);
+    result = new LowerCaseFilter(matchVersion, result);
+    result = new StopFilter(matchVersion, result, stopwords);
+    // the keyword marker is only inserted when there is something to protect from stemming
+    if (!stemExclusionSet.isEmpty()) {
+      result = new KeywordMarkerFilter(result, stemExclusionSet);
+    }
+    result = new GalicianStemFilter(result);
+    return new TokenStreamComponents(source, result);
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java
@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.gl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link GalicianStemmer} to stem 
+ * Galician words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class GalicianStemFilter extends TokenFilter {
+  private final GalicianStemmer stemmer = new GalicianStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  /**
+   * Wraps the given stream so that its non-keyword terms are stemmed.
+   *
+   * @param input the stream whose terms are stemmed
+   */
+  public GalicianStemFilter(TokenStream input) {
+    super(input);
+  }
+
+  /**
+   * Advances the wrapped stream and stems the current term in place, unless
+   * the term is flagged as a keyword.
+   *
+   * @return false once the wrapped stream is exhausted, true otherwise
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    if (keywordAttr.isKeyword()) {
+      return true;
+    }
+    // this stemmer increases word length by 1: worst case '*çom' -> '*ción',
+    // so make sure the buffer has one spare slot before stemming
+    final int originalLength = termAtt.length();
+    final char[] buffer = termAtt.resizeBuffer(originalLength + 1);
+    final int stemmedLength = stemmer.stem(buffer, originalLength);
+    termAtt.setLength(stemmedLength);
+    return true;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java
@ -0,0 +1,83 @@
+package org.apache.lucene.analysis.gl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.pt.RSLPStemmerBase;
+
+/**
+ * Galician stemmer implementing "Regras do lematizador para o galego".
+ * 
+ * @see RSLPStemmerBase
+ * @see <a href="http://bvg.udc.es/recursos_lingua/stemming.jsp">Description of rules</a>
+ */
+public class GalicianStemmer extends RSLPStemmerBase {
+  private static final Step plural;
+  private static final Step unification;
+  private static final Step adverb;
+  private static final Step augmentative;
+  private static final Step noun;
+  private static final Step verb;
+  private static final Step vowel;
+
+  static {
+    // the rules file is parsed once; each step is looked up by its name in the file
+    final Map<String,Step> parsed = parse(GalicianStemmer.class, "galician.rslp");
+    plural = parsed.get("Plural");
+    unification = parsed.get("Unification");
+    adverb = parsed.get("Adverb");
+    augmentative = parsed.get("Augmentative");
+    noun = parsed.get("Noun");
+    verb = parsed.get("Verb");
+    vowel = parsed.get("Vowel");
+  }
+
+  /**
+   * Stems a word in place.
+   *
+   * @param s buffer, oversized to at least <code>len+1</code>
+   * @param len initial valid length of buffer
+   * @return new valid length, stemmed
+   */
+  public int stem(char[] s, int len) {
+    assert s.length >= len + 1 : "this stemmer requires an oversized array of at least 1";
+
+    len = plural.apply(s, len);
+    len = unification.apply(s, len);
+    len = adverb.apply(s, len);
+
+    // keep applying the augmentative step until the length stops changing
+    while (true) {
+      final int afterAugmentative = augmentative.apply(s, len);
+      if (afterAugmentative == len) {
+        break;
+      }
+      len = afterAugmentative;
+    }
+
+    // the verb step is only tried when the noun step left the length unchanged
+    final int beforeNoun = len;
+    len = noun.apply(s, len);
+    if (len == beforeNoun) {
+      len = verb.apply(s, len);
+    }
+
+    len = vowel.apply(s, len);
+
+    // RSLG accent removal
+    for (int pos = 0; pos < len; pos++) {
+      switch (s[pos]) {
+        case 'á':
+          s[pos] = 'a';
+          break;
+        case 'é':
+        case 'ê':
+          s[pos] = 'e';
+          break;
+        case 'í':
+          s[pos] = 'i';
+          break;
+        case 'ó':
+          s[pos] = 'o';
+          break;
+        case 'ú':
+          s[pos] = 'u';
+          break;
+      }
+    }
+
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html
@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Galician.
+</body>
+</html>
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
@ -1,10 +1,5 @@
 package org.apache.lucene.analysis.pt;

-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.Version;
-
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -31,89 +26,14 @@ import org.apache.lucene.util.Version;
 * which is just the plural reduction step of the RSLP
 * algorithm from <i>A Stemming Algorithmm for the Portuguese Language</i>,
 * Orengo et al.
+ * @see RSLPStemmerBase
 */
-public class PortugueseMinimalStemmer {
+public class PortugueseMinimalStemmer extends RSLPStemmerBase {
  
-  private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31,
-      Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois", 
-          "depois","dois","leis"),
-      false);
-  
-  private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31,
-      Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos",
-          "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés",
-          "através", "convés", "ês", "país", "após", "ambas", "ambos",
-          "messias", "depois"), 
-      false);
+  private static final Step pluralStep = 
+    parse(PortugueseMinimalStemmer.class, "portuguese.rslp").get("Plural");
  
  public int stem(char s[], int len) {
-    if (len < 3 || s[len-1] != 's')
-      return len;
-    
-    if (s[len-2] == 'n') {
-      len--;
-      s[len-1] = 'm';
-      return len;
-    }
-    
-    if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') {
-      len--;
-      s[len-2] = 'ã';
-      s[len-1] = 'o';
-      return len;
-    }
-      
-    if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e')
-      if (!(len == 4 && s[0] == 'm')) {
-        len--;
-        s[len-1] = 'o';
-        return len;
-      }
-    
-    if (len >= 4 && s[len-2] == 'i') {
-      if (s[len-3] == 'a')
-        if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) {
-          len--;
-          s[len-1] = 'l';
-          return len;
-        }
-   
-      if (len >= 5 && s[len-3] == 'é') {
-        len--;
-        s[len-2] = 'e';
-        s[len-1] = 'l';
-        return len;
-      }
-    
-      if (len >= 5 && s[len-3] == 'e') {
-        len--;
-        s[len-1] = 'l';
-        return len;
-      }
-    
-      if (len >= 5 && s[len-3] == 'ó') {
-        len--;
-        s[len-2] = 'o';
-        s[len-1] = 'l';
-        return len;
-      }
-  
-      if (!excIS.contains(s, 0, len)) {
-        s[len-1] = 'l';
-        return len;
-      }
-    }
-    
-    if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e')
-      return len - 2;
-    
-    if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e')
-      if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o'))
-        return len - 2;
-      
-    if (excS.contains(s, 0, len))
-      return len;
-    else
-      return len-1;
+    return pluralStep.apply(s, len);
  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link PortugueseStemmer} to stem 
+ * Portuguese words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ */
+public final class PortugueseStemFilter extends TokenFilter {
+  private final PortugueseStemmer stemmer = new PortugueseStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+
+  /**
+   * Wraps the given stream so that its non-keyword terms are stemmed.
+   *
+   * @param input the stream whose terms are stemmed
+   */
+  public PortugueseStemFilter(TokenStream input) {
+    super(input);
+  }
+
+  /**
+   * Advances the wrapped stream and stems the current term in place, unless
+   * the term is flagged as a keyword.
+   *
+   * @return false once the wrapped stream is exhausted, true otherwise
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    if (keywordAttr.isKeyword()) {
+      return true;
+    }
+    // this stemmer increases word length by 1: worst case '*ã' -> '*ão',
+    // so make sure the buffer has one spare slot before stemming
+    final int originalLength = termAtt.length();
+    final char[] buffer = termAtt.resizeBuffer(originalLength + 1);
+    final int stemmedLength = stemmer.stem(buffer, originalLength);
+    termAtt.setLength(stemmedLength);
+    return true;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java
@ -0,0 +1,102 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+/**
+ * Portuguese stemmer implementing the RSLP (Removedor de Sufixos da Lingua Portuguesa)
+ * algorithm. This is sometimes also referred to as the Orengo stemmer.
+ * 
+ * @see RSLPStemmerBase
+ */
+public class PortugueseStemmer extends RSLPStemmerBase {
+  private static final Step plural;
+  private static final Step feminine;
+  private static final Step adverb;
+  private static final Step augmentative;
+  private static final Step noun;
+  private static final Step verb;
+  private static final Step vowel;
+
+  static {
+    // the rules file is parsed once; each step is looked up by its name in the file
+    final Map<String,Step> parsed = parse(PortugueseStemmer.class, "portuguese.rslp");
+    plural = parsed.get("Plural");
+    feminine = parsed.get("Feminine");
+    adverb = parsed.get("Adverb");
+    augmentative = parsed.get("Augmentative");
+    noun = parsed.get("Noun");
+    verb = parsed.get("Verb");
+    vowel = parsed.get("Vowel");
+  }
+
+  /**
+   * Stems a word in place.
+   *
+   * @param s buffer, oversized to at least <code>len+1</code>
+   * @param len initial valid length of buffer
+   * @return new valid length, stemmed
+   */
+  public int stem(char[] s, int len) {
+    assert s.length >= len + 1 : "this stemmer requires an oversized array of at least 1";
+
+    len = plural.apply(s, len);
+    len = adverb.apply(s, len);
+    len = feminine.apply(s, len);
+    len = augmentative.apply(s, len);
+
+    // verb is only tried when noun left the length unchanged, and vowel
+    // only when verb left it unchanged as well
+    final int beforeNoun = len;
+    len = noun.apply(s, len);
+    if (len == beforeNoun) {
+      final int beforeVerb = len;
+      len = verb.apply(s, len);
+      if (len == beforeVerb) {
+        len = vowel.apply(s, len);
+      }
+    }
+
+    // rslp accent removal
+    for (int pos = 0; pos < len; pos++) {
+      switch (s[pos]) {
+        case 'à': case 'á': case 'â': case 'ã': case 'ä': case 'å':
+          s[pos] = 'a';
+          break;
+        case 'ç':
+          s[pos] = 'c';
+          break;
+        case 'è': case 'é': case 'ê': case 'ë':
+          s[pos] = 'e';
+          break;
+        case 'ì': case 'í': case 'î': case 'ï':
+          s[pos] = 'i';
+          break;
+        case 'ñ':
+          s[pos] = 'n';
+          break;
+        case 'ò': case 'ó': case 'ô': case 'õ': case 'ö':
+          s[pos] = 'o';
+          break;
+        case 'ù': case 'ú': case 'û': case 'ü':
+          s[pos] = 'u';
+          break;
+        case 'ý': case 'ÿ':
+          s[pos] = 'y';
+          break;
+      }
+    }
+    return len;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
@ -0,0 +1,345 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
+
+import static org.apache.lucene.analysis.util.StemmerUtil.*;
+
+/**
+ * Base class for stemmers that use a set of RSLP-like stemming steps.
+ * <p>
+ * RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed
+ * originally for stemming the Portuguese language, described in the paper
+ * <i>A Stemming Algorithm for the Portuguese Language</i>, Orengo et al.
+ * <p>
+ * Since this time a plural-only modification (RSLP-S) as well as a modification
+ * for the Galician language have been implemented. This class parses a configuration
+ * file that describes {@link Step}s, where each Step contains a set of {@link Rule}s.
+ * <p>
+ * The general rule format is: 
+ * <blockquote>{ "suffix", N, "replacement", { "exception1", "exception2", ...}}</blockquote>
+ * where:
+ * <ul>
+ *   <li><code>suffix</code> is the suffix to be removed (such as "inho").
+ *   <li><code>N</code> is the min stem size, where stem is defined as the candidate stem 
+ *       after removing the suffix (but before appending the replacement!)
+ *   <li><code>replacement</code> is an optional string to append after removing the suffix.
+ *       This can be the empty string.
+ *   <li><code>exceptions</code> is an optional list of exceptions, patterns that should 
+ *       not be stemmed. These patterns can be specified as whole word or suffix (ends-with) 
+ *       patterns, depending upon the exceptions format flag in the step header.
+ * </ul>
+ * <p>
+ * A step is an ordered list of rules, with a structure in this format:
+ * <blockquote>{ "name", N, B, { "cond1", "cond2", ... }
+ *               ... rules ... };
+ * </blockquote>
+ * where:
+ * <ul>
+ *   <li><code>name</code> is a name for the step (such as "Plural").
+ *   <li><code>N</code> is the min word size. Words that are less than this length bypass
+ *       the step completely, as an optimization. Note: N can be zero, in this case this 
+ *       implementation will automatically calculate the appropriate value from the underlying 
+ *       rules.
+ *   <li><code>B</code> is a "boolean" flag specifying how exceptions in the rules are matched.
+ *       A value of 1 indicates whole-word pattern matching, a value of 0 indicates that 
+ *       exceptions are actually suffixes and should be matched with ends-with.
+ *   <li><code>conds</code> are an optional list of conditions to enter the step at all. If
+ *       the list is non-empty, then a word must end with one of these conditions or it will
+ *       bypass the step completely as an optimization.
+ * </ul>
+ * <p>
+ * @see <a href="http://www.inf.ufrgs.br/~viviane/rslp/index.htm">RSLP description</a>
+ * @lucene.internal
+ */
+public abstract class RSLPStemmerBase {
+
+  /**
+   * A basic rule, with no exceptions.
+   */
+  protected static class Rule {
+    protected final char suffix[];
+    protected final char replacement[];
+    protected final int min;
+
+    /**
+     * Create a rule.
+     * @param suffix suffix to remove
+     * @param min minimum stem length
+     * @param replacement replacement string
+     */
+    public Rule(String suffix, int min, String replacement) {
+      this.suffix = suffix.toCharArray();
+      this.replacement = replacement.toCharArray();
+      this.min = min;
+    }
+
+    /**
+     * @return true if the word matches this rule.
+     */
+    public boolean matches(char s[], int len) {
+      // the stem left after stripping the suffix must be at least min characters long
+      return (len - suffix.length >= min && endsWith(s, len, suffix));
+    }
+
+    /**
+     * Overwrites the suffix with the replacement in place. The caller must
+     * ensure the buffer has room when the replacement is longer than the suffix.
+     * @return new valid length of the string after firing this rule.
+     */
+    public int replace(char s[], int len) {
+      if (replacement.length > 0) {
+        System.arraycopy(replacement, 0, s, len - suffix.length, replacement.length);
+      }
+      return len - suffix.length + replacement.length;
+    }
+  }
+
+  /**
+   * A rule with a set of whole-word exceptions.
+   */
+  protected static class RuleWithSetExceptions extends Rule {
+    protected final CharArraySet exceptions;
+
+    /**
+     * Create a rule that does not fire for the listed whole words.
+     * @param suffix suffix to remove
+     * @param min minimum stem length
+     * @param replacement replacement string
+     * @param exceptions whole words that must not be stemmed by this rule
+     */
+    public RuleWithSetExceptions(String suffix, int min, String replacement,
+        String[] exceptions) {
+      super(suffix, min, replacement);
+      // an exception that does not end with the suffix can never be reached by this rule
+      for (int i = 0; i < exceptions.length; i++) {
+        if (!exceptions[i].endsWith(suffix))
+          System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
+      }
+      this.exceptions = new CharArraySet(Version.LUCENE_31,
+           Arrays.asList(exceptions), false);
+    }
+
+    @Override
+    public boolean matches(char s[], int len) {
+      return super.matches(s, len) && !exceptions.contains(s, 0, len);
+    }
+  }
+
+  /**
+   * A rule with a set of exceptional suffixes.
+   */
+  protected static class RuleWithSuffixExceptions extends Rule {
+    // TODO: use a more efficient datastructure: automaton?
+    protected final char[][] exceptions;
+
+    /**
+     * Create a rule that does not fire for words ending with any listed suffix.
+     * @param suffix suffix to remove
+     * @param min minimum stem length
+     * @param replacement replacement string
+     * @param exceptions word endings that must not be stemmed by this rule
+     */
+    public RuleWithSuffixExceptions(String suffix, int min, String replacement,
+        String[] exceptions) {
+      super(suffix, min, replacement);
+      // an exception that does not end with the suffix can never be reached by this rule
+      for (int i = 0; i < exceptions.length; i++) {
+        if (!exceptions[i].endsWith(suffix))
+          System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
+      }
+      this.exceptions = new char[exceptions.length][];
+      for (int i = 0; i < exceptions.length; i++)
+        this.exceptions[i] = exceptions[i].toCharArray();
+    }
+
+    @Override
+    public boolean matches(char s[], int len) {
+      if (!super.matches(s, len))
+        return false;
+
+      for (int i = 0; i < exceptions.length; i++)
+        if (endsWith(s, len, exceptions[i]))
+          return false;
+
+      return true;
+    }
+  }
+
+  /**
+   * A step containing a list of rules.
+   */
+  protected static class Step {
+    protected final String name;
+    protected final Rule rules[];
+    protected final int min;
+    protected final char[][] suffixes;
+
+    /**
+     * Create a new step
+     * @param name Step's name.
+     * @param rules an ordered list of rules.
+     * @param min minimum word size. if this is 0 it is automatically calculated.
+     * @param suffixes optional list of conditional suffixes. may be null.
+     */
+    public Step(String name, Rule rules[], int min, String suffixes[]) {
+      this.name = name;
+      this.rules = rules;
+      if (min == 0) {
+        // derive the shortest word any rule could match: its min stem plus its suffix
+        min = Integer.MAX_VALUE;
+        for (Rule r : rules)
+          min = Math.min(min, r.min + r.suffix.length);
+      }
+      this.min = min;
+
+      // null and empty both mean "no entry condition"
+      if (suffixes == null || suffixes.length == 0) {
+        this.suffixes = null;
+      } else {
+        this.suffixes = new char[suffixes.length][];
+        for (int i = 0; i < suffixes.length; i++)
+          this.suffixes[i] = suffixes[i].toCharArray();
+      }
+    }
+
+    /**
+     * @return new valid length of the string after applying the entire step.
+     */
+    public int apply(char s[], int len) {
+      if (len < min)
+        return len;
+
+      if (suffixes != null) {
+        boolean found = false;
+
+        for (int i = 0; i < suffixes.length; i++)
+          if (endsWith(s, len, suffixes[i])) {
+            found = true;
+            break;
+          }
+
+        if (!found) return len;
+      }
+
+      // only the first matching rule fires
+      for (int i = 0; i < rules.length; i++) {
+        if (rules[i].matches(s, len))
+          return rules[i].replace(s, len);
+      }
+
+      return len;
+    }
+  }
+
+  /**
+   * Parse a resource file into an RSLP stemmer description.
+   * @param clazz class whose {@link Class#getResourceAsStream(String)} is used to find the resource
+   * @param resource name of the resource holding the step definitions, read as UTF-8
+   * @return a Map containing the named Steps in this description.
+   * @throws RuntimeException if the resource cannot be found, cannot be read,
+   *         or contains a malformed step
+   */
+  protected static Map<String,Step> parse(Class<? extends RSLPStemmerBase> clazz, String resource) {
+    // TODO: this parser is ugly, but works. use a jflex grammar instead.
+    try {
+      InputStream is = clazz.getResourceAsStream(resource);
+      if (is == null) {
+        // fail with a descriptive message instead of an NPE from the reader below
+        throw new RuntimeException("Unable to find RSLP description '" + resource
+            + "' for " + clazz.getName());
+      }
+      LineNumberReader r = new LineNumberReader(new InputStreamReader(is, "UTF-8"));
+      try {
+        Map<String,Step> steps = new HashMap<String,Step>();
+        String step;
+        while ((step = readLine(r)) != null) {
+          Step s = parseStep(r, step);
+          steps.put(s.name, s);
+        }
+        return steps;
+      } finally {
+        // close even when a malformed description aborts parsing
+        r.close();
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** Step header: { "name", N, B, { "cond1", ... }, */
+  private static final Pattern headerPattern =
+    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$");
+  /** Rule that only strips: { "suffix", N } */
+  private static final Pattern stripPattern =
+    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$");
+  /** Rule with a replacement: { "suffix", N, "replacement" } */
+  private static final Pattern repPattern =
+    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$");
+  /** Rule with a replacement and exceptions: { "suffix", N, "replacement", { "exc1", ... } } */
+  private static final Pattern excPattern =
+    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$");
+
+  /**
+   * Parses one step: the given header line plus the rule lines that follow it.
+   * @param r reader positioned just after the header line
+   * @param header the already-read header line
+   * @return the parsed step
+   * @throws RuntimeException if the header or any rule is malformed
+   */
+  private static Step parseStep(LineNumberReader r, String header) throws IOException {
+    Matcher matcher = headerPattern.matcher(header);
+    if (!matcher.find()) {
+      throw new RuntimeException("Illegal Step header specified at line " + r.getLineNumber());
+    }
+    assert matcher.groupCount() == 4;
+    String name = matcher.group(1);
+    int min = Integer.parseInt(matcher.group(2));
+    int type = Integer.parseInt(matcher.group(3));
+    String suffixes[] = parseList(matcher.group(4));
+    Rule rules[] = parseRules(r, type);
+    return new Step(name, rules, min, suffixes);
+  }
+
+  /**
+   * Reads rule lines until one ends with ";", which closes the step.
+   * @param r reader positioned at the first rule line of the step
+   * @param type exception matching mode from the step header: 0 for suffix
+   *        exceptions, anything else for whole-word exceptions
+   * @return the rules of the step, in file order
+   * @throws RuntimeException if a rule is malformed or the input ends before
+   *         the step is closed
+   */
+  private static Rule[] parseRules(LineNumberReader r, int type) throws IOException {
+    List<Rule> rules = new ArrayList<Rule>();
+    String line;
+    while ((line = readLine(r)) != null) {
+      Matcher matcher = stripPattern.matcher(line);
+      if (matcher.matches()) {
+        rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), ""));
+      } else {
+        matcher = repPattern.matcher(line);
+        if (matcher.matches()) {
+          rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), matcher.group(3)));
+        } else {
+          matcher = excPattern.matcher(line);
+          if (matcher.matches()) {
+            if (type == 0) {
+              rules.add(new RuleWithSuffixExceptions(matcher.group(1),
+                        Integer.parseInt(matcher.group(2)),
+                        matcher.group(3),
+                        parseList(matcher.group(4))));
+            } else {
+              rules.add(new RuleWithSetExceptions(matcher.group(1),
+                        Integer.parseInt(matcher.group(2)),
+                        matcher.group(3),
+                        parseList(matcher.group(4))));
+            }
+          } else {
+            throw new RuntimeException("Illegal Step rule specified at line " + r.getLineNumber());
+          }
+        }
+      }
+      if (line.endsWith(";"))
+        return rules.toArray(new Rule[rules.size()]);
+    }
+    // end of input without the closing ";": report it here rather than
+    // handing a null rule array to the Step constructor
+    throw new RuntimeException("Unterminated Step: end of input reached at line " + r.getLineNumber());
+  }
+
+  /**
+   * Splits a comma-separated list of double-quoted strings.
+   * @param s text found between the braces of a list; may be empty
+   * @return the unquoted elements, or an empty array if the list is blank
+   */
+  private static String[] parseList(String s) {
+    // a blank list yields an empty array so callers can read .length safely
+    if (s.trim().isEmpty())
+      return new String[0];
+    String list[] = s.split(",");
+    for (int i = 0; i < list.length; i++)
+      list[i] = parseString(list[i].trim());
+    return list;
+  }
+
+  /**
+   * Drops the first and last character, which are expected to be the
+   * surrounding quotes.
+   */
+  private static String parseString(String s) {
+    return s.substring(1, s.length()-1);
+  }
+
+  /**
+   * Reads the next line that is neither blank nor a comment starting with '#'.
+   * @return the trimmed line, or null at end of input
+   */
+  private static String readLine(LineNumberReader r) throws IOException {
+    String line = null;
+    while ((line = r.readLine()) != null) {
+      line = line.trim();
+      if (!line.isEmpty() && line.charAt(0) != '#')
+        return line;
+    }
+    return line;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
@ -56,6 +56,25 @@ public class StemmerUtil {
    return true;
  }
  
+  /**
+   * Returns true if the character array ends with the suffix.
+   * 
+   * @param s Input Buffer
+   * @param len length of input buffer
+   * @param suffix Suffix string to test
+   * @return true if <code>s</code> ends with <code>suffix</code>
+   */
+  public static boolean endsWith(char s[], int len, char suffix[]) {
+    final int suffixLen = suffix.length;
+    if (suffixLen > len)
+      return false;
+    for (int i = suffixLen - 1; i >= 0; i--)
+      if (s[len -(suffixLen - i)] != suffix[i])
+        return false;
+    
+    return true;
+  }
+  
  /**
   * Delete a character in-place
   * 
--- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp
+++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp
@ -0,0 +1,647 @@
+#	Steps file for the RSLP stemmer.
+
+# Step 1: Plural Reduction
+{  "Plural", 3, 1, {"s"}, 
+  # bons -> bon
+  {"ns",1,"n",{"luns","furatapóns","furatapons"}},
+  # xamós -> xamón
+  {"ós",3,"ón"},
+  # balões -> balón
+  {"ões",3,"ón"},
+  # capitães -> capitão
+  {"ães",1,"ão",{"mães","magalhães"}},
+  # normais -> normal
+  {"ais",2,"al",{"cais","tais","mais","pais","ademais"}},
+  {"áis",2,"al",{"cáis","táis", "máis", "páis", "ademáis"}},
+  # papéis -> papel
+  {"éis",2,"el"},
+  # posíbeis -> posíbel
+  {"eis",2,"el"},
+  # espanhóis -> espanhol
+  {"óis",2,"ol",{"escornabóis"}},
+  # caracois -> caracol
+  {"ois",2,"ol",{"escornabois"}},
+  # cadrís -> cadril
+  {"ís",2,"il",{"país"}},
+  # cadris -> cadril
+  {"is",2,"il",{"menfis","pais","kinguis"}},
+  # males -> mal
+  {"les",2,"l",{"ingles","marselles","montreales","senegales","manizales","móstoles","nápoles"}},
+  # mares -> mar
+  {"res",3,"r",{"petres","henares","cáceres","baleares","linares","londres","mieres","miraflores","mércores","venres", "pires"}},
+  # luces -> luz
+  {"ces",2,"z"},
+  # luzes -> luz
+  {"zes",2,"z"},
+  # leises -> lei
+  {"ises",3,"z"},
+  # animás -> animal
+  {"ás",1,"al",{"más"}},
+  # gases -> gas
+  {"ses",2,"s"},
+  # casas -> casa
+  {"s",2,"",{"barbadés","barcelonés","cantonés","gabonés","llanés","medinés","escocés","escocês","francês","barcelonês","cantonês","macramés","reves","barcelones","cantones","gabones","llanes","magallanes","medines","escoces","frances","xoves","martes","aliás","pires","lápis","cais","mais","mas","menos","férias","pêsames","crúcis","país","cangas","atenas","asturias","canarias","filipinas","honduras","molucas","caldas","mascareñas","micenas","covarrubias","psoas","óculos","nupcias","xoves","martes","llanes"}}};
+
+{  "Unification", 0, 0, {},
+  # cansadíssimo -> cansadísimo
+  {"íssimo",5,"ísimo"},
+  # cansadíssima -> cansadísima
+  {"íssima",5,"ísima"},
+  # homaço -> homazo
+  {"aço",4,"azo"},
+  # mulheraça -> mulheraza
+  {"aça",4,"aza"},
+  # xentuça -> xentuza
+  {"uça",4,"uza"},
+  # manilhar -> manillar
+  {"lhar",2,"llar"},
+  # colher -> coller
+  {"lher",2,"ller"},
+  # melhor -> mellor
+  {"lhor",2,"llor"},
+  # alho -> allo
+  {"lho",1,"llo"},
+  # linhar -> liñar
+  {"nhar",2,"ñar"},
+  # penhor -> peñor
+  {"nhor",2,"ñor"},
+  # anho -> año
+  {"nho",1,"ño"},
+  # cunha -> cuña
+  {"nha",1,"ña"},
+  # hospitalário -> hospitalario
+  {"ário",3,"ario"},
+  # bibliotecária -> bibliotecaria
+  {"ária",3,"aria"},
+  # agradable -> agradábel
+  {"able",2,"ábel"},
+  # agradável -> agradábel
+  {"ável",2,"ábel"},
+  # imposible -> imposíbel
+  {"ible",2,"íbel"},
+  # imposível -> imposíbel
+  {"ível",2,"íbel"},
+  # imposiçom -> imposición
+  {"çom",2,"ción"},
+  # garagem -> garaxe
+  {"agem",2,"axe"},
+  # garage -> garaxe
+  {"age",2,"axe"},
+  # impressão -> impressón
+  {"ão",3,"ón"},
+  # irmao -> irmán
+  {"ao",1,"án"},
+  # irmau -> irmán
+  {"au",1,"án"},
+  # garrafom -> garrafón
+  {"om",3,"ón"},
+  # cantem -> canten
+  {"m",2,"n"}};
+
+{  "Adverb", 0, 0, {},
+  # felizmente -> feliz
+  {"mente",4,"",{"experimente","vehemente","sedimente"}}};
+
+{  "Augmentative", 0, 1, {},
+  # cansadísimo -> cansad
+  {"dísimo",5},
+  # cansadísima -> cansad
+  {"dísima",5},
+  # amabilísimo -> ama
+  {"bilísimo",3},
+  # amabilísima -> ama
+  {"bilísima",3},
+  # fortísimo -> fort
+  {"ísimo",3},
+  # fortísima -> fort
+  {"ísima",3},
+  # centésimo -> cent
+  {"ésimo",3},
+  # centésima -> cent
+  {"ésima",3},
+  # paupérrimo -> paup
+  {"érrimo",4},
+  # paupérrima -> paup
+  {"érrima",4},
+  # charlatana -> charlat
+  {"ana",2,"",{"argana","banana","choupana","espadana","faciana","iguana","lantana","macana","membrana","mesana","nirvana","obsidiana","palangana","pavana","persiana","pestana","porcelana","pseudomembrana","roldana","sábana","salangana","saragana","ventana"}},
+  # charlatán -> charlat
+  {"án",3,"",{"ademán","bardán","barregán","corricán","curricán","faisán","furacán","fustán","gabán","gabián","galán","gañán","lavacán","mazán","mourán","rabadán","serán","serrán","tabán","titán","tobogán","verán","volcán","volován"}},
+  # homazo -> hom
+  {"azo",4,"",{"abrazo","espazo","andazo","bagazo","balazo","bandazo","cachazo","carazo","denazo","engazo","famazo","lampreazo","pantocazo","pedazo","preñazo","regazo","ribazo","sobrazo","terrazo","trompazo"}},
+  # mulleraza -> muller
+  {"aza",3,"",{"alcarraza","ameaza","baraza","broucaza","burgaza","cabaza","cachaza","calaza","carpaza","carraza","coiraza","colmaza","fogaza","famaza","labaza","liñaza","melaza","mordaza","paraza","pinaza","rabaza","rapaza","trancaza"}},
+  # cascallo -> casc
+  {"allo",4,"",{"traballo"}},
+  # xentalla -> xent
+  {"alla",4},
+  # bocarra -> boc
+  {"arra",3,"",{"cigarra","cinzarra"}},
+  # medicastro -> medic
+  {"astro",3,"",{"balastro","bimbastro","canastro","retropilastro"}},
+  # poetastra -> poet
+  {"astra",3,"",{"banastra","canastra","contrapilastra","piastra","pilastra"}},
+  # corpázio -> corp
+  {"ázio",3,"",{"topázio"}},
+  # soutelo -> sout
+  {"elo",4,"",{"bacelo","barrelo","bicarelo","biquelo","boquelo","botelo","bouquelo","cacarelo","cachelo","cadrelo","campelo","candelo","cantelo","carabelo","carambelo","caramelo","cercelo","cerebelo","chocarelo","coitelo","conchelo","corbelo","cotobelo","couselo","destelo","desvelo","esfácelo","fandelo","fardelo","farelo","farnelo","flabelo","ganchelo","garfelo","involucelo","mantelo","montelo","outerelo","padicelo","pesadelo","pinguelo","piquelo","rampelo","rastrelo","restelo","tornecelo","trabelo","restrelo","portelo","ourelo","zarapelo"}},
+  # avioneta -> avion
+  {"eta",3,"",{"arqueta","atleta","avoceta","baioneta","baldeta","banqueta","barraganeta","barreta","borleta","buceta","caceta","calceta","caldeta","cambeta","canaleta","caneta","carreta","cerceta","chaparreta","chapeta","chareta","chincheta","colcheta","cometa","corbeta","corveta","cuneta","desteta","espeta","espoleta","estafeta","esteta","faceta","falanxeta","frasqueta","gaceta","gabeta","galleta","garabeta","gaveta","glorieta","lagareta","lambeta","lanceta","libreta","maceta","macheta","maleta","malleta","mareta","marreta","meseta","mofeta","muleta","peseta","planeta","raqueta","regreta","saqueta","veleta","vendeta","viñeta"}},
+  # guapete -> guap
+  {"ete",3,"",{"alfinete","ariete","bacinete","banquete","barallete","barrete","billete","binguelete","birrete","bonete","bosquete","bufete","burlete","cabalete","cacahuete","cavinete","capacete","carrete","casarete","casete","chupete","clarinete","colchete","colete","capete","curupete","disquete","estilete","falsete","ferrete","filete","gallardete","gobelete","inglete","machete","miquelete","molete","mosquete","piquete","ribete","rodete","rolete","roquete","sorvete","vedete","vendete"}},
+  # práctica -> práct
+  {"ica",3,"",{"andarica","botánica","botica","dialéctica","dinámica","física","formica","gráfica","marica","túnica"}},
+  # práctico -> práct
+  {"ico",3,"",{"conico","acetifico","acidifico"}},
+  # trapexo -> trap
+  {"exo",3,"",{"arpexo","arquexo","asexo","axexo","azulexo","badexo","bafexo","bocexo","bosquexo","boubexo","cacarexo","carrexo","cascarexo","castrexo","convexo","cotexo","desexo","despexo","forcexo","gabexo","gargarexo","gorgolexo","inconexo","manexo","merexo","narnexo","padexo","patexo","sopexo","varexo"}},
+  {"exa",3,"",{"airexa","bandexa","carrexa","envexa","igrexa","larexa","patexa","presexa","sobexa"}},
+  # multidão -> mult
+  {"idão",3},
+  # pequeniño -> pequeno
+  {"iño",3,"o",{"camiño","cariño","comiño","golfiño","padriño","sobriño","viciño","veciño"}},
+  # pequeniña -> pequena
+  {"iña",3,"a",{"camariña","campiña","entreliña","espiña","fariña","moriña","valiña"}},
+  # grandito -> grand
+  {"ito",3,""},
+  # grandita -> grand
+  {"ita",3,""},
+  # anomaloide -> anomal
+  {"oide",3,"",{"anaroide","aneroide","asteroide","axoide","cardioide","celuloide","coronoide","discoide","espermatozoide","espiroide","esquizoide","esteroide","glenoide","linfoide","hemorroide","melaloide","sacaroide","tetraploide","varioloide"}},
+  # cazola -> caz
+  {"ola",3,"",{"aixola","ampola","argola","arola","arteríola","bandola","bítola","bractéola","cachola","carambola","carapola","carola","carrandiola","catrapola","cebola","centola","champola","chatola","cirola","cítola","consola","corola","empola","escarola","esmola","estola","fitola","florícola","garañola","gárgola","garxola","glicocola","góndola","mariola","marola","michola","pirola","rebola","rupícola","saxícola","sémola","tachola","tómbola"}},
+  # pedrolo -> pedr
+  {"olo",3,"",{"arrolo","babiolo","cacharolo","caixarolo","carolo","carramolo","cascarolo","cirolo","codrolo","correolo","cotrolo","desconsolo","rebolo","repolo","subsolo","tixolo","tómbolo","torolo","trémolo","vacúolo","xermolo","zócolo"}},
+  # vellote -> vell
+  {"ote",3,"",{"aigote","alcaiote","barbarote","balote","billote","cachote","camarote","capote","cebote","chichote","citote","cocorote","escote","gañote","garrote","gavote","lamote","lapote","larapote","lingote","lítote","magote","marrote","matalote","pandote","paparote","rebote","tagarote","zarrote"}},
+  # mozota -> moz
+  {"ota",3,"",{"asíntota","caiota","cambota","chacota","compota","creosota","curota","derrota","díspota","gamota","maniota","pelota","picota","pillota","pixota","queirota","remota"}},
+  # gordocho -> gord
+  {"cho",3,"",{"abrocho","arrocho","carocho","falucho","bombacho","borracho","mostacho"}},
+  # gordecha -> gord
+  {"cha",3,"",{"borracha","carracha","estacha","garnacha","limacha","remolacha","abrocha"}},
+  # baratuco -> barat
+  {"uco",4,"",{"caduco","estuco","fachuco","malluco","saluco","trabuco"}},
+  # borrachuzo -> borrach
+  {"uzo",3,"",{"carriñouzo","fachuzo","mañuzo","mestruzo","tapuzo"}},
+  # xentuza -> xent
+  {"uza",3,"",{"barruza","chamuza","chapuza","charamuza","conduza","deduza","desluza","entreluza","induza","reluza","seduza","traduza","trasluza"}},
+  # babuxa -> bab
+  {"uxa",3,"",{"caramuxa","carrabouxa","cartuxa","coruxa","curuxa","gaturuxa","maruxa","meruxa","miruxa","moruxa","muruxa","papuxa","rabuxa","trouxa"}},
+  {"uxo",3,"",{"caramuxo","carouxo","carrabouxo","curuxo","debuxo","ganduxo","influxo","negouxo","pertuxo","refluxo"}},
+  # grupello -> grup
+  {"ello",3,"",{"alborello","artello","botello","cachafello","calello","casarello","cazabello","cercello","cocerello","concello","consello","desparello","escaravello","espello","fedello","fervello","gagafello","gorrobello","nortello","pendello","troupello","trebello"}},
+  # pontella -> pont
+  {"ella",3,"",{"alborella","bertorella","bocatella","botella","calella","cercella","gadella","grosella","lentella","movella","nocella","noitevella","parella","pelella","percebella","segorella","sabella"}}};
+
+{  "Noun", 0, 0, {},
+  # lealdade -> leal 
+  {"dade",3,"",{"acridade","calidade"}},
+  # clarificar -> clar
+  {"ificar",2},
+  # brasileiro->brasil
+  {"eiro",3,"",{"agoireiro","bardalleiro","braseiro","barreiro","canteiro","capoeiro","carneiro","carteiro","cinceiro","faroleiro","mareiro","preguiceiro","quinteiro","raposeiro","retranqueiro","regueiro","sineiro","troleiro","ventureiro"}},
+  # marisqueira -> marisqu
+  {"eira",3,"",{"cabeleira","canteira","cocheira","folleira","milleira"}},
+  # hospitalario -> hospital
+  {"ario",3,"",{"armario","calcario","lionario","salario"}},
+  # bibliotecaria -> bibliotec
+  {"aria",3,"",{"cetaria","coronaria","fumaria","linaria","lunaria","parietaria","saponaria","serpentaria"}},
+  # humorístico -> humor
+  {"ístico",3,"",{"balístico", "ensaístico"}},
+  # castrista -> castr
+  {"ista",3,"",{"batista","ciclista","fadista","operista","tenista","verista"}},
+  # lavado -> lav
+  {"ado",2,"",{"grado","agrado"}},
+  # decanato -> decan
+  {"ato",2,"",{"agnato"}},
+  # xemido -> xem
+  {"ido",3,"",{"cándido","cândido","consolido","decidido","duvido","marido","rápido"}},
+  # mantida -> mant
+  {"ida",3,"",{"bastida","dúbida","dubida","duvida","ermida","éxida","guarida","lapicida","medida","morida"}},
+  {"ída",3},
+  # mantído -> mant
+  {"ido",3},
+  # orelludo -> orell
+  {"udo",3,"",{"estudo","escudo"}},
+  # orelluda -> orell
+  {"uda",3},
+  {"ada",3,"",{"abada","alhada","allada","pitada"}},
+  # comedela -> come
+  {"dela",3,"",{"cambadela","cavadela","forcadela","erisipidela","mortadela","espadela","fondedela","picadela","arandela","candela","cordela","escudela","pardela"}},
+  # fontela -> font
+  {"ela",3,"",{"canela","capela","cotela","cubela","curupela","escarapela","esparrela","estela","fardela","flanela","fornela","franela","gabela","gamela","gavela","glumela","granicela","lamela","lapela","malvela","manela","manganela","mexarela","micela","mistela","novela","ourela","panela","parcela","pasarela","patamela","patela","paxarela","pipela","pitela","postela","pubela","restela","sabela","salmonela","secuela","sentinela","soldanela","subela","temoncela","tesela","tixela","tramela","trapela","varela","vitela","xanela","xestela"}},
+  # agradábel -> agrad
+  {"ábel",2,"",{"afábel","fiábel"}},
+  # combustíbel -> combust
+  {"íbel",2,"",{"críbel","imposíbel","posíbel","fisíbel","falíbel"}},
+  # fabricante -> fabrica
+  {"nte",3,"",{"alimente","adiante","acrescente","elefante","frequente","freqüente","gigante","instante","oriente","permanente","posante","possante","restaurante"}},
+  # ignorancia -> ignora
+  {"ncia",3},
+  # temperanza -> tempera
+  {"nza",3},
+  {"acia",3,"",{"acracia","audacia","falacia","farmacia"}},
+  # inmundicia -> inmund
+  {"icia",3,"",{"caricia","delicia","ledicia","malicia","milicia","noticia","pericia","presbicia","primicia","regalicia","sevicia","tiricia"}},
+  # xustiza -> xust
+  {"iza",3,"",{"alvariza","baliza","cachiza","caniza","cañiza","carbaliza","carriza","chamariza","chapiza","fraguiza","latiza","longaniza","mañiza","nabiza","peliza","preguiza","rabiza"}},
+  # clarexar -> clar
+  {"exar",3,"",{"palmexar"}},
+  # administración -> administr
+  {"ación",2,"",{"aeración"}},
+  # expedición -> exped
+  {"ición",3,"",{"condición","gornición","monición","nutrición","petición","posición","sedición","volición"}},
+  # excepción -> except
+  {"ción",3,"t"},
+  # comprensión -> comprens
+  {"sión",3,"s",{"abrasión", "alusión"}},
+  # doazón -> do
+  {"azón",2,"",{"armazón"}},
+  # garrafón -> garraf
+  {"ón",3,"",{"abalón","acordeón","alción","aldrabón","alerón","aliñón","ambón","bombón","calzón","campón","canalón","cantón","capitón","cañón","centón","ciclón","collón","colofón","copón","cotón","cupón","petón","tirón","tourón","turón","unción","versión","zubón","zurrón"}},
+  # lambona -> lamb
+  {"ona",3,"",{"abandona","acetona","aleurona","amazona","anémona","bombona","cambona","carona","chacona","charamona","cincona","condona","cortisona","cretona","cretona","detona","estona","fitohormona","fregona","gerona","hidroquinona","hormona","lesiona","madona","maratona","matrona","metadona","monótona","neurona","pamplona","peptona","poltrona","proxesterona","quinona","quinona","silicona","sulfona"}},
+  # bretoa -> bretón
+  {"oa",3,"",{"abandoa","madroa","barbacoa","estoa","airoa","eiroa","amalloa","ámboa","améndoa","anchoa","antinéboa","avéntoa","avoa","bágoa","balboa","bisavoa","boroa","canoa","caroa","comadroa","coroa","éngoa","espácoa","filloa","fírgoa","grañoa","lagoa","lanzoa","magoa","mámoa","morzoa","noiteboa","noraboa","parañoa","persoa","queiroa","rañoa","táboa","tataravoa","teiroa"}},
+  # demoníaco -> demoní
+  {"aco",3},
+  # demoníaca -> demoní
+  {"aca",3,"",{"alpaca","barraca","bullaca","buraca","carraca","casaca","cavaca","cloaca","entresaca","ervellaca","espinaca","estaca","farraca","millaca","pastinaca","pataca","resaca","urraca","purraca"}},
+  # carballal -> carball
+  {"al",4,"",{"afinal","animal","estatal","bisexual","bissexual","desleal","fiscal","formal","pessoal","persoal","liberal","postal","virtual","visual","pontual","puntual","homosexual","heterosexual"}},
+  # nadador -> nada
+  {"dor",2,"",{"abaixador"}},
+  # benfeitor -> benfei
+  {"tor",3,"",{"autor","motor","pastor","pintor"}},
+  # produtor -> produt
+  {"or",2,"",{"asesor","assessor","favor","mellor","melhor","redor","rigor","sensor","tambor","tumor"}},
+  # profesora -> profes
+  {"ora",3,"",{"albacora","anáfora","áncora","apisoadora","ardora","ascospora","aurora","avéspora","bitácora","canéfora","cantimplora","catáfora","cepilladora","demora","descalcificadora","diáspora","empacadora","epífora","ecavadora","escora","eslora","espora","fotocompoñedora","fotocopiadora","grampadora","isícora","lavadora","lixadora","macrospora","madrépora","madrágora","masora","mellora","metáfora","microspora","milépora","milpéndora","nécora","oospora","padeadora","pasiflora","pécora","píldora","pólvora","ratinadora","rémora","retroescavadora","sófora","torradora","trémbora","uredospora","víbora","víncora","zoospora"}},
+  # zapataría -> zapat
+  {"aría",3,"",{"libraría"}},
+  # etiquetaxe -> etiquet
+  {"axe",3,"",{"aluaxe","amaraxe","amperaxe","bagaxe","balaxe","barcaxe","borraxe","bescaxe","cabotaxe","carraxe","cartilaxe","chantaxe","colaxe","coraxe","carruaxe","dragaxe","embalaxe","ensilaxe","epistaxe","fagundaxe","fichaxe","fogaxe","forraxe","fretaxe","friaxe","garaxe","homenaxe","leitaxe","liñaxe","listaxe","maraxe","marcaxe","maridaxe","masaxe","miraxe","montaxe","pasaxe","peaxe","portaxe","ramaxe","rebelaxe","rodaxe","romaxe","sintaxe","sondaxe","tiraxe","vantaxe","vendaxe","viraxe"}},
+  # movedizo -> move
+  {"dizo",3},
+  # limpeza -> limp
+  {"eza",3,"",{"alteza","beleza","fereza","fineza","vasteza","vileza"}},
+  # rixidez -> rixid
+  {"ez",3,"",{"acidez","adultez","adustez","avidez","candidez","mudez","nenez","nudez","pomez"}},
+  # mullerengo -> muller
+  {"engo",3},
+  # chairego -> chair
+  {"ego",3,"",{"corego","derrego","entrego","lamego","sarego","sartego"}},
+  # cariñoso -> cariñ
+  {"oso",3,"",{"afanoso","algoso","caldoso","caloso","cocoso","ditoso","favoso","fogoso","lamoso","mecoso","mocoso","precioso","rixoso","venoso","viroso","xesoso"}},
+  # cariñosa -> cariñ
+  {"osa",3,"",{"mucosa","glicosa","baldosa","celulosa","isoglosa","nitrocelulosa","levulosa","ortosa","pectosa","preciosa","sacarosa","serosa","ventosa"}},
+  # negrume -> negr
+  {"ume",3,"",{"agrume","albume","alcume","batume","cacume","cerrume","chorume","churume","costume","curtume","estrume","gafume","legume","perfume","queixume","zarrume"}},
+  # altura -> alt
+  {"ura",3,"",{"albura","armadura","imatura","costura"}},
+  # cuspiñar -> cusp
+  {"iñar",3},
+  # febril -> febr
+  {"il",3,"",{"abril","alfil","anil","atril","badil","baril","barril","brasil","cadril","candil","cantil","carril","chamil","chancil","civil","cubil","dátil","difícil","dócil","edil","estéril","fácil","fráxil","funil","fusil","grácil","gradil","hábil","hostil","marfil"}},
+  # principesco -> princip
+  {"esco",4},
+  # mourisco -> mour
+  {"isco",4},
+  # esportivo -> esport
+  {"ivo",3,"",{"pasivo","positivo","passivo","possessivo","posesivo","pexotarivo","relativo"}}};
+
+{  "Verb", 0, 0, {},
+  # amaba -> am
+  {"aba",2},
+  # andabade -> and
+  {"abade",2},
+  # andábade -> and
+  {"ábade",2},
+  # chorabamo -> chor
+  {"abamo",2},
+  # chorábamo -> chor
+  {"ábamo",2}, 
+  # moraban -> mor
+  {"aban",2},
+  # andache -> and
+  {"ache",2},
+  # andade -> and
+  {"ade",2},
+  {"an",2}, 
+  # cantando -> cant
+  {"ando",2},
+  # cantar -> cant
+  {"ar",2,"",{"azar","bazar","patamar"}}, 
+  # lembrarade -> lembr
+  {"arade",2},
+  {"aramo",2}, 
+  {"arán",2},
+  # cantaran -> cant
+  {"aran",2},
+  # convidárade -> convid
+  {"árade",2},
+  # convidaría -> convid
+  {"aría",2},
+  # cantariade -> cant
+  {"ariade",2},
+  # cantaríade -> cant
+  {"aríade",2},
+  # cantarian -> cant 
+  {"arian",2},
+  # cantariamo -> cant
+  {"ariamo",2},
+  # pescaron -> pesc
+  {"aron",2},
+  # cantase -> cant
+  {"ase",2},
+  # cantasede -> cant
+  {"asede",2},
+  # cantásede -> cant
+  {"ásede",2},
+  # cantasemo -> cant
+  {"asemo",2},
+  # cantásemo -> cant
+  {"ásemo",2},
+  # cantasen -> cant
+  {"asen",2},
+  # loitavan -> loit
+  {"avan",2},
+  # cantaríamo -> cant
+  {"aríamo",2},
+  # cantassen -> cant
+  {"assen",2},
+  # cantássemo -> cant
+  {"ássemo",2},
+  # beberíamo -> beb
+  {"eríamo",2},
+  # bebêssemo -> beb
+  {"êssemo",2},
+  # partiríamo -> part
+  {"iríamo",3},
+  # partíssemo -> part
+  {"íssemo",3},
+  # cantáramo -> cant
+  {"áramo",2},
+  # cantárei -> cant
+  {"árei",2},
+  # cantaren -> cant
+  {"aren",2},
+  # cantaremo -> cant
+  {"aremo",2},
+  # cantaríei -> cant
+  {"aríei",2},
+  {"ássei",2},
+  # cantávamo-> cant
+  {"ávamo",2},
+  # bebêramo -> beb
+  {"êramo",1},
+  # beberemo -> beb
+  {"eremo",1},
+  # beberíei -> beb
+  {"eríei",1},
+  # bebêssei -> beb
+  {"êssei",1},
+  # partíramo -> part
+  {"íramo",3},
+  # partiremo -> part
+  {"iremo",3},
+  # partiríei -> part
+  {"iríei",3},
+  # partíssei -> part
+  {"íssei",3},
+  # partissen -> part
+  {"issen",3},
+  # bebendo -> beb
+  {"endo",1},
+  # partindo -> part
+  {"indo",3},
+  # propondo -> prop
+  {"ondo",3},
+  # cantarde -> cant
+  {"arde",2},
+  # cantarei -> cant
+  {"arei",2},
+  # cantaria -> cant
+  {"aria",2},
+  # cantarmo -> cant
+  {"armo",2},
+  # cantasse -> cant
+  {"asse",2},
+  {"aste",2},
+  # cantávei -> cant
+  {"ávei",2},
+  # perderão -> perd
+  {"erão",1},
+  # beberde -> beb
+  {"erde",1},
+  # beberei -> beb
+  {"erei",1},
+  # bebêrei -> beb
+  {"êrei",1},
+  # beberen -> beb
+  {"eren",2},
+  # beberia -> beb
+  {"eria",1},
+  # bebermo -> beb
+  {"ermo",1},
+  # bebeste -> beb
+  {"este",1,"",{"faroeste","agreste"}},
+  # bebíamo -> beb
+  {"íamo",1},
+  # fuxian -> fux
+  {"ian",2,"",{"enfian","eloxian","ensaian"}},
+  # partirde -> part
+  {"irde",2},
+  # partirei -> part
+  {"irei",3,"",{"admirei"}},
+  # partiren -> part
+  {"iren",3},
+  # partiria -> part
+  {"iria",3},
+  # partirmo -> part
+  {"irmo",3},
+  # partisse -> part
+  {"isse",3},
+  # partiste -> part
+  {"iste",4},
+  {"iava",1,"",{"ampliava"}},
+  # cantamo -> cant
+  {"amo",2},
+  # funciona -> func
+  {"iona",3},
+  # cantara -> cant
+  {"ara",2,"",{"arara","prepara"}},
+  # enviará -> envi
+  {"ará",2,"",{"alvará","bacará"}},
+  # cantare -> cant
+  {"are",2,"",{"prepare"}},
+  # cantava -> cant
+  {"ava",2,"",{"agrava"}},
+  # cantemo -> cant
+  {"emo",2},
+  # bebera -> beb
+  {"era",1,"",{"acelera","espera"}},
+  # beberá -> beb
+  {"erá",1},
+  # bebere -> beb
+  {"ere",1,"",{"espere"}},
+  # bebíei -> beb
+  {"íei",1},
+  # metin -> met
+  {"in",3},
+  # partimo -> part
+  {"imo",3,"",{"reprimo","intimo","íntimo","nimo","queimo","ximo"}},
+  # partira -> part
+  {"ira",3,"",{"fronteira","sátira"}},
+  {"ído",3},
+  # partirá -> part
+  {"irá",3},
+  # concretizar -> concret
+  {"tizar",4,"",{"alfabetizar"}},
+  {"izar",3,"",{"organizar"}},
+  # saltitar -> salt
+  {"itar",5,"",{"acreditar","explicitar","estreitar"}},
+  # partire -> part
+  {"ire",3,"",{"adquire"}},
+  # compomo -> comp
+  {"omo",3},
+  {"ai",2},
+  # barbear -> barb
+  {"ear",4,"",{"alardear","nuclear"}},
+  # cheguei -> cheg
+  {"uei",3},
+  {"uía",5,"u"},
+  # cantei -> cant
+  {"ei",3},
+  # beber -> beb
+  {"er",1,"",{"éter","pier"}},
+  # bebeu -> beb
+  {"eu",1,"",{"chapeu"}},
+  # bebia -> beb
+  {"ia",1,"",{"estória","fatia","acia","praia","elogia","mania","lábia","aprecia","polícia","arredia","cheia","ásia"}},
+  # partir -> part
+  {"ir",3},
+  # partiu -> part
+  {"iu",3},
+  # fraqueou -> fraqu
+  {"eou",5},
+  # chegou -> cheg
+  {"ou",3},
+  # bebi -> beb
+  {"i",1},
+  # varrede -> varr
+  {"ede",1,"",{"rede","bípede","céspede","parede","palmípede","vostede","hóspede","adrede"}},
+  # cantei -> cant
+  {"ei",3},
+  # anden -> and
+  {"en",2},
+  # descerade -> desc
+  {"erade",1},
+  # vivérade -> viv
+  {"érade",1},
+  # beberan -> beb
+  {"eran",2},
+  # colleramo -> coll
+  {"eramo",1},
+  # bebéramo -> beb
+  {"éramo",1},
+  # perderán -> perd
+  {"erán",1},
+  # varrería -> varr
+  {"ería",1},
+  # beberiade -> beb
+  {"eriade",1},
+  # beberíade -> beb
+  {"eríade",1},
+  # beberiamo -> beb
+  {"eriamo",1},
+  # beberian -> beb
+  {"erian",1},
+  # beberían -> beb
+  {"erían",1},
+  # perderon -> perd
+  {"eron",1},
+  # bebese -> beb
+  {"ese",1},
+  # bebesedes -> beb
+  {"esedes",1},
+  # bebésedes -> beb
+  {"ésedes",1}, 
+  # bebesemo -> beb
+  {"esemo",1},
+  # bebésemo -> beb
+  {"ésemo",1},
+  # bebesen -> beb
+  {"esen",1},
+  # bebêssede -> beb 
+  {"êssede",1},
+  # chovía -> chov
+  {"ía",1},
+  # faciade -> fac
+  {"iade",1},
+  # facíade -> fac
+  {"íade",1},
+  # perdiamo -> perd
+  {"iamo",1},
+  # fuxían -> fux 
+  {"ían",1},
+  # corriche -> corr
+  {"iche",1},
+  # partide -> part
+  {"ide",1},
+  # escribirade -> escrib
+  {"irade",3},
+  # parírade -> par
+  {"írade",3},
+  # partiramo -> part
+  {"iramo",3}, 
+  # fugirán -> fug
+  {"irán",3},
+  # viviría -> viv
+  {"iría",3},
+  # partiriade -> part
+  {"iriade",3},
+  # partiríade -> part
+  {"iríade",3},
+  # partiriamo -> part
+  {"iriamo",3}, 
+  # partirian -> part
+  {"irian",3},
+  # partirían -> part
+  {"irían",3},
+  # reflectiron -> reflect
+  {"iron",3},
+  # partise -> part
+  {"ise",3},
+  # partisede -> part
+  {"isede",3},
+  # partísede -> part
+  {"ísede",3},
+  # partisemo -> part
+  {"isemo",3},
+  # partísemo -> part
+  {"ísemo",3},
+  # partisen -> part
+  {"isen",3},
+  # partíssede -> part
+  {"íssede",3}, 
+  {"tizar",3,"",{"alfabetizar"}},
+  {"ondo",3}};
+
+{  "Vowel", 0, 0, {},
+  # segue -> seg
+  {"gue",2,"g",{"azougue","dengue","merengue","nurague","merengue","rengue"}},
+  {"que",2,"c",{"alambique","albaricoque","abaroque","alcrique","almadraque","almanaque","arenque","arinque","baduloque","ballestrinque","betoque","bivaque","bloque","bodaque","bosque","breque","buque","cacique","cheque","claque","contradique","coque","croque","dique","duque","enroque","espeque","estoque","estoraque","estraloque","estrinque","milicroque","monicreque","orinque","arinque","palenque","parque","penique","picabeque","pique","psique","raque","remolque","xeque","repenique","roque","sotobosque","tabique","tanque","toque","traque","truque","vivaque","xaque"}},
+  {"a",3,"",{"amasadela","cerva"}},
+  {"e",3,"",{"marte"}},
+  {"o",3,"",{"barro","fado","cabo","libro","cervo"}},
+  {"â",3},
+  {"ã",3,"",{"amanhã","arapuã","fã","divã","manhã"}},
+  {"ê",3},
+  {"ô",3},
+  {"á",3},
+  {"é",3},
+  {"ó",3},
+  # munxi -> munx
+  {"i",3}};
--- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt
+++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt
@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
--- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp
+++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp
@ -0,0 +1,456 @@
+#  Steps file for the RSLP stemmer.
+
+# Step 1: Plural Reduction
+{  "Plural", 3, 1, {"s"},
+  # bons -> bom 
+  {"ns",1,"m"},
+  # balões -> balão
+  {"ões",3,"ão"},
+  # capitães -> capitão
+  {"ães",1,"ão",{"mães"}},
+  # normais -> normal
+  {"ais",1,"al",{"cais","mais"}},
+  # papéis -> papel
+  {"éis",2,"el"},
+  # amáveis -> amável
+  {"eis",2,"el"},
+  # lençóis -> lençol
+  {"óis",2,"ol"},
+  # barris -> barril
+  {"is",2,"il",{"lápis","cais","mais","crúcis","biquínis","pois","depois","dois","leis"}},
+  # males -> mal
+  {"les",3,"l"},
+  # mares -> mar
+  {"res",3,"r", {"árvores"}},
+  # casas -> casa
+  {"s",2,"",{"aliás","pires","lápis","cais","mais","mas","menos","férias","fezes","pêsames","crúcis","gás","atrás","moisés","através","convés","ês","país","após","ambas","ambos","messias", "depois"}}};
+
+# Step 2: Adverb Reduction
+{  "Adverb", 0, 0, {},
+  # felizmente -> feliz
+  {"mente",4,"",{"experimente"}}};
+  
+# Step 3: Feminine Reduction
+{  "Feminine", 3, 1, {"a","ã"},
+  # chefona -> chefão
+  {"ona",3,"ão",{"abandona","lona","iona","cortisona","monótona","maratona","acetona","detona","carona"}},
+  # vilã -> vilão
+  {"ã",2,"ão",{"amanhã","arapuã","fã","divã"}},
+  # professora -> professor
+  {"ora",3,"or"},
+  # americana -> americano
+  {"na",4,"no",{"carona","abandona","lona","iona","cortisona","monótona","maratona","acetona","detona","guiana","campana","grana","caravana","banana","paisana"}},
+  # sozinha -> sozinho
+  {"inha",3,"inho",{"rainha","linha","minha"}},
+  # inglesa -> inglês
+  {"esa",3,"ês",{"mesa","obesa","princesa","turquesa","ilesa","pesa","presa"}},
+  # famosa -> famoso
+  {"osa",3,"oso",{"mucosa","prosa"}},
+  # maníaca -> maníaco
+  {"íaca",3,"íaco"},
+  # prática -> prático
+  {"ica",3,"ico",{"dica"}},
+  # cansada -> cansado
+  {"ada",2,"ado",{"pitada"}},
+  # mantida -> mantido
+  {"ida",3,"ido",{"vida","dúvida"}},
+  {"ída",3,"ido",{"recaída","saída"}},
+  # prima -> primo
+  {"ima",3,"imo",{"vítima"}},
+  # passiva -> passivo
+  {"iva",3,"ivo",{"saliva","oliva"}},
+  # primeira -> primeiro
+  {"eira",3,"eiro",{"beira","cadeira","frigideira","bandeira","feira","capoeira","barreira","fronteira","besteira","poeira"}}};
+
+# Step 4: Augmentative/Diminutive Reduction
+{  "Augmentative", 0, 1, {},
+  # cansadíssimo -> cansad
+  {"díssimo",5},
+  # amabilíssimo -> ama
+  {"abilíssimo",5},
+  # fortíssimo -> fort
+  {"íssimo",3},
+  {"ésimo",3},
+  # chiquérrimo -> chiqu
+  {"érrimo",4},
+  # pezinho -> pe
+  {"zinho",2},
+  # maluquinho -> maluc
+  {"quinho",4,"c"},
+  # amiguinho -> amig
+  {"uinho",4},
+  # cansadinho -> cansad
+  {"adinho",3},
+  # carrinho -> carr
+  {"inho",3,"",{"caminho","cominho"}},
+  # grandalhão -> grand
+  {"alhão",4},
+  # dentuça -> dent
+  {"uça",4},
+  # ricaço -> ric
+  {"aço",4,"",{"antebraço"}},
+  {"aça",4},
+  # cansadão -> cans
+  {"adão",4}, 
+  {"idão",4},
+  # corpázio -> corp
+  {"ázio",3,"",{"topázio"}},
+  # pratarraz -> prat
+  {"arraz",4},
+  {"zarrão",3},
+  {"arrão",4},
+  # bocarra -> boc
+  {"arra",3},
+  # calorzão -> calor
+  {"zão",2,"",{"coalizão"}},
+  # meninão -> menin
+  {"ão",3,"",{"camarão","chimarrão","canção","coração","embrião","grotão","glutão","ficção","fogão","feição","furacão","gamão","lampião","leão","macacão","nação","órfão","orgão","patrão","portão","quinhão","rincão","tração","falcão","espião","mamão","folião","cordão","aptidão","campeão","colchão","limão","leilão","melão","barão","milhão","bilhão","fusão","cristão","ilusão","capitão","estação","senão"}}};
+
+# Step 5: Noun Suffix Reduction
+{  "Noun", 0, 0, {},
+  # existencialista -> exist
+  {"encialista",4},
+  # minimalista -> minim
+  {"alista",5},
+  # contagem -> cont
+  {"agem",3,"",{"coragem","chantagem","vantagem","carruagem"}},
+  # gerenciamento -> gerenc
+  {"iamento",4},
+  # monitoramento -> monitor
+  {"amento",3,"",{"firmamento","fundamento","departamento"}},
+  # nascimento -> nasc
+  {"imento",3},
+  {"mento",6,"",{"firmamento","elemento","complemento","instrumento","departamento"}},
+  # comercializado -> comerci
+  {"alizado",4},
+  # traumatizado -> traum
+  {"atizado",4},
+  {"tizado",4,"",{"alfabetizado"}},
+  # alfabetizado -> alfabet
+  {"izado",5,"",{"organizado","pulverizado"}},
+  # associativo -> associ
+  {"ativo",4,"",{"pejorativo","relativo"}},
+  # contraceptivo -> contracep
+  {"tivo",4,"",{"relativo"}},
+  # esportivo -> esport
+  {"ivo",4,"",{"passivo","possessivo","pejorativo","positivo"}},
+  # abalado -> abal
+  {"ado",2,"",{"grado"}},
+  # impedido -> imped
+  {"ido",3,"",{"cândido","consolido","rápido","decido","tímido","duvido","marido"}},
+  # ralador -> ral
+  {"ador",3},
+  # entendedor -> entend
+  {"edor",3},
+  # cumpridor -> cumpr
+  {"idor",4,"",{"ouvidor"}},
+  {"dor",4,"",{"ouvidor"}},
+  {"sor",4,"",{"assessor"}},
+  {"atoria",5},
+  {"tor",3,"",{"benfeitor","leitor","editor","pastor","produtor","promotor","consultor"}},
+  {"or",2,"",{"motor","melhor","redor","rigor","sensor","tambor","tumor","assessor","benfeitor","pastor","terior","favor","autor"}},
+  # comparabilidade -> compar
+  {"abilidade",5},
+  # abolicionista -> abol
+  {"icionista",4},
+  # intervencionista -> interven
+  {"cionista",5},
+  {"ionista",5},
+  {"ionar",5},
+  # profissional -> profiss
+  {"ional",4},
+  # referência -> refer
+  {"ência",3},
+  # repugnância -> repugn
+  {"ância",4,"",{"ambulância"}},
+  # abatedouro -> abat
+  {"edouro",3},
+  # fofoqueiro -> fofoc
+  {"queiro",3,"c"},
+  {"adeiro",4,"",{"desfiladeiro"}},
+  # brasileiro -> brasil
+  {"eiro",3,"",{"desfiladeiro","pioneiro","mosteiro"}},
+  {"uoso",3},
+  # gostoso -> gost
+  {"oso",3,"",{"precioso"}},
+  # comercializaç -> comerci
+  {"alizaç",5},
+  {"atizaç",5},
+  {"tizaç",5},
+  {"izaç",5,"",{"organizaç"}},
+  # alegaç -> aleg
+  {"aç",3,"",{"equaç","relaç"}},
+  # aboliç -> abol
+  {"iç",3,"",{"eleiç"}},
+  # anedotário -> anedot
+  {"ário",3,"",{"voluntário","salário","aniversário","diário","lionário","armário"}},
+  {"atório",3},
+  {"rio",5,"",{"voluntário","salário","aniversário","diário","compulsório","lionário","próprio","stério","armário"}},
+  # ministério -> minist
+  {"ério",6},
+  # chinês -> chin
+  {"ês",4},
+  # beleza -> bel
+  {"eza",3},
+  # rigidez -> rigid
+  {"ez",4},
+  # parentesco -> parent
+  {"esco",4},
+  # ocupante -> ocup
+  {"ante",2,"",{"gigante","elefante","adiante","possante","instante","restaurante"}},
+  # bombástico -> bomb
+  {"ástico",4,"",{"eclesiástico"}},
+  {"alístico",3},
+  {"áutico",4},
+  {"êutico",4},
+  {"tico",3,"",{"político","eclesiástico","diagnostico","prático","doméstico","diagnóstico","idêntico","alopático","artístico","autêntico","eclético","crítico","critico"}},
+  # polêmico -> polêm
+  {"ico",4,"",{"tico","público","explico"}},
+  # produtividade -> produt
+  {"ividade",5},
+  # profundidade -> profund
+  {"idade",4,"",{"autoridade","comunidade"}},
+  # aposentadoria -> aposentad
+  {"oria",4,"",{"categoria"}},
+  # existencial -> exist
+  {"encial",5},
+  # artista -> art
+  {"ista",4},
+  {"auta",5},
+  # maluquice -> maluc
+  {"quice",4,"c"},
+  # chatice -> chat
+  {"ice",4,"",{"cúmplice"}},
+  # demoníaco -> demon
+  {"íaco",3},
+  # decorrente -> decorr
+  {"ente",4,"",{"freqüente","alimente","acrescente","permanente","oriente","aparente"}},
+  {"ense",5},
+  # criminal -> crim
+  {"inal",3},
+  # americano -> americ
+  {"ano",4},
+  # amável -> am
+  {"ável",2,"",{"afável","razoável","potável","vulnerável"}},
+  # combustível -> combust
+  {"ível",3,"",{"possível"}},
+  {"vel",5,"",{"possível","vulnerável","solúvel"}},
+  {"bil",3,"vel"},
+  # cobertura -> cobert
+  {"ura",4,"",{"imatura","acupuntura","costura"}},
+  {"ural",4},
+  # consensual -> consens
+  {"ual",3,"",{"bissexual","virtual","visual","pontual"}},
+  # mundial -> mund
+  {"ial",3},
+  # experimental -> experiment
+  {"al",4,"",{"afinal","animal","estatal","bissexual","desleal","fiscal","formal","pessoal","liberal","postal","virtual","visual","pontual","sideral","sucursal"}},
+  {"alismo",4},
+  {"ivismo",4},
+  {"ismo",3,"",{"cinismo"}}};
+
+# Step 6: Verb Suffix Reduction
+{  "Verb", 0, 0, {}, 
+  # cantaríamo -> cant
+  {"aríamo",2},
+  # cantássemo -> cant
+  {"ássemo",2},
+  # beberíamo -> beb
+  {"eríamo",2},
+  # bebêssemo -> beb
+  {"êssemo",2},
+  # partiríamo -> part
+  {"iríamo",3},
+  # partíssemo -> part
+  {"íssemo",3},
+  # cantáramo -> cant
+  {"áramo",2},
+  # cantárei -> cant
+  {"árei",2},
+  # cantaremo -> cant
+  {"aremo",2},
+  # cantariam -> cant
+  {"ariam",2},
+  # cantaríei -> cant
+  {"aríei",2},
+  # cantássei -> cant
+  {"ássei",2},
+  # cantassem -> cant
+  {"assem",2},
+  # cantávamo -> cant
+  {"ávamo",2},
+  # bebêramo -> beb
+  {"êramo",3},
+  # beberemo -> beb
+  {"eremo",3},
+  # beberiam -> beb
+  {"eriam",3},
+  # beberíei -> beb
+  {"eríei",3},
+  # bebêssei -> beb
+  {"êssei",3},
+  # bebessem -> beb
+  {"essem",3},
+  # partíramo -> part
+  {"íramo",3},
+  # partiremo -> part
+  {"iremo",3},
+  # partiriam -> part
+  {"iriam",3},
+  # partiríei -> part
+  {"iríei",3},
+  # partíssei -> part
+  {"íssei",3},
+  # partissem -> part
+  {"issem",3},
+  # cantando -> cant
+  {"ando",2},
+  # bebendo -> beb
+  {"endo",3},
+  # partindo -> part
+  {"indo",3},
+  # propondo -> prop
+  {"ondo",3},
+  # cantaram -> cant
+  {"aram",2},
+  {"arão",2},
+  # cantarde -> cant
+  {"arde",2},
+  # cantarei -> cant
+  {"arei",2},
+  # cantarem -> cant
+  {"arem",2},
+  # cantaria -> cant
+  {"aria",2},
+  # cantarmo -> cant
+  {"armo",2},
+  # cantasse -> cant
+  {"asse",2},
+  # cantaste -> cant
+  {"aste",2},
+  # cantavam -> cant
+  {"avam",2,"",{"agravam"}},
+  # cantávei -> cant
+  {"ávei",2},
+  # beberam -> beb
+  {"eram",3},
+  {"erão",3},
+  # beberde -> beb
+  {"erde",3},
+  # beberei -> beb
+  {"erei",3},
+  # bebêrei -> beb
+  {"êrei",3},
+  # beberem -> beb
+  {"erem",3},
+  # beberia -> beb
+  {"eria",3},
+  # bebermo -> beb
+  {"ermo",3},
+  # bebesse -> beb
+  {"esse",3},
+  # bebeste -> beb
+  {"este",3,"",{"faroeste","agreste"}},
+  # bebíamo -> beb
+  {"íamo",3},
+  # partiram -> part
+  {"iram",3},
+  # concluíram -> conclu
+  {"íram",3},
+  {"irão",2},
+  # partirde -> part
+  {"irde",2},
+  # partirei -> part
+  {"irei",3,"",{"admirei"}},
+  # partirem -> part
+  {"irem",3,"",{"adquirem"}},
+  # partiria -> part
+  {"iria",3},
+  # partirmo -> part
+  {"irmo",3},
+  # partisse -> part
+  {"isse",3},
+  # partiste -> part
+  {"iste",4},
+  {"iava",4,"",{"ampliava"}},
+  # cantamo -> cant
+  {"amo",2},
+  {"iona",3},
+  # cantara -> cant
+  {"ara",2,"",{"arara","prepara"}},
+  # cantará -> cant
+  {"ará",2,"",{"alvará"}},
+  # cantare -> cant
+  {"are",2,"",{"prepare"}},
+  # cantava -> cant
+  {"ava",2,"",{"agrava"}},
+  # cantemo -> cant
+  {"emo",2},
+  # bebera -> beb
+  {"era",3,"",{"acelera","espera"}},
+  # beberá -> beb
+  {"erá",3},
+  # bebere -> beb
+  {"ere",3,"",{"espere"}},
+  # bebiam -> beb
+  {"iam",3,"",{"enfiam","ampliam","elogiam","ensaiam"}},
+  # bebíei -> beb
+  {"íei",3},
+  # partimo -> part
+  {"imo",3,"",{"reprimo","intimo","íntimo","nimo","queimo","ximo"}},
+  # partira -> part
+  {"ira",3,"",{"fronteira","sátira"}},
+  {"ído",3},
+  # partirá -> part
+  {"irá",3},
+  {"tizar",4,"",{"alfabetizar"}},
+  {"izar",5,"",{"organizar"}},
+  {"itar",5,"",{"acreditar","explicitar","estreitar"}},
+  # partire -> part
+  {"ire",3,"",{"adquire"}},
+  # compomo -> comp
+  {"omo",3},
+  # cantai -> cant
+  {"ai",2},
+  # cantam -> cant
+  {"am",2},
+  # barbear -> barb
+  {"ear",4,"",{"alardear","nuclear"}},
+  # cantar -> cant
+  {"ar",2,"",{"azar","bazaar","patamar"}},
+  # cheguei -> cheg
+  {"uei",3},
+  {"uía",5,"u"},
+  # cantei -> cant
+  {"ei",3},
+  {"guem",3,"g"},
+  # cantem -> cant
+  {"em",2,"",{"alem","virgem"}},
+  # beber -> beb
+  {"er",2,"",{"éter","pier"}},
+  # bebeu -> beb
+  {"eu",3,"",{"chapeu"}},
+  # bebia -> beb
+  {"ia",3,"",{"estória","fatia","acia","praia","elogia","mania","lábia","aprecia","polícia","arredia","cheia","ásia"}},
+  # partir -> part
+  {"ir",3,"",{"freir"}},
+  # partiu -> part
+  {"iu",3},
+  {"eou",5},
+  # chegou -> cheg
+  {"ou",3},
+  # bebi -> beb
+  {"i",3}};
+
+# Step 7: Vowel Removal 
+{  "Vowel", 0, 0, {}, 
+  {"bil",2,"vel"},
+  {"gue",2,"g",{"gangue","jegue"}},
+  {"á",3}, 
+  {"ê",3,"",{"bebê"}},
+  # menina -> menin
+  {"a",3,"",{"ásia"}},
+  # grande -> grand
+  {"e",3},
+  # menino -> menin
+  {"o",3,"",{"ão"}}};
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
@ -0,0 +1,53 @@
+package org.apache.lucene.analysis.gl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
+  /**
+   * Builds an analyzer with its default configuration. Per the original
+   * note, this is expected to fail with an NPE if the stopwords file
+   * cannot be found on the classpath.
+   */
+  public void testResourcesAvailable() {
+    new GalicianAnalyzer(TEST_VERSION_CURRENT);
+  }
+  
+  /** Checks that the default analyzer both stems terms and drops stopwords. */
+  public void testBasics() throws IOException {
+    Analyzer galician = new GalicianAnalyzer(TEST_VERSION_CURRENT);
+    // two inflected forms should reduce to the same stem
+    checkOneTermReuse(galician, "correspondente", "correspond");
+    checkOneTermReuse(galician, "corresponderá", "correspond");
+    // "e" is a stopword, so no tokens are expected
+    assertAnalyzesTo(galician, "e", new String[] {});
+  }
+  
+  /** Checks that terms in the exclusion set pass through unstemmed. */
+  public void testExclude() throws IOException {
+    Set<String> protectedTerms = new HashSet<String>();
+    protectedTerms.add("correspondente");
+    Analyzer galician = new GalicianAnalyzer(
+        TEST_VERSION_CURRENT,
+        GalicianAnalyzer.getDefaultStopSet(),
+        protectedTerms);
+    // the excluded term is left as-is ...
+    checkOneTermReuse(galician, "correspondente", "correspondente");
+    // ... while other terms are still stemmed
+    checkOneTermReuse(galician, "corresponderá", "correspond");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java
@ -0,0 +1,52 @@
+package org.apache.lucene.analysis.gl;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.assertVocabulary;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+/**
+ * Vocabulary-based test for {@link GalicianStemFilter}.
+ */
+public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
+  // Chain under test: StandardTokenizer, then LowerCaseFilter, then the stemmer.
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      TokenStream lowercased = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer);
+      GalicianStemFilter stemmer = new GalicianStemFilter(lowercased);
+      return new TokenStreamComponents(tokenizer, stemmer);
+    }
+  };
+  
+ 
+  /** Compares stemmer output with the "gl.txt" vocabulary from the reference impl. */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("gltestdata.zip"), "gl.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/gltestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/gl/gltestdata.zip
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
@ -0,0 +1,69 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.analysis.util.VocabularyAssert.assertVocabulary;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+/**
+ * Simple tests for {@link PortugueseStemFilter}
+ */
+public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
+  private Analyzer analyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName,
+        Reader reader) {
+      Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
+      return new TokenStreamComponents(source, new PortugueseStemFilter(result));
+    }
+  };
+  
+  /**
+   * Test the example from the paper "Assessing the impact of stemming accuracy
+   * on information retrieval"
+   */
+  public void testExamples() throws IOException {
+    assertAnalyzesTo(
+        analyzer,
+    "O debate político, pelo menos o que vem a público, parece, de modo nada "
+    + "surpreendente, restrito a temas menores. Mas há, evidentemente, "
+    + "grandes questões em jogo nas eleições que se aproximam.",
+    new String[] { 
+      "o", "debat", "politic", "pel", "menos", "o", "que", "vem", "a", 
+      "public", "parec", "de", "mod", "nad", "surpreend", "restrit",
+      "a", "tem", "men", "mas", "ha", "evid", "grand", "quest",
+      "em", "jog", "na", "eleic", "que", "se", "aproxim"
+    });
+  }
+  
+  /** Test against a vocabulary from the reference impl */
+  public void testVocabulary() throws IOException {
+    assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
+  }
+}
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptrslptestdata.zip
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptrslptestdata.zip
--- a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.gl.GalicianStemFilter;
+
+/**
+ * Factory that wraps a token stream in a {@link GalicianStemFilter}.
+ */
+public class GalicianStemFilterFactory extends BaseTokenFilterFactory {
+  /**
+   * @param input the token stream to wrap
+   * @return a new {@link GalicianStemFilter} reading from {@code input}
+   */
+  public TokenStream create(TokenStream input) {
+    final GalicianStemFilter stemmer = new GalicianStemFilter(input);
+    return stemmer;
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java
@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.pt.PortugueseStemFilter;
+
+/**
+ * Factory that wraps a token stream in a {@link PortugueseStemFilter}.
+ */
+public class PortugueseStemFilterFactory extends BaseTokenFilterFactory {
+  /**
+   * @param input the token stream to wrap
+   * @return a new {@link PortugueseStemFilter} reading from {@code input}
+   */
+  public TokenStream create(TokenStream input) {
+    final PortugueseStemFilter stemmer = new PortugueseStemFilter(input);
+    return stemmer;
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Smoke test: the Galician stem factory produces a working stemming filter.
+ */
+public class TestGalicianStemFilterFactory extends BaseTokenTestCase {
+  /** Runs one whitespace-tokenized word through the factory's filter and checks its stem. */
+  public void testStemming() throws Exception {
+    GalicianStemFilterFactory factory = new GalicianStemFilterFactory();
+    Reader input = new StringReader("cariñosa");
+    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, input);
+    TokenStream stemmed = factory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "cariñ" });
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java
@ -0,0 +1,36 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+
+/**
+ * Smoke test: the Portuguese stem factory produces a working stemming filter.
+ */
+public class TestPortugueseStemFilterFactory extends BaseTokenTestCase {
+  /** Runs one whitespace-tokenized word through the factory's filter and checks its stem. */
+  public void testStemming() throws Exception {
+    PortugueseStemFilterFactory factory = new PortugueseStemFilterFactory();
+    Reader input = new StringReader("maluquice");
+    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, input);
+    TokenStream stemmed = factory.create(tokenizer);
+    assertTokenStreamContents(stemmed, new String[] { "maluc" });
+  }
+}