LUCENE-2463: Improve Greek analysis

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@945090 13f79535-47bb-0310-9956-ffa450edef68
2010-05-17 11:28:04 +00:00 · 2010-05-17 11:28:04 +00:00 · acbf053b7c
parent 97a95c3a6a
commit acbf053b7c
12 changed files with 1774 additions and 188 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -157,6 +157,9 @@ New features
 * LUCENE-2393: The HighFreqTerms tool (in misc) can now optionally
   also include the total termFreq.  (Tom Burton-West via Mike McCandless)

+ * LUCENE-2463: Add a Greek inflectional stemmer. GreekAnalyzer will now stem words
+   when Version is set to 3.1 or higher.  (Robert Muir)
+
 Build

 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@ -16,9 +16,7 @@ package org.apache.lucene.analysis.el;
 * limitations under the License.
 */

-
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
@ -28,8 +26,8 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;  // for javadoc
 import org.apache.lucene.util.Version;

+import java.io.IOException;
 import java.io.Reader;
-import java.util.Arrays;
 import java.util.Map;
 import java.util.Set;

@ -45,7 +43,7 @@ import java.util.Set;
 * <p>You must specify the required {@link Version}
 * compatibility when creating GreekAnalyzer:
 * <ul>
- *   <li> As of 3.1, StandardFilter is used by default.
+ *   <li> As of 3.1, StandardFilter and GreekStemmer are used by default.
 *   <li> As of 2.9, StopFilter preserves position
 *        increments
 * </ul>
@ -53,72 +51,73 @@ import java.util.Set;
 * <p><b>NOTE</b>: This class uses the same {@link Version}
 * dependent settings as {@link StandardAnalyzer}.</p>
 */
-public final class GreekAnalyzer extends StopwordAnalyzerBase
-{
-    /**
-     * List of typical Greek stopwords.
-     */
-    private static final String[] GREEK_STOP_WORDS = {
-      "ο", "η", "το", "οι", "τα", "του", "τησ", "των", "τον", "την", "και", 
-      "κι", "κ", "ειμαι", "εισαι", "ειναι", "ειμαστε", "ειστε", "στο", "στον",
-      "στη", "στην", "μα", "αλλα", "απο", "για", "προσ", "με", "σε", "ωσ",
-      "παρα", "αντι", "κατα", "μετα", "θα", "να", "δε", "δεν", "μη", "μην",
-      "επι", "ενω", "εαν", "αν", "τοτε", "που", "πωσ", "ποιοσ", "ποια", "ποιο",
-      "ποιοι", "ποιεσ", "ποιων", "ποιουσ", "αυτοσ", "αυτη", "αυτο", "αυτοι",
-      "αυτων", "αυτουσ", "αυτεσ", "αυτα", "εκεινοσ", "εκεινη", "εκεινο",
-      "εκεινοι", "εκεινεσ", "εκεινα", "εκεινων", "εκεινουσ", "οπωσ", "ομωσ",
-      "ισωσ", "οσο", "οτι"
-    };
+public final class GreekAnalyzer extends StopwordAnalyzerBase {
+  /** File containing default Greek stopwords. */
+  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  
-    /**
-     * Returns a set of default Greek-stopwords 
-     * @return a set of default Greek-stopwords 
-     */
-    public static final Set<?> getDefaultStopSet(){
-      return DefaultSetHolder.DEFAULT_SET;
-    }
+  /**
+   * Returns the default Greek stopword set, i.e. the set loaded from
+   * {@link #DEFAULT_STOPWORD_FILE}.
+   * @return the default Greek stopword set
+   */
+  public static final Set<?> getDefaultStopSet(){
+    return DefaultSetHolder.DEFAULT_SET;
+  }
  
-    private static class DefaultSetHolder {
-      private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
-          Version.LUCENE_CURRENT, Arrays.asList(GREEK_STOP_WORDS), false));
-    }
+  private static class DefaultSetHolder {
+    private static final Set<?> DEFAULT_SET;
    
-    public GreekAnalyzer(Version matchVersion) {
-      this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+    static {
+      try {
+        DEFAULT_SET = loadStopwordSet(false, GreekAnalyzer.class, DEFAULT_STOPWORD_FILE, "#");
+      } catch (IOException ex) {
+        // default set should always be present as it is part of the
+        // distribution (JAR); keep the IOException as the cause for diagnosis
+        throw new RuntimeException("Unable to load default stopword set", ex);
+      }
    }
+  }
  
-    /**
-     * Builds an analyzer with the given stop words 
-     * 
-     * @param matchVersion
-     *          lucene compatibility version
-     * @param stopwords
-     *          a stopword set
-     */
-    public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
-      super(matchVersion, stopwords);
-    }
+  /**
+   * Builds an analyzer with the default stop words
+   * (the same set that {@link #getDefaultStopSet()} returns).
+   * @param matchVersion Lucene compatibility version;
+   *   see <a href="#version">above</a>
+   */
+  public GreekAnalyzer(Version matchVersion) {
+    this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+  }
  
-    /**
-     * Builds an analyzer with the given stop words.
-     * @param stopwords Array of stopwords to use.
-     * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
-     */
-    @Deprecated
-    public GreekAnalyzer(Version matchVersion, String... stopwords)
-    {
-      this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
-    }
+  /**
+   * Builds an analyzer with the given stop words. 
+   * <p>
+   * <b>NOTE:</b> The stopwords set should be pre-processed with the logic of 
+   * {@link GreekLowerCaseFilter} for best results.
+   * </p>
+   *  
+   * @param matchVersion Lucene compatibility version;
+   *   see <a href="#version">above</a>
+   * @param stopwords a stopword set
+   */
+  public GreekAnalyzer(Version matchVersion, Set<?> stopwords) {
+    super(matchVersion, stopwords);
+  }
  
-    /**
-     * Builds an analyzer with the given stop words.
-     * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
-     */
-    @Deprecated
-    public GreekAnalyzer(Version matchVersion, Map<?,?> stopwords)
-    {
-      this(matchVersion, stopwords.keySet());
-    }
+  /**
+   * Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene compatibility version; it is also handed to
+   *   {@code StopFilter.makeStopSet} when the array is turned into a set
+   * @param stopwords Array of stopwords to use.
+   * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
+   */
+  @Deprecated
+  public GreekAnalyzer(Version matchVersion, String... stopwords) {
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
+  }
+  
+  /**
+   * Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene compatibility version
+   * @param stopwords map whose key set is used as the stopword set
+   * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
+   */
+  @Deprecated
+  public GreekAnalyzer(Version matchVersion, Map<?,?> stopwords) {
+    this(matchVersion, stopwords.keySet());
+  }
  
  /**
   * Creates
@ -127,16 +126,19 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase
   * 
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
-   *         {@link GreekLowerCaseFilter}, {@link StandardFilter} and
-   *         {@link StopFilter}
+   *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
+   *         {@link StopFilter}, and {@link GreekStemFilter}
   */
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName,
-        Reader reader) {
-      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      TokenStream result = new GreekLowerCaseFilter(source);
-      if (matchVersion.onOrAfter(Version.LUCENE_31))
-        result = new StandardFilter(result);
-      return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
-    }
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName,
+      Reader reader) {
+    final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
+
+    // chain order: lowercase -> (3.1+) standard -> stop -> (3.1+) stem
+    TokenStream chain = new GreekLowerCaseFilter(matchVersion, tokenizer);
+    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+      chain = new StandardFilter(chain);
+    }
+    chain = new StopFilter(matchVersion, chain, stopwords);
+    if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+      chain = new GreekStemFilter(chain);
+    }
+
+    return new TokenStreamComponents(tokenizer, chain);
+  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
@ -20,97 +20,115 @@ import java.io.IOException;

 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.util.Version;

 /**
 * Normalizes token text to lower case, removes some Greek diacritics,
 * and standardizes final sigma to sigma. 
- *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating GreekLowerCaseFilter:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are properly lowercased.
+ * </ul>
 */
-public final class GreekLowerCaseFilter extends TokenFilter
-{
-    private TermAttribute termAtt;
+public final class GreekLowerCaseFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final CharacterUtils charUtils;

-    public GreekLowerCaseFilter(TokenStream in)
-    {
-    	super(in);
-    	termAtt = addAttribute(TermAttribute.class);
+  /**
+   * Creates a filter that behaves as if {@link Version#LUCENE_30} had been
+   * requested.
+   *
+   * @param in TokenStream to filter
+   * @deprecated Use {@link #GreekLowerCaseFilter(Version, TokenStream)} instead.
+   */
+  @Deprecated
+  public GreekLowerCaseFilter(TokenStream in) {
+    this(Version.LUCENE_30, in);
+  }
+  
+  /**
+   * Create a GreekLowerCaseFilter that normalizes Greek token text.
+   * 
+   * @param matchVersion Lucene compatibility version, used to select the
+   *   {@link CharacterUtils} implementation;
+   *   see <a href="#version">above</a>
+   * @param in TokenStream to filter
+   */
+  public GreekLowerCaseFilter(Version matchVersion, TokenStream in) {
+    super(in);
+    this.charUtils = CharacterUtils.getInstance(matchVersion);
+  }
+  
+  /**
+   * Advances to the next token and rewrites its term buffer in place, passing
+   * every code point through {@link #lowerCase(int)}.
+   *
+   * @return true if the input produced a token, false otherwise
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (input.incrementToken()) {
+      char[] chArray = termAtt.buffer();
+      int chLen = termAtt.length();
+      // advance by the number of chars Character.toChars wrote (1 or 2)
+      for (int i = 0; i < chLen;) {
+        // NOTE(review): codePointAt is not given chLen as a limit -- confirm a
+        // high surrogate at the end of the term cannot pair with stale chars
+        // that sit in the buffer beyond chLen
+        i += Character.toChars(
+            lowerCase(charUtils.codePointAt(chArray, i)), chArray, i);
+       }
+      return true;
+    } else {
+      return false;
    }
+  }
  
-    @Override
-    public boolean incrementToken() throws IOException {
-      if (input.incrementToken()) {
-        char[] chArray = termAtt.termBuffer();
-        int chLen = termAtt.termLength();
-        // TODO: iterate codepoints to support supp. characters
-        for (int i = 0; i < chLen; i++)
-        {
-          chArray[i] = (char) lowerCase(chArray[i]);
-        }
-        return true;
-      } else {
-        return false;
-      }
-    }
-    
-    private int lowerCase(int codepoint) {
-      switch(codepoint) {
-        /* There are two lowercase forms of sigma:
-         *   U+03C2: small final sigma (end of word)
-         *   U+03C3: small sigma (otherwise)
-         *   
-         * Standardize both to U+03C3
-         */
-        case '\u03C2': /* small final sigma */
-          return '\u03C3'; /* small sigma */
-        
-        /* Some greek characters contain diacritics.
-         * This filter removes these, converting to the lowercase base form.
-         */
-        
-        case '\u0386': /* capital alpha with tonos */
-        case '\u03AC': /* small alpha with tonos */
-          return '\u03B1'; /* small alpha */
-          
-        case '\u0388': /* capital epsilon with tonos */
-        case '\u03AD': /* small epsilon with tonos */
-          return '\u03B5'; /* small epsilon */
-          
-        case '\u0389': /* capital eta with tonos */
-        case '\u03AE': /* small eta with tonos */
-          return '\u03B7'; /* small eta */
-        
-        case '\u038A': /* capital iota with tonos */
-        case '\u03AA': /* capital iota with dialytika */
-        case '\u03AF': /* small iota with tonos */
-        case '\u03CA': /* small iota with dialytika */
-        case '\u0390': /* small iota with dialytika and tonos */
-          return '\u03B9'; /* small iota */
-          
-        case '\u038E': /* capital upsilon with tonos */
-        case '\u03AB': /* capital upsilon with dialytika */
-        case '\u03CD': /* small upsilon with tonos */
-        case '\u03CB': /* small upsilon with dialytika */
-        case '\u03B0': /* small upsilon with dialytika and tonos */
-          return '\u03C5'; /* small upsilon */
-          
-        case '\u038C': /* capital omicron with tonos */
-        case '\u03CC': /* small omicron with tonos */
-          return '\u03BF'; /* small omicron */
-          
-        case '\u038F': /* capital omega with tonos */
-        case '\u03CE': /* small omega with tonos */
-          return '\u03C9'; /* small omega */
-          
-        /* The previous implementation did the conversion below.
-         * Only implemented for backwards compatibility with old indexes.
-         */
-          
-        case '\u03A2': /* reserved */
-          return '\u03C2'; /* small final sigma */
-          
-        default:
-          return Character.toLowerCase(codepoint);
-      }
+  /**
+   * Maps a single code point to its normalized lowercase form.
+   * <p>
+   * Final sigma is folded to sigma, the listed Greek vowels carrying tonos
+   * and/or dialytika are mapped to the plain lowercase vowel, and every other
+   * code point falls through to {@link Character#toLowerCase(int)}.
+   *
+   * @param codepoint code point to normalize
+   * @return the normalized code point
+   */
+  private int lowerCase(int codepoint) {
+    switch(codepoint) {
+      /* There are two lowercase forms of sigma:
+       *   U+03C2: small final sigma (end of word)
+       *   U+03C3: small sigma (otherwise)
+       *   
+       * Standardize both to U+03C3
+       */
+      case '\u03C2': /* small final sigma */
+        return '\u03C3'; /* small sigma */
+        
+      /* Some greek characters contain diacritics.
+       * This filter removes these, converting to the lowercase base form.
+       */
+        
+      case '\u0386': /* capital alpha with tonos */
+      case '\u03AC': /* small alpha with tonos */
+        return '\u03B1'; /* small alpha */
+        
+      case '\u0388': /* capital epsilon with tonos */
+      case '\u03AD': /* small epsilon with tonos */
+        return '\u03B5'; /* small epsilon */
+        
+      case '\u0389': /* capital eta with tonos */
+      case '\u03AE': /* small eta with tonos */
+        return '\u03B7'; /* small eta */
+        
+      case '\u038A': /* capital iota with tonos */
+      case '\u03AA': /* capital iota with dialytika */
+      case '\u03AF': /* small iota with tonos */
+      case '\u03CA': /* small iota with dialytika */
+      case '\u0390': /* small iota with dialytika and tonos */
+        return '\u03B9'; /* small iota */
+        
+      case '\u038E': /* capital upsilon with tonos */
+      case '\u03AB': /* capital upsilon with dialytika */
+      case '\u03CD': /* small upsilon with tonos */
+      case '\u03CB': /* small upsilon with dialytika */
+      case '\u03B0': /* small upsilon with dialytika and tonos */
+        return '\u03C5'; /* small upsilon */
+        
+      case '\u038C': /* capital omicron with tonos */
+      case '\u03CC': /* small omicron with tonos */
+        return '\u03BF'; /* small omicron */
+        
+      case '\u038F': /* capital omega with tonos */
+      case '\u03CE': /* small omega with tonos */
+        return '\u03C9'; /* small omega */
+        
+      /* The previous implementation did the conversion below.
+       * Only implemented for backwards compatibility with old indexes.
+       */
+        
+      case '\u03A2': /* reserved */
+        return '\u03C2'; /* small final sigma */
+        
+      default:
+        return Character.toLowerCase(codepoint);
    }
+  }
 }
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java
@ -0,0 +1,63 @@
+package org.apache.lucene.analysis.el;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.KeywordMarkerFilter; // for javadoc
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * A {@link TokenFilter} that applies {@link GreekStemmer} to stem Greek
+ * words.
+ * <p>
+ * To prevent terms from being stemmed use an instance of
+ * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
+ * the {@link KeywordAttribute} before this {@link TokenStream}.
+ * </p>
+ * <p>
+ * NOTE: Input is expected to be casefolded for Greek (including folding of final
+ * sigma to sigma), and with diacritics removed. This can be achieved by using 
+ * either {@link GreekLowerCaseFilter} or ICUFoldingFilter before GreekStemFilter.
+ * @lucene.experimental
+ */
+public final class GreekStemFilter extends TokenFilter {
+  private final GreekStemmer stemmer = new GreekStemmer();
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+
+  /**
+   * Wraps the given stream so that its non-keyword terms are stemmed.
+   *
+   * @param input the stream to stem
+   */
+  public GreekStemFilter(TokenStream input) {
+    super(input);
+  }
+
+  /**
+   * Advances the wrapped stream; unless the token is flagged as a keyword,
+   * the term is truncated to the length reported by the stemmer.
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+    if (keywordAtt.isKeyword()) {
+      // keyword tokens pass through untouched
+      return true;
+    }
+    termAtt.setLength(stemmer.stem(termAtt.buffer(), termAtt.length()));
+    return true;
+  }
+}
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
@ -0,0 +1,819 @@
+package org.apache.lucene.analysis.el;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.util.Version;
+
+import java.util.Arrays;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for Greek words, according to: <i>Development of a Stemmer for the
+ * Greek Language.</i> Georgios Ntais
+ * <p>
+ * NOTE: Input is expected to be casefolded for Greek (including folding of final
+ * sigma to sigma), and with diacritics removed. This can be achieved with 
+ * either {@link GreekLowerCaseFilter} or ICUFoldingFilter.
+ * @lucene.experimental
+ */
+public class GreekStemmer {
+  /**
+   * Stems the word held in the first {@code len} chars of {@code s}.
+   * The buffer may be modified in place.
+   *
+   * @param s buffer holding the word
+   * @param len number of valid chars in the buffer
+   * @return the length of the stemmed word
+   */
+  public int stem(char s[], int len) {
+    if (len < 4) { // too short
+      return len;
+    }
+
+    final int lengthBeforeRules = len;
+
+    // "short rules": if it hits one of these, it skips the "long list"
+    len = rule0(s, len);
+    len = rule1(s, len);
+    len = rule2(s, len);
+    len = rule3(s, len);
+    len = rule4(s, len);
+    len = rule5(s, len);
+    len = rule6(s, len);
+    len = rule7(s, len);
+    len = rule8(s, len);
+    len = rule9(s, len);
+    len = rule10(s, len);
+    len = rule11(s, len);
+    len = rule12(s, len);
+    len = rule13(s, len);
+    len = rule14(s, len);
+    len = rule15(s, len);
+    len = rule16(s, len);
+    len = rule17(s, len);
+    len = rule18(s, len);
+    len = rule19(s, len);
+    len = rule20(s, len);
+
+    // "long list": only tried when the length is the same as before the short rules
+    if (len == lengthBeforeRules) {
+      len = rule21(s, len);
+    }
+
+    return rule22(s, len);
+  }
+
+  /**
+   * Handles a fixed list of word endings (e.g. -φωσ/-φωτα/-φωτοσ,
+   * -κρεασ/-κρεατα/-κρεατοσ, -γεγονοσ/-γεγονοτα) by cutting a fixed number of
+   * trailing chars. Endings are tested longest first and the first match
+   * decides the result.
+   *
+   * @param s buffer holding the word
+   * @param len number of valid chars in the buffer
+   * @return the new length, or {@code len} unchanged when nothing matched
+   */
+  private int rule0(char s[], int len) {
+    if (len > 9 && (endsWith(s, len, "καθεστωτοσ")
+        || endsWith(s, len, "καθεστωτων")))
+      return len - 4;
+    
+    if (len > 8 && (endsWith(s, len, "γεγονοτοσ")
+        || endsWith(s, len, "γεγονοτων")))
+      return len - 4;
+    
+    if (len > 8 && endsWith(s, len, "καθεστωτα"))
+      return len - 3;
+    
+    if (len > 7 && (endsWith(s, len, "τατογιου")
+        || endsWith(s, len, "τατογιων")))
+      return len - 4;
+    
+    if (len > 7 && endsWith(s, len, "γεγονοτα"))
+      return len - 3;
+    
+    if (len > 7 && endsWith(s, len, "καθεστωσ"))
+      return len - 2;
+    
+    // the length guard must cover every alternative, not just the first one
+    if (len > 6 && (endsWith(s, len, "σκαγιου")
+        || endsWith(s, len, "σκαγιων")
+        || endsWith(s, len, "ολογιου")
+        || endsWith(s, len, "ολογιων")
+        || endsWith(s, len, "κρεατοσ")
+        || endsWith(s, len, "κρεατων")
+        || endsWith(s, len, "περατοσ")
+        || endsWith(s, len, "περατων")
+        || endsWith(s, len, "τερατοσ")
+        || endsWith(s, len, "τερατων")))
+      return len - 4;
+    
+    if (len > 6 && endsWith(s, len, "τατογια"))
+      return len - 3;
+    
+    if (len > 6 && endsWith(s, len, "γεγονοσ"))
+      return len - 2;
+    
+    if (len > 5 && (endsWith(s, len, "φαγιου")
+        || endsWith(s, len, "φαγιων")
+        || endsWith(s, len, "σογιου")
+        || endsWith(s, len, "σογιων")))
+      return len - 4;
+    
+    if (len > 5 && (endsWith(s, len, "σκαγια")
+        || endsWith(s, len, "ολογια")
+        || endsWith(s, len, "κρεατα")
+        || endsWith(s, len, "περατα")
+        || endsWith(s, len, "τερατα")))
+      return len - 3;
+    
+    if (len > 4 && (endsWith(s, len, "φαγια")
+        || endsWith(s, len, "σογια")
+        || endsWith(s, len, "φωτοσ")
+        || endsWith(s, len, "φωτων")))
+      return len - 3;
+    
+    if (len > 4 && (endsWith(s, len, "κρεασ")
+        || endsWith(s, len, "περασ")
+        || endsWith(s, len, "τερασ")))
+      return len - 2;
+    
+    if (len > 3 && endsWith(s, len, "φωτα"))
+      return len - 2;
+    
+    if (len > 2 && endsWith(s, len, "φωσ"))
+      return len - 1;
+    
+    return len;
+  }
+
+  /**
+   * Handles the endings -αδεσ / -αδων. The whole ending is removed only when
+   * the remaining stem ends with one of the listed exceptions; otherwise the
+   * -αδ is kept and just the last two chars are dropped.
+   */
+  private int rule1(char s[], int len) {
+    final boolean hasEnding = len > 4
+        && (endsWith(s, len, "αδεσ") || endsWith(s, len, "αδων"));
+    if (!hasEnding) {
+      return len;
+    }
+
+    final int stemLen = len - 4;
+    final boolean dropWholeEnding = endsWith(s, stemLen, "οκ")
+        || endsWith(s, stemLen, "μαμ")
+        || endsWith(s, stemLen, "μαν")
+        || endsWith(s, stemLen, "μπαμπ")
+        || endsWith(s, stemLen, "πατερ")
+        || endsWith(s, stemLen, "γιαγι")
+        || endsWith(s, stemLen, "νταντ")
+        || endsWith(s, stemLen, "κυρ")
+        || endsWith(s, stemLen, "θει")
+        || endsWith(s, stemLen, "πεθερ");
+
+    // add back -αδ unless the stem is one of the exceptions
+    return dropWholeEnding ? stemLen : stemLen + 2;
+  }
+  
+  /**
+   * Handles the endings -εδεσ / -εδων. The ending is removed, but the -εδ is
+   * put back when the remaining stem ends with one of the listed exceptions.
+   */
+  private int rule2(char s[], int len) {
+    final boolean hasEnding = len > 4
+        && (endsWith(s, len, "εδεσ") || endsWith(s, len, "εδων"));
+    if (!hasEnding) {
+      return len;
+    }
+
+    final int stemLen = len - 4;
+    final boolean keepEd = endsWith(s, stemLen, "οπ")
+        || endsWith(s, stemLen, "ιπ")
+        || endsWith(s, stemLen, "εμπ")
+        || endsWith(s, stemLen, "υπ")
+        || endsWith(s, stemLen, "γηπ")
+        || endsWith(s, stemLen, "δαπ")
+        || endsWith(s, stemLen, "κρασπ")
+        || endsWith(s, stemLen, "μιλ");
+
+    // add back -εδ for the exceptions
+    return keepEd ? stemLen + 2 : stemLen;
+  }
+  
+  /**
+   * Handles the endings -ουδεσ / -ουδων. The ending is removed, but the -ουδ
+   * is put back when the remaining stem ends with one of the listed exceptions.
+   */
+  private int rule3(char s[], int len) {
+    final boolean hasEnding = len > 5
+        && (endsWith(s, len, "ουδεσ") || endsWith(s, len, "ουδων"));
+    if (!hasEnding) {
+      return len;
+    }
+
+    final int stemLen = len - 5;
+    final boolean keepOud = endsWith(s, stemLen, "αρκ")
+        || endsWith(s, stemLen, "καλιακ")
+        || endsWith(s, stemLen, "πεταλ")
+        || endsWith(s, stemLen, "λιχ")
+        || endsWith(s, stemLen, "πλεξ")
+        || endsWith(s, stemLen, "σκ")
+        || endsWith(s, stemLen, "σ")
+        || endsWith(s, stemLen, "φλ")
+        || endsWith(s, stemLen, "φρ")
+        || endsWith(s, stemLen, "βελ")
+        || endsWith(s, stemLen, "λουλ")
+        || endsWith(s, stemLen, "χν")
+        || endsWith(s, stemLen, "σπ")
+        || endsWith(s, stemLen, "τραγ")
+        || endsWith(s, stemLen, "φε");
+
+    // add back -ουδ for the exceptions
+    return keepOud ? stemLen + 3 : stemLen;
+  }
+  
+  /** Whole stems for which rule4 keeps the -ε of the removed ending. */
+  private static final CharArraySet exc4 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("θ", "δ", "ελ", "γαλ", "ν", "π", "ιδ", "παρ"),
+      false);
+  
+  /**
+   * Handles the endings -εωσ / -εων. The ending is removed, but the -ε is put
+   * back when the whole remaining stem is listed in {@code exc4}.
+   */
+  private int rule4(char s[], int len) {
+    final boolean hasEnding = len > 3
+        && (endsWith(s, len, "εωσ") || endsWith(s, len, "εων"));
+    if (!hasEnding) {
+      return len;
+    }
+
+    final int stemLen = len - 3;
+    // add back -ε for the exceptions
+    return exc4.contains(s, 0, stemLen) ? stemLen + 1 : stemLen;
+  }
+  
+  /**
+   * Handles the endings -ια, -ιου and -ιων. The ending is removed, but the -ι
+   * is put back when {@code endsWithVowel} accepts the remaining stem.
+   */
+  private int rule5(char s[], int len) {
+    final int endingLen;
+    if (len > 2 && endsWith(s, len, "ια")) {
+      endingLen = 2;
+    } else if (len > 3 && (endsWith(s, len, "ιου") || endsWith(s, len, "ιων"))) {
+      endingLen = 3;
+    } else {
+      return len;
+    }
+
+    final int stemLen = len - endingLen;
+    // add back -ι after a vowel
+    return endsWithVowel(s, stemLen) ? stemLen + 1 : stemLen;
+  }
+
+  /** Whole stems for which rule6 keeps the -ικ of the removed ending. */
+  private static final CharArraySet exc6 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("αλ", "αδ", "ενδ", "αμαν", "αμμοχαλ", "ηθ", "ανηθ",
+          "αντιδ", "φυσ", "βρωμ", "γερ", "εξωδ", "καλπ", "καλλιν", "καταδ",
+          "μουλ", "μπαν", "μπαγιατ", "μπολ", "μποσ", "νιτ", "ξικ", "συνομηλ",
+          "πετσ", "πιτσ", "πικαντ", "πλιατσ", "ποστελν", "πρωτοδ", "σερτ",
+          "συναδ", "τσαμ", "υποδ", "φιλον", "φυλοδ", "χασ"), 
+       false);
+
+  /**
+   * Handles the endings -ικα, -ικο, -ικου and -ικων. The ending is removed,
+   * but the -ικ is put back when {@code endsWithVowel} accepts the remaining
+   * stem or the whole stem is listed in {@code exc6}.
+   */
+  private int rule6(char s[], int len) {
+    final int endingLen;
+    if (len > 3 && (endsWith(s, len, "ικα") || endsWith(s, len, "ικο"))) {
+      endingLen = 3;
+    } else if (len > 4 && (endsWith(s, len, "ικου") || endsWith(s, len, "ικων"))) {
+      endingLen = 4;
+    } else {
+      return len;
+    }
+
+    final int stemLen = len - endingLen;
+    if (endsWithVowel(s, stemLen) || exc6.contains(s, 0, stemLen)) {
+      return stemLen + 2; // add back -ικ
+    }
+    return stemLen;
+  }
+  
+  /** Whole stems for which rule7 keeps the -αμ of a removed -αμε. */
+  private static final CharArraySet exc7 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("αναπ", "αποθ", "αποκ", "αποστ", "βουβ", "ξεθ", "ουλ",
+          "πεθ", "πικρ", "ποτ", "σιχ", "χ"), 
+      false);
+  
+  /**
+   * Handles the -αμε family of endings in two stages: first one of the longer
+   * endings (-ηθηκαμε, -ουσαμε, -αγαμε, -ησαμε, -ηκαμε) is removed, then a
+   * remaining -αμε is removed, restoring -αμ when the whole stem is in
+   * {@code exc7}. The five-letter word αγαμε only loses its final letter.
+   *
+   * @param s buffer holding the word
+   * @param len number of valid chars in the buffer
+   * @return the new length
+   */
+  private int rule7(char s[], int len) {
+    // special case: the word is exactly "αγαμε"
+    if (len == 5 && endsWith(s, len, "αγαμε"))
+      return len - 1;
+    
+    // stage 1: longest ending first
+    if (len > 7 && endsWith(s, len, "ηθηκαμε"))
+      len -= 7;
+    else if (len > 6 && endsWith(s, len, "ουσαμε"))
+      len -= 6;
+    else if (len > 5 && (endsWith(s, len, "αγαμε") ||
+             endsWith(s, len, "ησαμε") ||
+             endsWith(s, len, "ηκαμε")))
+      len -= 5;
+    
+    // stage 2: runs on whatever stage 1 left behind
+    if (len > 3 && endsWith(s, len, "αμε")) {
+      len -= 3;
+      if (exc7.contains(s, 0, len))
+        len += 2; // add back -αμ
+    }
+
+    return len;
+  }
+
+  /** Whole stems that get -αγαν appended after rule8 removed a long ending. */
+  private static final CharArraySet exc8a = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("τρ", "τσ"),
+      false);
+
+  /** Whole stems for which rule8 keeps the -αν of a removed -ανε. */
+  private static final CharArraySet exc8b = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("βετερ", "βουλκ", "βραχμ", "γ", "δραδουμ", "θ", "καλπουζ",
+          "καστελ", "κορμορ", "λαοπλ", "μωαμεθ", "μ", "μουσουλμ", "ν", "ουλ",
+          "π", "πελεκ", "πλ", "πολισ", "πορτολ", "σαρακατσ", "σουλτ",
+          "τσαρλατ", "ορφ", "τσιγγ", "τσοπ", "φωτοστεφ", "χ", "ψυχοπλ", "αγ",
+          "ορφ", "γαλ", "γερ", "δεκ", "διπλ", "αμερικαν", "ουρ", "πιθ",
+          "πουριτ", "σ", "ζωντ", "ικ", "καστ", "κοπ", "λιχ", "λουθηρ", "μαιντ",
+          "μελ", "σιγ", "σπ", "στεγ", "τραγ", "τσαγ", "φ", "ερ", "αδαπ",
+          "αθιγγ", "αμηχ", "ανικ", "ανοργ", "απηγ", "απιθ", "ατσιγγ", "βασ",
+          "βασκ", "βαθυγαλ", "βιομηχ", "βραχυκ", "διατ", "διαφ", "ενοργ",
+          "θυσ", "καπνοβιομηχ", "καταγαλ", "κλιβ", "κοιλαρφ", "λιβ",
+          "μεγλοβιομηχ", "μικροβιομηχ", "νταβ", "ξηροκλιβ", "ολιγοδαμ",
+          "ολογαλ", "πενταρφ", "περηφ", "περιτρ", "πλατ", "πολυδαπ", "πολυμηχ",
+          "στεφ", "ταβ", "τετ", "υπερηφ", "υποκοπ", "χαμηλοδαπ", "ψηλοταβ"),
+      false);
+  
+  /**
+   * Handles the -ανε family of endings. First one of the longer endings
+   * (-ιουντανε ... -ηκανε) is removed; if the whole remaining stem is in
+   * {@code exc8a}, -αγαν is written after it. Then a remaining -ανε is
+   * removed, restoring -αν when {@code endsWithVowelNoY} accepts the stem or
+   * the whole stem is in {@code exc8b}.
+   * <p>
+   * Every alternative of a branch is covered by that branch's length guard,
+   * so a word that consists of nothing but the ending is left alone instead
+   * of being reduced to length 0.
+   *
+   * @param s buffer holding the word; may be written to
+   * @param len number of valid chars in the buffer
+   * @return the new length
+   */
+  private int rule8(char s[], int len) {
+    boolean removed = false;
+    
+    if (len > 8 && endsWith(s, len, "ιουντανε")) {
+      len -= 8;
+      removed = true;
+    } else if (len > 7 && (endsWith(s, len, "ιοντανε") ||
+        endsWith(s, len, "ουντανε") ||
+        endsWith(s, len, "ηθηκανε"))) {
+      len -= 7;
+      removed = true;
+    } else if (len > 6 && (endsWith(s, len, "ιοτανε") ||
+        endsWith(s, len, "οντανε") ||
+        endsWith(s, len, "ουσανε"))) {
+      len -= 6;
+      removed = true;
+    } else if (len > 5 && (endsWith(s, len, "αγανε") ||
+        endsWith(s, len, "ησανε") ||
+        endsWith(s, len, "οτανε") ||
+        endsWith(s, len, "ηκανε"))) {
+      len -= 5;
+      removed = true;
+    }
+    
+    if (removed && exc8a.contains(s, 0, len)) {
+      // add -αγαν (we removed > 4 chars so its safe)
+      len += 4;
+      s[len - 4] = 'α';
+      s[len - 3] = 'γ';
+      s[len - 2] = 'α';
+      s[len - 1] = 'ν';
+    }
+    
+    if (len > 3 && endsWith(s, len, "ανε")) {
+      len -= 3;
+      if (endsWithVowelNoY(s, len) || exc8b.contains(s, 0, len)) {
+        len += 2; // add back -αν
+      }
+    }
+    
+    return len;
+  }
+  
+  /** Whole stems for which rule9 keeps the -ετ of a removed -ετε. */
+  private static final CharArraySet exc9 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("αβαρ", "βεν", "εναρ", "αβρ", "αδ", "αθ", "αν", "απλ",
+          "βαρον", "ντρ", "σκ", "κοπ", "μπορ", "νιφ", "παγ", "παρακαλ", "σερπ",
+          "σκελ", "συρφ", "τοκ", "υ", "δ", "εμ", "θαρρ", "θ"), 
+      false);
+  
+  /**
+   * Handles the endings -ησετε and -ετε. -ησετε is removed outright; a
+   * remaining -ετε is then removed, restoring -ετ when the whole stem is in
+   * {@code exc9}, when {@code endsWithVowelNoY} accepts the stem, or when the
+   * stem ends with one of the listed suffixes.
+   *
+   * @param s buffer holding the word
+   * @param len number of valid chars in the buffer
+   * @return the new length
+   */
+  private int rule9(char s[], int len) {
+    if (len > 5 && endsWith(s, len, "ησετε"))
+      len -= 5;
+    
+    // runs on the result of the -ησετε step above
+    if (len > 3 && endsWith(s, len, "ετε")) {
+      len -= 3;
+      if (exc9.contains(s, 0, len) ||
+          endsWithVowelNoY(s, len) ||
+          endsWith(s, len, "οδ") ||
+          endsWith(s, len, "αιρ") ||
+          endsWith(s, len, "φορ") ||
+          endsWith(s, len, "ταθ") ||
+          endsWith(s, len, "διαθ") ||
+          endsWith(s, len, "σχ") ||
+          endsWith(s, len, "ενδ") ||
+          endsWith(s, len, "ευρ") ||
+          endsWith(s, len, "τιθ") ||
+          endsWith(s, len, "υπερθ") ||
+          endsWith(s, len, "ραθ") ||
+          endsWith(s, len, "ενθ") ||
+          endsWith(s, len, "ροθ") ||
+          endsWith(s, len, "σθ") ||
+          endsWith(s, len, "πυρ") ||
+          endsWith(s, len, "αιν") ||
+          endsWith(s, len, "συνδ") ||
+          endsWith(s, len, "συν") ||
+          endsWith(s, len, "συνθ") ||
+          endsWith(s, len, "χωρ") ||
+          endsWith(s, len, "πον") ||
+          endsWith(s, len, "βρ") ||
+          endsWith(s, len, "καθ") ||
+          endsWith(s, len, "ευθ") ||
+          endsWith(s, len, "εκθ") ||
+          endsWith(s, len, "νετ") ||
+          endsWith(s, len, "ρον") ||
+          endsWith(s, len, "αρκ") ||
+          endsWith(s, len, "βαρ") ||
+          endsWith(s, len, "βολ") ||
+          endsWith(s, len, "ωφελ")) {
+        len += 2; // add back -ετ
+      }
+    }
+    
+    return len;
+  }
+
+  /**
+   * Handles the endings -οντασ / -ωντασ. The ending is removed; when the
+   * remaining stem is exactly αρχ the three chars after it become valid again
+   * with the first one set to ο, and when the stem ends with κρε the same is
+   * done with the first one set to ω.
+   *
+   * @param s buffer holding the word; may be written to
+   * @param len number of valid chars in the buffer
+   * @return the new length
+   */
+  private int rule10(char s[], int len) {
+    if (len > 5 && (endsWith(s, len, "οντασ") || endsWith(s, len, "ωντασ"))) {
+      len -= 5;
+      if (len == 3 && endsWith(s, len, "αρχ")) {
+        len += 3; // add back *ντ
+        s[len - 3] = 'ο'; // force the vowel, the removed ending may have had ω
+      }
+      if (endsWith(s, len, "κρε")) {
+        len += 3; // add back *ντ
+        s[len - 3] = 'ω'; // force the vowel, the removed ending may have had ο
+      }
+    }
+    
+    return len;
+  }
+  
+  /**
+   * Handles the endings "-ιομαστε" and "-ομαστε".
+   * <p>
+   * The longer ending is tested first: every word that ends in "-ιομαστε" also
+   * ends in "-ομαστε", so testing the shorter one first would make the longer
+   * branch unreachable. After the ending is removed, a remaining stem of
+   * exactly "ον" is turned into "ονομαστ".
+   *
+   * @param s buffer holding the word; may be modified in place
+   * @param len number of characters of {@code s} that make up the word
+   * @return the new word length
+   */
+  private int rule11(char s[], int len) {
+    if (len > 7 && endsWith(s, len, "ιομαστε")) {
+      len -= 7;
+      if (len == 2 && endsWith(s, len, "ον")) {
+        // the buffer holds "ιομαστ" after the stem, so "ομαστ" has to be written explicitly
+        len += 5;
+        s[len - 5] = 'ο';
+        s[len - 4] = 'μ';
+        s[len - 3] = 'α';
+        s[len - 2] = 'σ';
+        s[len - 1] = 'τ';
+      }
+    } else if (len > 6 && endsWith(s, len, "ομαστε")) {
+      len -= 6;
+      if (len == 2 && endsWith(s, len, "ον")) {
+        len += 5; // add back -ομαστ, which is still in the buffer
+      }
+    }
+    return len;
+  }
+
+  /** Stems for which rule12 puts "-ιεστ" back after it has stripped a trailing "-ιεστε". */
+  private static final CharArraySet exc12a = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("π", "απ", "συμπ", "ασυμπ", "ακαταπ", "αμεταμφ"),
+      false);
+
+  /**
+   * Stems for which rule12 puts "-εστ" back after it has stripped a trailing "-εστε".
+   * NOTE(review): "αρ" is listed twice; presumably harmless for a set, but confirm
+   * that the second occurrence was not meant to be a different stem.
+   */
+  private static final CharArraySet exc12b = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("αλ", "αρ", "εκτελ", "ζ", "μ", "ξ", "παρακαλ", "αρ", "προ", "νισ"),
+      false);
+  
+  private int rule12(char s[], int len) {
+    if (len > 5 && endsWith(s, len, "ιεστε")) {
+      len -= 5;
+      if (exc12a.contains(s, 0, len))   
+        len += 4; // add back -ιεστ
+    }
+    
+    if (len > 4 && endsWith(s, len, "εστε")) {
+      len -= 4;
+      if (exc12b.contains(s, 0, len))
+        len += 3; // add back -εστ
+    }
+    
+    return len;
+  }
+  
+  /** Stems for which rule13 puts "-ηκ" back after it has stripped "-ηκεσ", "-ηκα" or "-ηκε". */
+  private static final CharArraySet exc13 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("διαθ", "θ", "παρακαταθ", "προσθ", "συνθ"),
+      false);
+  
+  private int rule13(char s[], int len) {
+    if (len > 6 && endsWith(s, len, "ηθηκεσ")) {
+      len -= 6;
+    } else if (len > 5 && (endsWith(s, len, "ηθηκα") || endsWith(s, len, "ηθηκε"))) {
+      len -= 5;
+    }
+    
+    boolean removed = false;
+    
+    if (len > 4 && endsWith(s, len, "ηκεσ")) {
+      len -= 4;
+      removed = true;
+    } else if (len > 3 && (endsWith(s, len, "ηκα") || endsWith(s, len, "ηκε"))) {
+      len -= 3;
+      removed = true;
+    }
+
+    if (removed && (exc13.contains(s, 0, len) 
+        || endsWith(s, len, "σκωλ")
+        || endsWith(s, len, "σκουλ")
+        || endsWith(s, len, "ναρθ")
+        || endsWith(s, len, "σφ")
+        || endsWith(s, len, "οθ")
+        || endsWith(s, len, "πιθ"))) { 
+      len += 2; // add back the -ηκ
+    }
+    
+    return len;
+  }
+  
+  /** Stems for which rule14 puts "-ουσ" back after it has stripped "-ουσεσ", "-ουσα" or "-ουσε". */
+  private static final CharArraySet exc14 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("φαρμακ", "χαδ", "αγκ", "αναρρ", "βρομ", "εκλιπ", "λαμπιδ",
+          "λεχ", "μ", "πατ", "ρ", "λ", "μεδ", "μεσαζ", "υποτειν", "αμ", "αιθ",
+          "ανηκ", "δεσποζ", "ενδιαφερ", "δε", "δευτερευ", "καθαρευ", "πλε",
+          "τσα"), 
+      false);
+
+  private int rule14(char s[], int len) {
+    boolean removed = false;
+    
+    if (len > 5 && endsWith(s, len, "ουσεσ")) {
+      len -= 5;
+      removed = true;
+    } else if (len > 4 && (endsWith(s, len, "ουσα") || endsWith(s, len, "ουσε"))) {
+      len -= 4;
+      removed = true;
+    }
+    
+    if (removed && (exc14.contains(s, 0, len) 
+        || endsWithVowel(s, len)
+        || endsWith(s, len, "ποδαρ")
+        || endsWith(s, len, "βλεπ")
+        || endsWith(s, len, "πανταχ")
+        || endsWith(s, len, "φρυδ") 
+        || endsWith(s, len, "μαντιλ")
+        || endsWith(s, len, "μαλλ")
+        || endsWith(s, len, "κυματ")
+        || endsWith(s, len, "λαχ")
+        || endsWith(s, len, "ληγ")
+        || endsWith(s, len, "φαγ")
+        || endsWith(s, len, "ομ")
+        || endsWith(s, len, "πρωτ"))) {
+      len += 3; // add back -ουσ
+    }
+
+   return len;
+  }
+  
+  /**
+   * Stems for which rule15 puts "-αγ" back after it has stripped "-αγεσ", "-αγα" or "-αγε",
+   * unless the stem is also matched by rule15's second (vetoing) condition.
+   */
+  private static final CharArraySet exc15a = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("αβαστ", "πολυφ", "αδηφ", "παμφ", "ρ", "ασπ", "αφ", "αμαλ",
+          "αμαλλι", "ανυστ", "απερ", "ασπαρ", "αχαρ", "δερβεν", "δροσοπ",
+          "ξεφ", "νεοπ", "νομοτ", "ολοπ", "ομοτ", "προστ", "προσωποπ", "συμπ",
+          "συντ", "τ", "υποτ", "χαρ", "αειπ", "αιμοστ", "ανυπ", "αποτ",
+          "αρτιπ", "διατ", "εν", "επιτ", "κροκαλοπ", "σιδηροπ", "λ", "ναυ",
+          "ουλαμ", "ουρ", "π", "τρ", "μ"), 
+      false);
+  
+  /** Stems for which rule15 never puts "-αγ" back, even if its first condition matches. */
+  private static final CharArraySet exc15b = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("ψοφ", "ναυλοχ"),
+      false);
+  
+  /**
+   * Handles the endings "-αγεσ", "-αγα" and "-αγε".
+   * <p>
+   * The ending is removed. "-αγ" is kept when the remaining stem is listed in
+   * {@code exc15a} or ends in one of the stems of the first list below, and is
+   * at the same time neither listed in {@code exc15b} nor ending in "κολλ".
+   *
+   * @param s buffer holding the word
+   * @param len number of characters of {@code s} that make up the word
+   * @return the new word length
+   */
+  private int rule15(char s[], int len) {
+    final int stem;
+    if (len > 4 && endsWith(s, len, "αγεσ")) {
+      stem = len - 4;
+    } else if (len > 3 && (endsWith(s, len, "αγα") || endsWith(s, len, "αγε"))) {
+      stem = len - 3;
+    } else {
+      return len;
+    }
+
+    final boolean wantsAg = exc15a.contains(s, 0, stem)
+        || endsWith(s, stem, "οφ")
+        || endsWith(s, stem, "πελ")
+        || endsWith(s, stem, "χορτ")
+        || endsWith(s, stem, "λλ")
+        || endsWith(s, stem, "σφ")
+        || endsWith(s, stem, "ρπ")
+        || endsWith(s, stem, "φρ")
+        || endsWith(s, stem, "πρ")
+        || endsWith(s, stem, "λοχ")
+        || endsWith(s, stem, "σμην");
+
+    final boolean vetoed = exc15b.contains(s, 0, stem)
+        || endsWith(s, stem, "κολλ");
+
+    // the -αγ is still in the buffer right after the stem
+    return (wantsAg && !vetoed) ? stem + 2 : stem;
+  }
+  
+  /** Stems for which rule16 puts "-ησ" back after it has stripped "-ησου", "-ησε" or "-ησα". */
+  private static final CharArraySet exc16 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("ν", "χερσον", "δωδεκαν", "ερημον", "μεγαλον", "επταν"),
+      false);
+  
+  private int rule16(char s[], int len) {
+    boolean removed = false;
+    if (len > 4 && endsWith(s, len, "ησου")) {
+      len -= 4;
+      removed = true;
+    } else if (len > 3 && (endsWith(s, len, "ησε") || endsWith(s, len, "ησα"))) {
+      len -= 3;
+      removed = true;
+    }
+    
+    if (removed && exc16.contains(s, 0, len))
+      len += 2; // add back -ησ
+    
+    return len;
+  }
+  
+  /** Stems for which rule17 puts "-ηστ" back after it has stripped a trailing "-ηστε". */
+  private static final CharArraySet exc17 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("ασβ", "σβ", "αχρ", "χρ", "απλ", "αειμν", "δυσχρ", "ευχρ", "κοινοχρ", "παλιμψ"),
+      false);
+  
+  private int rule17(char s[], int len) {
+    if (len > 4 && endsWith(s, len, "ηστε")) {
+      len -= 4;
+      if (exc17.contains(s, 0, len))
+        len += 3; // add back the -ηστ
+    }
+    
+    return len;
+  }
+  
+  /** Stems to which rule18 appends "ουν" after it has stripped "-ησουνε", "-ηθουνε" or "-ουνε". */
+  private static final CharArraySet exc18 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("ν", "ρ", "σπι", "στραβομουτσ", "κακομουτσ", "εξων"),
+      false);
+  
+  private int rule18(char s[], int len) {
+    boolean removed = false;
+    
+    if (len > 6 && (endsWith(s, len, "ησουνε") || endsWith(s, len, "ηθουνε"))) {
+      len -= 6;
+      removed = true;
+    } else if (len > 4 && endsWith(s, len, "ουνε")) {
+      len -= 4;
+      removed = true;
+    }
+    
+    if (removed && exc18.contains(s, 0, len)) {
+      len += 3;
+      s[len - 3] = 'ο';
+      s[len - 2] = 'υ';
+      s[len - 1] = 'ν';
+    }
+    return len;
+  }
+  
+  /** Stems to which rule19 appends "ουμ" after it has stripped "-ησουμε", "-ηθουμε" or "-ουμε". */
+  private static final CharArraySet exc19 = new CharArraySet(Version.LUCENE_31,
+      Arrays.asList("παρασουσ", "φ", "χ", "ωριοπλ", "αζ", "αλλοσουσ", "ασουσ"),
+      false);
+  
+  private int rule19(char s[], int len) {
+    boolean removed = false;
+    
+    if (len > 6 && (endsWith(s, len, "ησουμε") || endsWith(s, len, "ηθουμε"))) {
+      len -= 6;
+      removed = true;
+    } else if (len > 4 && endsWith(s, len, "ουμε")) {
+      len -= 4;
+      removed = true;
+    }
+    
+    if (removed && exc19.contains(s, 0, len)) {
+      len += 3;
+      s[len - 3] = 'ο';
+      s[len - 2] = 'υ';
+      s[len - 1] = 'μ';
+    }
+    return len;
+  }
+  
+  /**
+   * Shortens the endings "-ματων", "-ματοσ" and "-ματα" to "-μα".
+   *
+   * @param s buffer holding the word
+   * @param len number of characters of {@code s} that make up the word
+   * @return the new word length
+   */
+  private int rule20(char s[], int len) {
+    if (len > 5 && (endsWith(s, len, "ματων") || endsWith(s, len, "ματοσ"))) {
+      return len - 3;
+    }
+    if (len > 4 && endsWith(s, len, "ματα")) {
+      return len - 2;
+    }
+    return len;
+  }
+
+  /**
+   * Removes at most one of a long list of endings.
+   * <p>
+   * The endings are grouped by length and tried from the longest (nine
+   * characters) down to a single trailing vowel. The method returns as soon as
+   * one group matches, so the order of the groups matters. Every group requires
+   * the word to be longer than the ending it removes.
+   *
+   * @param s buffer holding the word
+   * @param len number of characters of {@code s} that make up the word
+   * @return the new word length
+   */
+  private int rule21(char s[], int len) {
+    // nine-character ending
+    if (len > 9 && endsWith(s, len, "ιοντουσαν"))
+      return len - 9;
+    
+    // eight-character endings
+    if (len > 8 && (endsWith(s, len, "ιομασταν") ||
+        endsWith(s, len, "ιοσασταν") ||
+        endsWith(s, len, "ιουμαστε") ||
+        endsWith(s, len, "οντουσαν")))
+      return len - 8;
+    
+    // seven-character endings
+    if (len > 7 && (endsWith(s, len, "ιεμαστε") ||
+        endsWith(s, len, "ιεσαστε") ||
+        endsWith(s, len, "ιομουνα") ||
+        endsWith(s, len, "ιοσαστε") ||
+        endsWith(s, len, "ιοσουνα") ||
+        endsWith(s, len, "ιουνται") ||
+        endsWith(s, len, "ιουνταν") ||
+        endsWith(s, len, "ηθηκατε") ||
+        endsWith(s, len, "ομασταν") ||
+        endsWith(s, len, "οσασταν") ||
+        endsWith(s, len, "ουμαστε")))
+      return len - 7;
+    
+    // six-character endings
+    if (len > 6 && (endsWith(s, len, "ιομουν") ||
+        endsWith(s, len, "ιονταν") ||
+        endsWith(s, len, "ιοσουν") ||
+        endsWith(s, len, "ηθειτε") ||
+        endsWith(s, len, "ηθηκαν") ||
+        endsWith(s, len, "ομουνα") ||
+        endsWith(s, len, "οσαστε") ||
+        endsWith(s, len, "οσουνα") ||
+        endsWith(s, len, "ουνται") ||
+        endsWith(s, len, "ουνταν") ||
+        endsWith(s, len, "ουσατε")))
+      return len - 6;
+    
+    // five-character endings
+    if (len > 5 && (endsWith(s, len, "αγατε") ||
+        endsWith(s, len, "ιεμαι") ||
+        endsWith(s, len, "ιεται") ||
+        endsWith(s, len, "ιεσαι") ||
+        endsWith(s, len, "ιοταν") ||
+        endsWith(s, len, "ιουμα") ||
+        endsWith(s, len, "ηθεισ") ||
+        endsWith(s, len, "ηθουν") ||
+        endsWith(s, len, "ηκατε") ||
+        endsWith(s, len, "ησατε") ||
+        endsWith(s, len, "ησουν") ||
+        endsWith(s, len, "ομουν") ||
+        endsWith(s, len, "ονται") ||
+        endsWith(s, len, "ονταν") ||
+        endsWith(s, len, "οσουν") ||
+        endsWith(s, len, "ουμαι") ||
+        endsWith(s, len, "ουσαν")))
+      return len - 5;
+    
+    // four-character endings
+    if (len > 4 && (endsWith(s, len, "αγαν") ||
+        endsWith(s, len, "αμαι") ||
+        endsWith(s, len, "ασαι") ||
+        endsWith(s, len, "αται") ||
+        endsWith(s, len, "ειτε") ||
+        endsWith(s, len, "εσαι") ||
+        endsWith(s, len, "εται") ||
+        endsWith(s, len, "ηδεσ") ||
+        endsWith(s, len, "ηδων") ||
+        endsWith(s, len, "ηθει") ||
+        endsWith(s, len, "ηκαν") ||
+        endsWith(s, len, "ησαν") ||
+        endsWith(s, len, "ησει") ||
+        endsWith(s, len, "ησεσ") ||
+        endsWith(s, len, "ομαι") ||
+        endsWith(s, len, "οταν")))
+      return len - 4;
+    
+    // three-character endings
+    if (len > 3 && (endsWith(s, len, "αει") ||
+        endsWith(s, len, "εισ") ||
+        endsWith(s, len, "ηθω") ||
+        endsWith(s, len, "ησω") ||
+        endsWith(s, len, "ουν") ||
+        endsWith(s, len, "ουσ")))
+      return len - 3;
+    
+    // two-character endings
+    if (len > 2 && (endsWith(s, len, "αν") ||
+        endsWith(s, len, "ασ") ||
+        endsWith(s, len, "αω") ||
+        endsWith(s, len, "ει") ||
+        endsWith(s, len, "εσ") ||
+        endsWith(s, len, "ησ") ||
+        endsWith(s, len, "οι") ||
+        endsWith(s, len, "οσ") ||
+        endsWith(s, len, "ου") ||
+        endsWith(s, len, "υσ") ||
+        endsWith(s, len, "ων")))
+      return len - 2;
+    
+    // last resort: a single trailing vowel
+    if (len > 1 && endsWithVowel(s, len))
+      return len - 1;
+
+    return len;
+  }
+  
+  /**
+   * Removes the endings "-εστερ", "-εστατ", "-οτερ", "-οτατ", "-υτερ",
+   * "-υτατ", "-ωτερ" and "-ωτατ".
+   * <p>
+   * Like the other rules, an ending is only removed when the word is longer
+   * than the ending. Without that guard a word that consists of nothing but
+   * the ending (for example "εστερ") would be reduced to length zero.
+   *
+   * @param s buffer holding the word
+   * @param len number of characters of {@code s} that make up the word
+   * @return the new word length, never smaller than 1 for a non-empty word
+   */
+  private int rule22(char s[], int len) {
+    if (len > 5 && (endsWith(s, len, "εστερ") ||
+        endsWith(s, len, "εστατ"))) {
+      return len - 5;
+    }
+
+    if (len > 4 && (endsWith(s, len, "οτερ") ||
+        endsWith(s, len, "οτατ") ||
+        endsWith(s, len, "υτερ") ||
+        endsWith(s, len, "υτατ") ||
+        endsWith(s, len, "ωτερ") ||
+        endsWith(s, len, "ωτατ"))) {
+      return len - 4;
+    }
+
+    return len;
+  }
+
+  /**
+   * Tells whether the first {@code len} characters of {@code s} end with {@code suffix}.
+   * <p>
+   * A suffix longer than {@code len} never matches. Characters are compared
+   * from the last one backwards.
+   *
+   * @param s buffer holding the word
+   * @param len number of characters of {@code s} that make up the word
+   * @param suffix the ending to look for
+   * @return {@code true} if the word ends with {@code suffix}
+   */
+  private boolean endsWith(char s[], int len, String suffix) {
+    final int suffixLen = suffix.length();
+    if (suffixLen > len) {
+      return false;
+    }
+
+    int wordPos = len - 1;
+    int suffixPos = suffixLen - 1;
+    while (suffixPos >= 0) {
+      if (s[wordPos] != suffix.charAt(suffixPos)) {
+        return false;
+      }
+      wordPos--;
+      suffixPos--;
+    }
+    return true;
+  }
+  
+  private boolean endsWithVowel(char s[], int len) {
+    if (len == 0)
+      return false;
+    switch(s[len - 1]) {
+      case 'α':
+      case 'ε':
+      case 'η':
+      case 'ι':
+      case 'ο':
+      case 'υ':
+      case 'ω':
+        return true;
+      default:
+        return false;
+    }
+  }
+  
+  private boolean endsWithVowelNoY(char s[], int len) {
+    if (len == 0)
+      return false;
+    switch(s[len - 1]) {
+      case 'α':
+      case 'ε':
+      case 'η':
+      case 'ι':
+      case 'ο':
+      case 'ω':
+        return true;
+      default:
+        return false;
+    }
+  }
+}
--- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt
+++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt
@ -0,0 +1,76 @@
+# Lucene Greek Stopwords list
+ο
+η
+το
+οι
+τα
+του
+τησ
+των
+τον
+την
+και
+κι
+κ
+ειμαι
+εισαι
+ειναι
+ειμαστε
+ειστε
+στο
+στον
+στη
+στην
+μα
+αλλα
+απο
+για
+προσ
+με
+σε
+ωσ
+παρα
+αντι
+κατα
+μετα
+θα
+να
+δε
+δεν
+μη
+μην
+επι
+ενω
+εαν
+αν
+τοτε
+που
+πωσ
+ποιοσ
+ποια
+ποιο
+ποιοι
+ποιεσ
+ποιων
+ποιουσ
+αυτοσ
+αυτη
+αυτο
+αυτοι
+αυτων
+αυτουσ
+αυτεσ
+αυτα
+εκεινοσ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεσ
+εκεινα
+εκεινων
+εκεινουσ
+οπωσ
+ομωσ
+ισωσ
+οσο
+οτι
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
@ -26,42 +26,67 @@ import org.apache.lucene.util.Version;
 */
 public class GreekAnalyzerTest extends BaseTokenStreamTestCase {

+  /**
+   * Analyzes several Greek sentences with a {@code GreekAnalyzer} built for
+   * {@code TEST_VERSION_CURRENT} and checks the stemmed terms it emits.
+   *
+   * @throws Exception in case an error occurs
+   */
+  public void testAnalyzer() throws Exception {
+    final Analyzer analyzer = new GreekAnalyzer(TEST_VERSION_CURRENT);
+
+    // capitals and small accented letters are normalized, and terms are stemmed
+    final String[] accentedTerms = { "μια", "εξαιρετ", "καλ", "πλουσ", "σειρ", "χαρακτηρ",
+        "ελληνικ", "γλωσσ" };
+    assertAnalyzesTo(analyzer,
+        "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
+        accentedTerms);
+
+    // small letters with diaeresis are normalized and punctuation marks are dropped
+    final String[] punctuatedTerms = { "προιοντ", "πολλαπλ", "αναγκ" };
+    assertAnalyzesTo(analyzer, "Προϊόντα (και)     [πολλαπλές] - ΑΝΑΓΚΕΣ",
+        punctuatedTerms);
+
+    // capital accented letters and capitals with diaeresis are normalized,
+    // and stop words are dropped
+    final String[] stopwordFreeTerms = { "προυποθεσ", "αψογ", "μεστ", "αλλ" };
+    assertAnalyzesTo(analyzer, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
+        stopwordFreeTerms);
+  }
+  
 	/**
 	 * Test the analysis of various greek strings.
 	 *
 	 * @throws Exception in case an error occurs
+	 * @deprecated Remove this test when support for 3.0 is no longer needed
 	 */
-	public void testAnalyzer() throws Exception {
-		Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+  @Deprecated
+	public void testAnalyzerBWCompat() throws Exception {
+		Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
 		// Verify the correct analysis of capitals and small accented letters
-		assertAnalyzesTo(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
-				new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
-				"\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3", "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3" });
+		assertAnalyzesTo(a, "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
+				new String[] { "μια", "εξαιρετικα", "καλη", "πλουσια", "σειρα", "χαρακτηρων",
+				"ελληνικησ", "γλωσσασ" });
 		// Verify the correct analysis of small letters with diaeresis and the elimination
 		// of punctuation marks
-		assertAnalyzesTo(a, "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9)     [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2]	-	\u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
-				new String[] { "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1", "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3", "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3" });
-		// Verify the correct analysis of capital accented letters and capitalletters with diaeresis,
+		assertAnalyzesTo(a, "Προϊόντα (και)     [πολλαπλές] - ΑΝΑΓΚΕΣ",
+				new String[] { "προιοντα", "πολλαπλεσ", "αναγκεσ" });
+		// Verify the correct analysis of capital accented letters and capital letters with diaeresis,
 		// as well as the elimination of stop words
-		assertAnalyzesTo(a, "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
-				new String[] { "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3", "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3", "\u03b1\u03bb\u03bb\u03bf\u03b9" });
+		assertAnalyzesTo(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
+				new String[] { "προυποθεσεισ", "αψογοσ", "μεστοσ", "αλλοι" });
 	}
 	
-	public void testReusableTokenStream() throws Exception {
-	    Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
-	    // Verify the correct analysis of capitals and small accented letters
-	    assertAnalyzesToReuse(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
-	            new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
-	            "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3", "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3" });
-	    // Verify the correct analysis of small letters with diaeresis and the elimination
-	    // of punctuation marks
-	    assertAnalyzesToReuse(a, "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9)     [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2] -   \u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
-	            new String[] { "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1", "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3", "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3" });
-	    // Verify the correct analysis of capital accented letters and capitalletters with diaeresis,
-	    // as well as the elimination of stop words
-	    assertAnalyzesToReuse(a, "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
-	            new String[] { "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3", "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3", "\u03b1\u03bb\u03bb\u03bf\u03b9" });
-	}
+  /**
+   * Same sentences and expected terms as {@code testAnalyzer}, but checked
+   * through {@code assertAnalyzesToReuse} on a single analyzer instance.
+   *
+   * @throws Exception in case an error occurs
+   */
+  public void testReusableTokenStream() throws Exception {
+    final Analyzer analyzer = new GreekAnalyzer(TEST_VERSION_CURRENT);
+
+    // capitals and small accented letters are normalized, and terms are stemmed
+    final String[] accentedTerms = { "μια", "εξαιρετ", "καλ", "πλουσ", "σειρ", "χαρακτηρ",
+        "ελληνικ", "γλωσσ" };
+    assertAnalyzesToReuse(analyzer,
+        "Μία εξαιρετικά καλή και πλούσια σειρά χαρακτήρων της Ελληνικής γλώσσας",
+        accentedTerms);
+
+    // small letters with diaeresis are normalized and punctuation marks are dropped
+    final String[] punctuatedTerms = { "προιοντ", "πολλαπλ", "αναγκ" };
+    assertAnalyzesToReuse(analyzer, "Προϊόντα (και)     [πολλαπλές] - ΑΝΑΓΚΕΣ",
+        punctuatedTerms);
+
+    // capital accented letters and capitals with diaeresis are normalized,
+    // and stop words are dropped
+    final String[] stopwordFreeTerms = { "προυποθεσ", "αψογ", "μεστ", "αλλ" };
+    assertAnalyzesToReuse(analyzer, "ΠΡΟΫΠΟΘΕΣΕΙΣ  Άψογος, ο μεστός και οι άλλοι",
+        stopwordFreeTerms);
+  }
 	
 	/**
 	 * Greek Analyzer didn't call standardFilter, so no normalization of acronyms.
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
@ -0,0 +1,508 @@
+package org.apache.lucene.analysis.el;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+
+public class TestGreekStemmer extends BaseTokenStreamTestCase {
+  // analyzer under test; the test methods pass it to checkOneTerm together with a word and its expected stem
+  Analyzer a = new GreekAnalyzer(TEST_VERSION_CURRENT);
+
+  /**
+   * Checks the term produced for inflected forms of masculine nouns.
+   * The words are grouped by ending, as named in the comment above each group;
+   * groups whose forms do not all yield the same term say so in a note.
+   */
+  public void testMasculineNouns() throws Exception {
+    // -ος
+    checkOneTerm(a, "άνθρωπος", "ανθρωπ");
+    checkOneTerm(a, "ανθρώπου", "ανθρωπ");
+    checkOneTerm(a, "άνθρωπο", "ανθρωπ");
+    checkOneTerm(a, "άνθρωπε", "ανθρωπ");
+    checkOneTerm(a, "άνθρωποι", "ανθρωπ");
+    checkOneTerm(a, "ανθρώπων", "ανθρωπ");
+    checkOneTerm(a, "ανθρώπους", "ανθρωπ");
+    checkOneTerm(a, "άνθρωποι", "ανθρωπ");
+    
+    // -ης
+    checkOneTerm(a, "πελάτης", "πελατ");
+    checkOneTerm(a, "πελάτη", "πελατ");
+    checkOneTerm(a, "πελάτες", "πελατ");
+    checkOneTerm(a, "πελατών", "πελατ");
+    
+    // -ας/-ες
+    checkOneTerm(a, "ελέφαντας", "ελεφαντ");
+    checkOneTerm(a, "ελέφαντα", "ελεφαντ");
+    checkOneTerm(a, "ελέφαντες", "ελεφαντ");
+    checkOneTerm(a, "ελεφάντων", "ελεφαντ");
+    
+    // -ας/-αδες
+    checkOneTerm(a, "μπαμπάς", "μπαμπ");
+    checkOneTerm(a, "μπαμπά", "μπαμπ");
+    checkOneTerm(a, "μπαμπάδες", "μπαμπ");
+    checkOneTerm(a, "μπαμπάδων", "μπαμπ");
+    
+    // -ης/-ηδες
+    checkOneTerm(a, "μπακάλης", "μπακαλ");
+    checkOneTerm(a, "μπακάλη", "μπακαλ");
+    checkOneTerm(a, "μπακάληδες", "μπακαλ");
+    checkOneTerm(a, "μπακάληδων", "μπακαλ");
+    
+    // -ες
+    checkOneTerm(a, "καφές", "καφ");
+    checkOneTerm(a, "καφέ", "καφ");
+    checkOneTerm(a, "καφέδες", "καφ");
+    checkOneTerm(a, "καφέδων", "καφ");
+    
+    // -έας/είς
+    checkOneTerm(a, "γραμματέας", "γραμματε");
+    checkOneTerm(a, "γραμματέα", "γραμματε");
+    // plural forms conflate w/ each other, not w/ the sing forms
+    checkOneTerm(a, "γραμματείς", "γραμματ");
+    checkOneTerm(a, "γραμματέων", "γραμματ");
+    
+    // -ους/οι
+    checkOneTerm(a, "απόπλους", "αποπλ");
+    checkOneTerm(a, "απόπλου", "αποπλ");
+    checkOneTerm(a, "απόπλοι", "αποπλ");
+    checkOneTerm(a, "απόπλων", "αποπλ");
+    
+    // -ους/-ουδες
+    checkOneTerm(a, "παππούς", "παππ");
+    checkOneTerm(a, "παππού", "παππ");
+    checkOneTerm(a, "παππούδες", "παππ");
+    checkOneTerm(a, "παππούδων", "παππ");
+    
+    // -ης/-εις
+    checkOneTerm(a, "λάτρης", "λατρ");
+    checkOneTerm(a, "λάτρη", "λατρ");
+    checkOneTerm(a, "λάτρεις", "λατρ");
+    checkOneTerm(a, "λάτρεων", "λατρ");
+    
+    // -υς
+    checkOneTerm(a, "πέλεκυς", "πελεκ");
+    checkOneTerm(a, "πέλεκυ", "πελεκ");
+    checkOneTerm(a, "πελέκεις", "πελεκ");
+    checkOneTerm(a, "πελέκεων", "πελεκ");
+    
+    // -ωρ
+    // note: nom./voc. doesn't conflate w/ the rest
+    checkOneTerm(a, "μέντωρ", "μεντωρ");
+    checkOneTerm(a, "μέντορος", "μεντορ");
+    checkOneTerm(a, "μέντορα", "μεντορ");
+    checkOneTerm(a, "μέντορες", "μεντορ");
+    checkOneTerm(a, "μεντόρων", "μεντορ");
+    
+    // -ων
+    checkOneTerm(a, "αγώνας", "αγων");
+    checkOneTerm(a, "αγώνος", "αγων");
+    checkOneTerm(a, "αγώνα", "αγων");
+    checkOneTerm(a, "αγώνα", "αγων");
+    checkOneTerm(a, "αγώνες", "αγων");
+    checkOneTerm(a, "αγώνων", "αγων");
+    
+    // -ας/-ηδες
+    checkOneTerm(a, "αέρας", "αερ");
+    checkOneTerm(a, "αέρα", "αερ");
+    checkOneTerm(a, "αέρηδες", "αερ");
+    checkOneTerm(a, "αέρηδων", "αερ");
+    
+    // -ης/-ητες
+    checkOneTerm(a, "γόης", "γο");
+    checkOneTerm(a, "γόη", "γοη"); // too short
+    // the two plural forms conflate
+    checkOneTerm(a, "γόητες", "γοητ");
+    checkOneTerm(a, "γοήτων", "γοητ");
+  }
+  
+  /**
+   * Checks the term produced for inflected forms of feminine nouns.
+   * The words are grouped by ending, as named in the comment above each group;
+   * groups whose forms do not all yield the same term say so in a note.
+   */
+  public void testFeminineNouns() throws Exception {
+    // -α/-ες,-ών
+    checkOneTerm(a, "φορά", "φορ");
+    checkOneTerm(a, "φοράς", "φορ");
+    checkOneTerm(a, "φορές", "φορ");
+    checkOneTerm(a, "φορών", "φορ");
+    
+    // -α/-ες,-ων
+    checkOneTerm(a, "αγελάδα", "αγελαδ");
+    checkOneTerm(a, "αγελάδας", "αγελαδ");
+    checkOneTerm(a, "αγελάδες", "αγελαδ");
+    checkOneTerm(a, "αγελάδων", "αγελαδ");
+    
+    // -η/-ες
+    checkOneTerm(a, "ζάχαρη", "ζαχαρ");
+    checkOneTerm(a, "ζάχαρης", "ζαχαρ");
+    checkOneTerm(a, "ζάχαρες", "ζαχαρ");
+    checkOneTerm(a, "ζαχάρεων", "ζαχαρ");
+    
+    // -η/-εις
+    checkOneTerm(a, "τηλεόραση", "τηλεορασ");
+    checkOneTerm(a, "τηλεόρασης", "τηλεορασ");
+    checkOneTerm(a, "τηλεοράσεις", "τηλεορασ");
+    checkOneTerm(a, "τηλεοράσεων", "τηλεορασ");
+    
+    // -α/-αδες
+    checkOneTerm(a, "μαμά", "μαμ");
+    checkOneTerm(a, "μαμάς", "μαμ");
+    checkOneTerm(a, "μαμάδες", "μαμ");
+    checkOneTerm(a, "μαμάδων", "μαμ");
+    
+    // -ος
+    checkOneTerm(a, "λεωφόρος", "λεωφορ");
+    checkOneTerm(a, "λεωφόρου", "λεωφορ");
+    checkOneTerm(a, "λεωφόρο", "λεωφορ");
+    checkOneTerm(a, "λεωφόρε", "λεωφορ");
+    checkOneTerm(a, "λεωφόροι", "λεωφορ");
+    checkOneTerm(a, "λεωφόρων", "λεωφορ");
+    checkOneTerm(a, "λεωφόρους", "λεωφορ");
+    
+    // -ου
+    checkOneTerm(a, "αλεπού", "αλεπ");
+    checkOneTerm(a, "αλεπούς", "αλεπ");
+    checkOneTerm(a, "αλεπούδες", "αλεπ");
+    checkOneTerm(a, "αλεπούδων", "αλεπ");
+    
+    // -έας/είς
+    // note: not all forms conflate
+    checkOneTerm(a, "γραμματέας", "γραμματε");
+    checkOneTerm(a, "γραμματέως", "γραμματ");
+    checkOneTerm(a, "γραμματέα", "γραμματε");
+    checkOneTerm(a, "γραμματείς", "γραμματ");
+    checkOneTerm(a, "γραμματέων", "γραμματ");
+  }
+  
+  /**
+   * Checks the term produced for inflected forms of neuter nouns.
+   * The words are grouped by ending, as named in the comment above each group;
+   * groups whose forms do not all yield the same term say so in a note.
+   */
+  public void testNeuterNouns() throws Exception {
+    // ending with -ο
+    // note: nom doesnt conflate
+    checkOneTerm(a, "βιβλίο", "βιβλι");
+    checkOneTerm(a, "βιβλίου", "βιβλ");
+    checkOneTerm(a, "βιβλία", "βιβλ");
+    checkOneTerm(a, "βιβλίων", "βιβλ");
+    
+    // ending with -ι
+    checkOneTerm(a, "πουλί", "πουλ");
+    checkOneTerm(a, "πουλιού", "πουλ");
+    checkOneTerm(a, "πουλιά", "πουλ");
+    checkOneTerm(a, "πουλιών", "πουλ");
+    
+    // ending with -α
+    // note: nom. doesnt conflate
+    checkOneTerm(a, "πρόβλημα", "προβλημ");
+    checkOneTerm(a, "προβλήματος", "προβλημα");
+    checkOneTerm(a, "προβλήματα", "προβλημα");
+    checkOneTerm(a, "προβλημάτων", "προβλημα");
+    
+    // ending with -ος/-ους
+    checkOneTerm(a, "πέλαγος", "πελαγ");
+    checkOneTerm(a, "πελάγους", "πελαγ");
+    checkOneTerm(a, "πελάγη", "πελαγ");
+    checkOneTerm(a, "πελάγων", "πελαγ");
+    
+    // ending with -ός/-ότος
+    checkOneTerm(a, "γεγονός", "γεγον");
+    checkOneTerm(a, "γεγονότος", "γεγον");
+    checkOneTerm(a, "γεγονότα", "γεγον");
+    checkOneTerm(a, "γεγονότων", "γεγον");
+    
+    // ending with -υ/-ιου
+    checkOneTerm(a, "βράδυ", "βραδ");
+    checkOneTerm(a, "βράδι", "βραδ");
+    checkOneTerm(a, "βραδιού", "βραδ");
+    checkOneTerm(a, "βράδια", "βραδ");
+    checkOneTerm(a, "βραδιών", "βραδ");
+    
+    // ending with -υ/-ατος
+    // note: nom. doesnt conflate
+    checkOneTerm(a, "δόρυ", "δορ");
+    checkOneTerm(a, "δόρατος", "δορατ");
+    checkOneTerm(a, "δόρατα", "δορατ");
+    checkOneTerm(a, "δοράτων", "δορατ");
+    
+    // ending with -ας
+    checkOneTerm(a, "κρέας", "κρε");
+    checkOneTerm(a, "κρέατος", "κρε");
+    checkOneTerm(a, "κρέατα", "κρε");
+    checkOneTerm(a, "κρεάτων", "κρε");
+    
+    // ending with -ως
+    checkOneTerm(a, "λυκόφως", "λυκοφω");
+    checkOneTerm(a, "λυκόφωτος", "λυκοφω");
+    checkOneTerm(a, "λυκόφωτα", "λυκοφω");
+    checkOneTerm(a, "λυκοφώτων", "λυκοφω");
+    
+    // ending with -ον/-ου
+    // note: nom. doesnt conflate
+    checkOneTerm(a, "μέσον", "μεσον");
+    checkOneTerm(a, "μέσου", "μεσ");
+    checkOneTerm(a, "μέσα", "μεσ");
+    checkOneTerm(a, "μέσων", "μεσ");
+    
+    // ending in -ον/-οντος
+    // note: nom. doesnt conflate
+    checkOneTerm(a, "ενδιαφέρον", "ενδιαφερον");
+    checkOneTerm(a, "ενδιαφέροντος", "ενδιαφεροντ");
+    checkOneTerm(a, "ενδιαφέροντα", "ενδιαφεροντ");
+    checkOneTerm(a, "ενδιαφερόντων", "ενδιαφεροντ");
+    
+    // ending with -εν/-εντος
+    checkOneTerm(a, "ανακοινωθέν", "ανακοινωθεν");
+    checkOneTerm(a, "ανακοινωθέντος", "ανακοινωθεντ");
+    checkOneTerm(a, "ανακοινωθέντα", "ανακοινωθεντ");
+    checkOneTerm(a, "ανακοινωθέντων", "ανακοινωθεντ");
+    
+    // ending with -αν/-αντος
+    checkOneTerm(a, "σύμπαν", "συμπ");
+    checkOneTerm(a, "σύμπαντος", "συμπαντ");
+    checkOneTerm(a, "σύμπαντα", "συμπαντ");
+    checkOneTerm(a, "συμπάντων", "συμπαντ");
+    
+    // ending with  -α/-ακτος
+    checkOneTerm(a, "γάλα", "γαλ");
+    checkOneTerm(a, "γάλακτος", "γαλακτ");
+    checkOneTerm(a, "γάλατα", "γαλατ");
+    checkOneTerm(a, "γαλάκτων", "γαλακτ");
+  }
+  
+  /**
+   * Checks the term produced for inflected forms of adjectives, including
+   * comparative and superlative forms. The words are grouped by ending, as
+   * named in the comment above each group.
+   */
+  public void testAdjectives() throws Exception {
+    // ending with -ής, -ές/-είς, -ή
+    checkOneTerm(a, "συνεχής", "συνεχ");
+    checkOneTerm(a, "συνεχούς", "συνεχ");
+    checkOneTerm(a, "συνεχή", "συνεχ");
+    checkOneTerm(a, "συνεχών", "συνεχ");
+    checkOneTerm(a, "συνεχείς", "συνεχ");
+    checkOneTerm(a, "συνεχές", "συνεχ");
+    
+    // ending with -ης, -ες/-εις, -η
+    checkOneTerm(a, "συνήθης", "συνηθ");
+    checkOneTerm(a, "συνήθους", "συνηθ");
+    checkOneTerm(a, "συνήθη", "συνηθ");
+    // note: doesn't conflate
+    checkOneTerm(a, "συνήθεις", "συν");
+    checkOneTerm(a, "συνήθων", "συνηθ");
+    checkOneTerm(a, "σύνηθες", "συνηθ");
+    
+    // ending with -υς, -υ/-εις, -ια
+    checkOneTerm(a, "βαθύς", "βαθ");
+    checkOneTerm(a, "βαθέος", "βαθε");
+    checkOneTerm(a, "βαθύ", "βαθ");
+    checkOneTerm(a, "βαθείς", "βαθ");
+    checkOneTerm(a, "βαθέων", "βαθ");
+    
+    checkOneTerm(a, "βαθιά", "βαθ");
+    checkOneTerm(a, "βαθιάς", "βαθι");
+    checkOneTerm(a, "βαθιές", "βαθι");
+    checkOneTerm(a, "βαθιών", "βαθ");
+    
+    checkOneTerm(a, "βαθέα", "βαθε");
+    
+    // comparative/superlative
+    checkOneTerm(a, "ψηλός", "ψηλ");
+    checkOneTerm(a, "ψηλότερος", "ψηλ");
+    checkOneTerm(a, "ψηλότατος", "ψηλ");
+    
+    checkOneTerm(a, "ωραίος", "ωραι");
+    checkOneTerm(a, "ωραιότερος", "ωραι");
+    checkOneTerm(a, "ωραιότατος", "ωραι");
+    
+    checkOneTerm(a, "επιεικής", "επιεικ");
+    checkOneTerm(a, "επιεικέστερος", "επιεικ");
+    checkOneTerm(a, "επιεικέστατος", "επιεικ");
+  }
+  
+
+  /**
+   * Checks the stems produced for inflected verb forms, one conjugation
+   * pattern per group (the pattern is named in the comment heading each group).
+   * <p>
+   * Every assertion feeds a single term through the fixture {@code a} and
+   * expects exactly one output term. Forms inside a blank-line-separated
+   * group are expected to share a stem; as the inline notes point out, the
+   * stems of different tenses are not always expected to conflate.
+   */
+  public void testVerbs() throws Exception {
+    // note, past/present verb stems will not conflate (from the paper)
+    //-ω,-α/-.ω,-.α
+    checkOneTerm(a, "ορίζω", "οριζ");
+    checkOneTerm(a, "όριζα", "οριζ");
+    checkOneTerm(a, "όριζε", "οριζ");
+    checkOneTerm(a, "ορίζοντας", "οριζ");
+    checkOneTerm(a, "ορίζομαι", "οριζ");
+    checkOneTerm(a, "οριζόμουν", "οριζ");
+    checkOneTerm(a, "ορίζεσαι", "οριζ");
+
+    checkOneTerm(a, "όρισα", "ορισ");
+    checkOneTerm(a, "ορίσω", "ορισ");
+    checkOneTerm(a, "όρισε", "ορισ");
+    checkOneTerm(a, "ορίσει", "ορισ");
+
+    checkOneTerm(a, "ορίστηκα", "οριστ");
+    checkOneTerm(a, "οριστώ", "οριστ");
+    checkOneTerm(a, "οριστείς", "οριστ");
+    checkOneTerm(a, "οριστεί", "οριστ");
+
+    checkOneTerm(a, "ορισμένο", "ορισμεν");
+    checkOneTerm(a, "ορισμένη", "ορισμεν");
+    checkOneTerm(a, "ορισμένος", "ορισμεν");
+
+    // -ω,-α/-ξω,-ξα
+    checkOneTerm(a, "ανοίγω", "ανοιγ");
+    checkOneTerm(a, "άνοιγα", "ανοιγ");
+    checkOneTerm(a, "άνοιγε", "ανοιγ");
+    checkOneTerm(a, "ανοίγοντας", "ανοιγ");
+    checkOneTerm(a, "ανοίγομαι", "ανοιγ");
+    checkOneTerm(a, "ανοιγόμουν", "ανοιγ");
+
+    checkOneTerm(a, "άνοιξα", "ανοιξ");
+    checkOneTerm(a, "ανοίξω", "ανοιξ");
+    checkOneTerm(a, "άνοιξε", "ανοιξ");
+    checkOneTerm(a, "ανοίξει", "ανοιξ");
+
+    checkOneTerm(a, "ανοίχτηκα", "ανοιχτ");
+    checkOneTerm(a, "ανοιχτώ", "ανοιχτ");
+    checkOneTerm(a, "ανοιχτείς", "ανοιχτ");
+    checkOneTerm(a, "ανοιχτεί", "ανοιχτ");
+
+    checkOneTerm(a, "ανοίξου", "ανοιξ");
+
+    //-ώ/-άω,-ούσα/-άσω,-ασα
+    checkOneTerm(a, "περνώ", "περν");
+    checkOneTerm(a, "περνάω", "περν");
+    checkOneTerm(a, "περνούσα", "περν");
+    checkOneTerm(a, "πέρναγα", "περν");
+    checkOneTerm(a, "πέρνα", "περν");
+    checkOneTerm(a, "περνώντας", "περν");
+
+    checkOneTerm(a, "πέρασα", "περασ");
+    checkOneTerm(a, "περάσω", "περασ");
+    checkOneTerm(a, "πέρασε", "περασ");
+    checkOneTerm(a, "περάσει", "περασ");
+
+    checkOneTerm(a, "περνιέμαι", "περν");
+    checkOneTerm(a, "περνιόμουν", "περν");
+
+    checkOneTerm(a, "περάστηκα", "περαστ");
+    checkOneTerm(a, "περαστώ", "περαστ");
+    checkOneTerm(a, "περαστείς", "περαστ");
+    checkOneTerm(a, "περαστεί", "περαστ");
+
+    checkOneTerm(a, "περασμένο", "περασμεν");
+    checkOneTerm(a, "περασμένη", "περασμεν");
+    checkOneTerm(a, "περασμένος", "περασμεν");
+
+    // -ώ/-άω,-ούσα/-άξω,-αξα
+    checkOneTerm(a, "πετώ", "πετ");
+    checkOneTerm(a, "πετάω", "πετ");
+    checkOneTerm(a, "πετούσα", "πετ");
+    checkOneTerm(a, "πέταγα", "πετ");
+    checkOneTerm(a, "πέτα", "πετ");
+    checkOneTerm(a, "πετώντας", "πετ");
+    checkOneTerm(a, "πετιέμαι", "πετ");
+    checkOneTerm(a, "πετιόμουν", "πετ");
+
+    checkOneTerm(a, "πέταξα", "πεταξ");
+    checkOneTerm(a, "πετάξω", "πεταξ");
+    checkOneTerm(a, "πέταξε", "πεταξ");
+    checkOneTerm(a, "πετάξει", "πεταξ");
+
+    checkOneTerm(a, "πετάχτηκα", "πεταχτ");
+    checkOneTerm(a, "πεταχτώ", "πεταχτ");
+    checkOneTerm(a, "πεταχτείς", "πεταχτ");
+    checkOneTerm(a, "πεταχτεί", "πεταχτ");
+
+    checkOneTerm(a, "πεταμένο", "πεταμεν");
+    checkOneTerm(a, "πεταμένη", "πεταμεν");
+    checkOneTerm(a, "πεταμένος", "πεταμεν");
+
+    // -ώ/-άω,-ούσα / -έσω,-εσα
+    checkOneTerm(a, "καλώ", "καλ");
+    checkOneTerm(a, "καλούσα", "καλ");
+    checkOneTerm(a, "καλείς", "καλ");
+    checkOneTerm(a, "καλώντας", "καλ");
+
+    checkOneTerm(a, "καλούμαι", "καλ");
+    // pass. imperfect / imp. progressive doesn't conflate
+    checkOneTerm(a, "καλούμουν", "καλουμ");
+    checkOneTerm(a, "καλείσαι", "καλεισα");
+
+    checkOneTerm(a, "καλέστηκα", "καλεστ");
+    checkOneTerm(a, "καλεστώ", "καλεστ");
+    checkOneTerm(a, "καλεστείς", "καλεστ");
+    checkOneTerm(a, "καλεστεί", "καλεστ");
+
+    checkOneTerm(a, "καλεσμένο", "καλεσμεν");
+    checkOneTerm(a, "καλεσμένη", "καλεσμεν");
+    checkOneTerm(a, "καλεσμένος", "καλεσμεν");
+
+    checkOneTerm(a, "φορώ", "φορ");
+    checkOneTerm(a, "φοράω", "φορ");
+    checkOneTerm(a, "φορούσα", "φορ");
+    checkOneTerm(a, "φόραγα", "φορ");
+    checkOneTerm(a, "φόρα", "φορ");
+    checkOneTerm(a, "φορώντας", "φορ");
+    checkOneTerm(a, "φοριέμαι", "φορ");
+    checkOneTerm(a, "φοριόμουν", "φορ");
+    checkOneTerm(a, "φοριέσαι", "φορ");
+
+    checkOneTerm(a, "φόρεσα", "φορεσ");
+    checkOneTerm(a, "φορέσω", "φορεσ");
+    checkOneTerm(a, "φόρεσε", "φορεσ");
+    checkOneTerm(a, "φορέσει", "φορεσ");
+
+    checkOneTerm(a, "φορέθηκα", "φορεθ");
+    checkOneTerm(a, "φορεθώ", "φορεθ");
+    checkOneTerm(a, "φορεθείς", "φορεθ");
+    checkOneTerm(a, "φορεθεί", "φορεθ");
+
+    checkOneTerm(a, "φορεμένο", "φορεμεν");
+    checkOneTerm(a, "φορεμένη", "φορεμεν");
+    checkOneTerm(a, "φορεμένος", "φορεμεν");
+
+    // -ώ/-άω,-ούσα / -ήσω,-ησα
+    checkOneTerm(a, "κρατώ", "κρατ");
+    checkOneTerm(a, "κρατάω", "κρατ");
+    checkOneTerm(a, "κρατούσα", "κρατ");
+    checkOneTerm(a, "κράταγα", "κρατ");
+    checkOneTerm(a, "κράτα", "κρατ");
+    checkOneTerm(a, "κρατώντας", "κρατ");
+
+    checkOneTerm(a, "κράτησα", "κρατ");
+    checkOneTerm(a, "κρατήσω", "κρατ");
+    checkOneTerm(a, "κράτησε", "κρατ");
+    checkOneTerm(a, "κρατήσει", "κρατ");
+
+    checkOneTerm(a, "κρατούμαι", "κρατ");
+    checkOneTerm(a, "κρατιέμαι", "κρατ");
+    // this imperfect form doesn't conflate
+    checkOneTerm(a, "κρατούμουν", "κρατουμ");
+    checkOneTerm(a, "κρατιόμουν", "κρατ");
+    // this imp. prog form doesn't conflate
+    checkOneTerm(a, "κρατείσαι", "κρατεισα");
+
+    checkOneTerm(a, "κρατήθηκα", "κρατ");
+    checkOneTerm(a, "κρατηθώ", "κρατ");
+    checkOneTerm(a, "κρατηθείς", "κρατ");
+    checkOneTerm(a, "κρατηθεί", "κρατ");
+    checkOneTerm(a, "κρατήσου", "κρατ");
+
+    checkOneTerm(a, "κρατημένο", "κρατημεν");
+    checkOneTerm(a, "κρατημένη", "κρατημεν");
+    checkOneTerm(a, "κρατημένος", "κρατημεν");
+
+    // -.μαι,-.μουν / -.ώ,-.ηκα
+    checkOneTerm(a, "κοιμάμαι", "κοιμ");
+    checkOneTerm(a, "κοιμόμουν", "κοιμ");
+    checkOneTerm(a, "κοιμάσαι", "κοιμ");
+
+    checkOneTerm(a, "κοιμήθηκα", "κοιμ");
+    checkOneTerm(a, "κοιμηθώ", "κοιμ");
+    checkOneTerm(a, "κοιμήσου", "κοιμ");
+    checkOneTerm(a, "κοιμηθεί", "κοιμ");
+
+    checkOneTerm(a, "κοιμισμένο", "κοιμισμεν");
+    checkOneTerm(a, "κοιμισμένη", "κοιμισμεν");
+    checkOneTerm(a, "κοιμισμένος", "κοιμισμεν");
+  }
+  
+  /**
+   * Checks a small table of special-case terms against their expected stems.
+   * <p>
+   * Some inputs are already equal to the expected stem and must come back
+   * unchanged; others are written unaccented with a non-final sigma at the
+   * end of the word. NOTE(review): presumably these correspond to exception
+   * rules inside the stemmer -- confirm against the stemmer implementation.
+   */
+  public void testExceptions() throws Exception {
+    // each row is { input term, expected stem }; rows are checked in order
+    final String[][] cases = {
+      { "καθεστώτα", "καθεστ" },
+      { "καθεστώτος", "καθεστ" },
+      { "καθεστώς", "καθεστ" },
+      { "καθεστώτων", "καθεστ" },
+
+      { "χουμε", "χουμ" },
+      { "χουμ", "χουμ" },
+
+      { "υποταγεσ", "υποταγ" },
+      { "υποταγ", "υποταγ" },
+
+      { "εμετε", "εμετ" },
+      { "εμετ", "εμετ" },
+
+      { "αρχοντασ", "αρχοντ" },
+      { "αρχοντων", "αρχοντ" },
+    };
+    for (String[] pair : cases) {
+      checkOneTerm(a, pair[0], pair[1]);
+    }
+  }
+}
--- a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
@ -33,6 +33,7 @@ public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory
  @Override
  public void init(Map<String, String> args) {
    super.init(args);
+    assureMatchVersion();
    if (args.containsKey("charset"))
      throw new SolrException(ErrorCode.SERVER_ERROR,
          "The charset parameter is no longer supported.  "
--- a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java
@ -0,0 +1,30 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.el.GreekStemFilter;
+
+/**
+ * Token filter factory that produces {@link GreekStemFilter} instances.
+ * <p>
+ * The factory reads no configuration arguments of its own.
+ */
+public class GreekStemFilterFactory extends BaseTokenFilterFactory {
+
+  /**
+   * Wraps the given stream in a new {@link GreekStemFilter}.
+   *
+   * @param input the token stream to be filtered
+   * @return a new {@link GreekStemFilter} reading from {@code input}
+   */
+  public TokenStream create(TokenStream input) {
+    final TokenStream stemmed = new GreekStemFilter(input);
+    return stemmed;
+  }
+}
--- a/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestGreekLowerCaseFilterFactory.java
@ -31,10 +31,11 @@ public class TestGreekLowerCaseFilterFactory extends BaseTokenTestCase {
  /**
   * Ensure the filter actually lowercases (and a bit more) greek text.
   */
-  public void testStemming() throws Exception {
+  public void testNormalization() throws Exception {
    Reader reader = new StringReader("Μάϊος ΜΆΪΟΣ");
    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, reader);
    GreekLowerCaseFilterFactory factory = new GreekLowerCaseFilterFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
    TokenStream stream = factory.create(tokenizer);
    assertTokenStreamContents(stream, new String[] { "μαιοσ", "μαιοσ" });
  }
--- a/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestGreekStemFilterFactory.java
@ -0,0 +1,40 @@
+package org.apache.solr.analysis;
+
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Smoke test for {@link GreekStemFilterFactory}: a filter created by the
+ * factory must stem a lowercased Greek token.
+ */
+public class TestGreekStemFilterFactory extends BaseTokenTestCase {
+
+  /**
+   * Runs one whitespace-delimited word through {@link GreekLowerCaseFilter}
+   * and then through a filter created by the factory, and expects a single
+   * stemmed token.
+   */
+  public void testStemming() throws Exception {
+    final String[] expectedStems = { "ανθρωπ" };
+    final Reader text = new StringReader("άνθρωπος");
+    final Tokenizer words = new WhitespaceTokenizer(DEFAULT_VERSION, text);
+    // the stem filter is fed the output of the lowercase filter
+    final TokenStream lowercased = new GreekLowerCaseFilter(DEFAULT_VERSION, words);
+    final TokenStream stemmed = new GreekStemFilterFactory().create(lowercased);
+    assertTokenStreamContents(stemmed, expectedStems);
+  }
+}