mirror of https://github.com/apache/lucene.git
LUCENE-5468: hunspell2 -> hunspell (with previous options and tests)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5468@1572718 13f79535-47bb-0310-9956-ffa450edef68
parent b2b86dd8ad
commit c4f4beb27e
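In short: this commit moves the FST-based hunspell2 implementation over the old org.apache.lucene.analysis.hunspell package, keeping the previous options (ignoreCase, multiple .dic files, recursionCap, longestOnly) and tests. A minimal usage sketch of the API as it stands after this commit follows; the wrapper class, file names, and the incoming `tokens` stream are placeholders for illustration, not part of the commit:

import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Collections;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;

public class HunspellUsageSketch {
  public static TokenStream stemmed(TokenStream tokens) throws Exception {
    // File names are placeholders; any hunspell .aff/.dic pair works.
    // The Dictionary constructor reads but does not close the streams.
    try (InputStream affix = new FileInputStream("en_GB.aff");
         InputStream dic = new FileInputStream("en_GB.dic")) {
      // Old API: new HunspellDictionary(affix, dic, matchVersion, ignoreCase)
      // New API: no Lucene Version parameter; ignoreCase moves into the constructor.
      Dictionary dictionary = new Dictionary(affix, Collections.singletonList(dic), false);
      // dedup=true, recursionCap=2 (the documented default), longestOnly=false
      return new HunspellStemFilter(tokens, dictionary, true, 2, false);
    }
  }
}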
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.hunspell2;
 * limitations under the License.
 */

import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -28,14 +27,19 @@ import org.apache.lucene.util.OfflineSorter;
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

import java.io.*;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
@@ -71,27 +75,27 @@ public class Dictionary {
  private static final String PREFIX_CONDITION_REGEX_PATTERN = "%s.*";
  private static final String SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";

  public FST<IntsRef> prefixes;
  public FST<IntsRef> suffixes;
  FST<IntsRef> prefixes;
  FST<IntsRef> suffixes;

  // all Patterns used by prefixes and suffixes. these are typically re-used across
  // many affix stripping rules. so these are deduplicated, to save RAM.
  // TODO: maybe don't use Pattern for the condition check...
  // TODO: when we cut over Affix to FST, just store integer index to this.
  public ArrayList<Pattern> patterns = new ArrayList<>();
  ArrayList<Pattern> patterns = new ArrayList<>();

  // the entries in the .dic file, mapping to their set of flags.
  // the fst output is the ordinal for flagLookup
  public FST<Long> words;
  FST<Long> words;
  // the list of unique flagsets (wordforms). theoretically huge, but practically
  // small (e.g. for polish this is 756), otherwise humans wouldn't be able to deal with it either.
  public BytesRefHash flagLookup = new BytesRefHash();
  BytesRefHash flagLookup = new BytesRefHash();

  // the list of unique strip affixes.
  public BytesRefHash stripLookup = new BytesRefHash();
  BytesRefHash stripLookup = new BytesRefHash();

  // 8 bytes per affix
  public byte[] affixData = new byte[64];
  byte[] affixData = new byte[64];
  private int currentAffix = 0;

  private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
@@ -100,7 +104,11 @@ public class Dictionary {
  private int aliasCount = 0;

  private final File tempDir = OfflineSorter.defaultTempDir(); // TODO: make this configurable?

  public static final int IGNORE_CASE = 1;

  boolean ignoreCase;

  /**
   * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
@@ -112,6 +120,21 @@ public class Dictionary {
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public Dictionary(InputStream affix, InputStream dictionary) throws IOException, ParseException {
    this(affix, Collections.singletonList(dictionary), false);
  }

  /**
   * Creates a new Dictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
   * You have to close the provided InputStreams yourself.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionaries InputStream for reading the hunspell dictionary files (won't be closed).
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public Dictionary(InputStream affix, List<InputStream> dictionaries, boolean ignoreCase) throws IOException, ParseException {
    this.ignoreCase = ignoreCase;
    BufferedInputStream buffered = new BufferedInputStream(affix, 8192);
    buffered.mark(8192);
    String encoding = getDictionaryEncoding(affix);
@@ -122,7 +145,7 @@ public class Dictionary {
    stripLookup.add(new BytesRef()); // no strip -> ord 0
    PositiveIntOutputs o = PositiveIntOutputs.getSingleton();
    Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE4, o);
    readDictionaryFile(dictionary, decoder, b);
    readDictionaryFiles(dictionaries, decoder, b);
    words = b.finish();
  }

@@ -145,7 +168,7 @@ public class Dictionary {
    return decodeFlags(flagLookup.get(ord, scratch));
  }

  public Integer lookupOrd(char word[], int offset, int length) throws IOException {
  Integer lookupOrd(char word[], int offset, int length) throws IOException {
    final FST.BytesReader bytesReader = words.getBytesReader();
    final FST.Arc<Long> arc = words.getFirstArc(new FST.Arc<Long>());
    // Accumulate output as we go
@@ -269,7 +292,6 @@ public class Dictionary {
      Util.toUTF32(entry.getKey(), scratch);
      List<Character> entries = entry.getValue();
      IntsRef output = new IntsRef(entries.size());
      int upto = 0;
      for (Character c : entries) {
        output.ints[output.length++] = c;
      }
@@ -480,23 +502,39 @@ public class Dictionary {
  }

  /**
   * Reads the dictionary file through the provided InputStream, building up the words map
   * Reads the dictionary file through the provided InputStreams, building up the words map
   *
   * @param dictionary InputStream to read the dictionary file through
   * @param dictionaries InputStreams to read the dictionary file through
   * @param decoder CharsetDecoder used to decode the contents of the file
   * @throws IOException Can be thrown while reading from the file
   */
  private void readDictionaryFile(InputStream dictionary, CharsetDecoder decoder, Builder<Long> words) throws IOException {
  private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<Long> words) throws IOException {
    BytesRef flagsScratch = new BytesRef();
    IntsRef scratchInts = new IntsRef();

    BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
    String line = lines.readLine(); // first line is number of entries (approximately, sometimes)

    File unsorted = File.createTempFile("unsorted", "dat", tempDir);
    try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
      while ((line = lines.readLine()) != null) {
        writer.write(line.getBytes(IOUtils.CHARSET_UTF_8));
      for (InputStream dictionary : dictionaries) {
        BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
        String line = lines.readLine(); // first line is number of entries (approximately, sometimes)

        while ((line = lines.readLine()) != null) {
          if (ignoreCase) {
            int flagSep = line.lastIndexOf('/');
            if (flagSep == -1) {
              writer.write(line.toLowerCase(Locale.ROOT).getBytes(IOUtils.CHARSET_UTF_8));
            } else {
              StringBuilder sb = new StringBuilder();
              sb.append(line.substring(0, flagSep).toLowerCase(Locale.ROOT));
              if (flagSep < line.length()) {
                sb.append(line.substring(flagSep, line.length()));
              }
              writer.write(sb.toString().getBytes(IOUtils.CHARSET_UTF_8));
            }
          } else {
            writer.write(line.getBytes(IOUtils.CHARSET_UTF_8));
          }
        }
      }
    }
    File sorted = File.createTempFile("sorted", "dat", tempDir);
@@ -544,6 +582,7 @@ public class Dictionary {
    BytesRef currentEntry = new BytesRef();
    char currentFlags[] = new char[0];

    String line;
    while (reader.read(scratchLine)) {
      line = scratchLine.utf8ToString();
      String entry;
@@ -1,157 +0,0 @@
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.regex.Pattern;

/**
 * Wrapper class representing a hunspell affix
 */
public class HunspellAffix {

  private String append; // the affix itself, what is appended
  private char appendFlags[]; // continuation class flags
  private String strip;

  private String condition;
  private Pattern conditionPattern;

  private char flag;

  private boolean crossProduct;

  /**
   * Checks whether the given text matches the conditional pattern on this affix
   *
   * @param text Text to check if it matches the affix's conditional pattern
   * @return {@code true} if the text meets the condition, {@code false} otherwise
   */
  public boolean checkCondition(CharSequence text) {
    return conditionPattern.matcher(text).matches();
  }

  /**
   * Returns the append defined for the affix
   *
   * @return Defined append
   */
  public String getAppend() {
    return append;
  }

  /**
   * Sets the append defined for the affix
   *
   * @param append Defined append for the affix
   */
  public void setAppend(String append) {
    this.append = append;
  }

  /**
   * Returns the flags defined for the affix append
   *
   * @return Flags defined for the affix append
   */
  public char[] getAppendFlags() {
    return appendFlags;
  }

  /**
   * Sets the flags defined for the affix append
   *
   * @param appendFlags Flags defined for the affix append
   */
  public void setAppendFlags(char[] appendFlags) {
    this.appendFlags = appendFlags;
  }

  /**
   * Returns the stripping characters defined for the affix
   *
   * @return Stripping characters defined for the affix
   */
  public String getStrip() {
    return strip;
  }

  /**
   * Sets the stripping characters defined for the affix
   *
   * @param strip Stripping characters defined for the affix
   */
  public void setStrip(String strip) {
    this.strip = strip;
  }

  /**
   * Returns the condition that must be met before the affix can be applied
   *
   * @return Condition that must be met before the affix can be applied
   */
  public String getCondition() {
    return condition;
  }

  /**
   * Sets the condition that must be met before the affix can be applied
   *
   * @param condition Condition to be met before affix application
   * @param pattern Condition as a regular expression pattern
   */
  public void setCondition(String condition, String pattern) {
    this.condition = condition;
    this.conditionPattern = Pattern.compile(pattern);
  }

  /**
   * Returns the affix flag
   *
   * @return Affix flag
   */
  public char getFlag() {
    return flag;
  }

  /**
   * Sets the affix flag
   *
   * @param flag Affix flag
   */
  public void setFlag(char flag) {
    this.flag = flag;
  }

  /**
   * Returns whether the affix is defined as cross product
   *
   * @return {@code true} if the affix is cross product, {@code false} otherwise
   */
  public boolean isCrossProduct() {
    return crossProduct;
  }

  /**
   * Sets whether the affix is defined as cross product
   *
   * @param crossProduct Whether the affix is defined as cross product
   */
  public void setCrossProduct(boolean crossProduct) {
    this.crossProduct = crossProduct;
  }
}
@@ -1,507 +0,0 @@
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

/**
 * In-memory structure for the dictionary (.dic) and affix (.aff)
 * data of a hunspell dictionary.
 */
public class HunspellDictionary {

  static final HunspellWord NOFLAGS = new HunspellWord();

  private static final String ALIAS_KEY = "AF";
  private static final String PREFIX_KEY = "PFX";
  private static final String SUFFIX_KEY = "SFX";
  private static final String FLAG_KEY = "FLAG";

  private static final String NUM_FLAG_TYPE = "num";
  private static final String UTF8_FLAG_TYPE = "UTF-8";
  private static final String LONG_FLAG_TYPE = "long";

  private static final String PREFIX_CONDITION_REGEX_PATTERN = "%s.*";
  private static final String SUFFIX_CONDITION_REGEX_PATTERN = ".*%s";

  private static final boolean IGNORE_CASE_DEFAULT = false;
  private static final boolean STRICT_AFFIX_PARSING_DEFAULT = true;

  private CharArrayMap<List<HunspellWord>> words;
  private CharArrayMap<List<HunspellAffix>> prefixes;
  private CharArrayMap<List<HunspellAffix>> suffixes;

  private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy
  private boolean ignoreCase = IGNORE_CASE_DEFAULT;

  private final Version version;

  private String[] aliases;
  private int aliasCount = 0;

  /**
   * Creates a new HunspellDictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
   * You have to close the provided InputStreams yourself.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionary InputStream for reading the hunspell dictionary file (won't be closed).
   * @param version Lucene Version
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public HunspellDictionary(InputStream affix, InputStream dictionary, Version version) throws IOException, ParseException {
    this(affix, Arrays.asList(dictionary), version, IGNORE_CASE_DEFAULT);
  }

  /**
   * Creates a new HunspellDictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
   * You have to close the provided InputStreams yourself.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionary InputStream for reading the hunspell dictionary file (won't be closed).
   * @param version Lucene Version
   * @param ignoreCase If true, dictionary matching will be case insensitive
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public HunspellDictionary(InputStream affix, InputStream dictionary, Version version, boolean ignoreCase) throws IOException, ParseException {
    this(affix, Arrays.asList(dictionary), version, ignoreCase);
  }

  /**
   * Creates a new HunspellDictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
   * You have to close the provided InputStreams yourself.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionaries InputStreams for reading the hunspell dictionary file (won't be closed).
   * @param version Lucene Version
   * @param ignoreCase If true, dictionary matching will be case insensitive
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public HunspellDictionary(InputStream affix, List<InputStream> dictionaries, Version version, boolean ignoreCase) throws IOException, ParseException {
    this(affix, dictionaries, version, ignoreCase, STRICT_AFFIX_PARSING_DEFAULT);
  }

  /**
   * Creates a new HunspellDictionary containing the information read from the provided InputStreams to hunspell affix
   * and dictionary files.
   * You have to close the provided InputStreams yourself.
   *
   * @param affix InputStream for reading the hunspell affix file (won't be closed).
   * @param dictionaries InputStreams for reading the hunspell dictionary file (won't be closed).
   * @param version Lucene Version
   * @param ignoreCase If true, dictionary matching will be case insensitive
   * @param strictAffixParsing Affix strict parsing enabled or not (an error while reading a rule causes exception or is ignored)
   * @throws IOException Can be thrown while reading from the InputStreams
   * @throws ParseException Can be thrown if the content of the files does not meet expected formats
   */
  public HunspellDictionary(InputStream affix, List<InputStream> dictionaries, Version version, boolean ignoreCase, boolean strictAffixParsing) throws IOException, ParseException {
    this.version = version;
    this.ignoreCase = ignoreCase;
    String encoding = getDictionaryEncoding(affix);
    CharsetDecoder decoder = getJavaEncoding(encoding);
    readAffixFile(affix, decoder, strictAffixParsing);
    words = new CharArrayMap<List<HunspellWord>>(version, 65535 /* guess */, this.ignoreCase);
    for (InputStream dictionary : dictionaries) {
      readDictionaryFile(dictionary, decoder);
    }
  }

  /**
   * Looks up HunspellWords that match the String created from the given char array, offset and length
   *
   * @param word Char array to generate the String from
   * @param offset Offset in the char array that the String starts at
   * @param length Length from the offset that the String is
   * @return List of HunspellWords that match the generated String, or {@code null} if none are found
   */
  public List<HunspellWord> lookupWord(char word[], int offset, int length) {
    return words.get(word, offset, length);
  }

  /**
   * Looks up HunspellAffix prefixes that have an append that matches the String created from the given char array, offset and length
   *
   * @param word Char array to generate the String from
   * @param offset Offset in the char array that the String starts at
   * @param length Length from the offset that the String is
   * @return List of HunspellAffix prefixes with an append that matches the String, or {@code null} if none are found
   */
  public List<HunspellAffix> lookupPrefix(char word[], int offset, int length) {
    return prefixes.get(word, offset, length);
  }

  /**
   * Looks up HunspellAffix suffixes that have an append that matches the String created from the given char array, offset and length
   *
   * @param word Char array to generate the String from
   * @param offset Offset in the char array that the String starts at
   * @param length Length from the offset that the String is
   * @return List of HunspellAffix suffixes with an append that matches the String, or {@code null} if none are found
   */
  public List<HunspellAffix> lookupSuffix(char word[], int offset, int length) {
    return suffixes.get(word, offset, length);
  }

  /**
   * Reads the affix file through the provided InputStream, building up the prefix and suffix maps
   *
   * @param affixStream InputStream to read the content of the affix file from
   * @param decoder CharsetDecoder to decode the content of the file
   * @throws IOException Can be thrown while reading from the InputStream
   */
  private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, boolean strict) throws IOException, ParseException {
    prefixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);
    suffixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);

    LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
    String line = null;
    while ((line = reader.readLine()) != null) {
      if (line.startsWith(ALIAS_KEY)) {
        parseAlias(line);
      } else if (line.startsWith(PREFIX_KEY)) {
        parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN, strict);
      } else if (line.startsWith(SUFFIX_KEY)) {
        parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN, strict);
      } else if (line.startsWith(FLAG_KEY)) {
        // Assume that the FLAG line comes before any prefix or suffixes
        // Store the strategy so it can be used when parsing the dic file
        flagParsingStrategy = getFlagParsingStrategy(line);
      }
    }
  }

  /**
   * Parses a specific affix rule putting the result into the provided affix map
   *
   * @param affixes Map where the result of the parsing will be put
   * @param header Header line of the affix rule
   * @param reader BufferedReader to read the content of the rule from
   * @param conditionPattern {@link String#format(String, Object...)} pattern to be used to generate the condition regex
   *                         pattern
   * @throws IOException Can be thrown while reading the rule
   */
  private void parseAffix(CharArrayMap<List<HunspellAffix>> affixes,
                          String header,
                          LineNumberReader reader,
                          String conditionPattern,
                          boolean strict) throws IOException, ParseException {
    String args[] = header.split("\\s+");

    boolean crossProduct = args[2].equals("Y");

    int numLines = Integer.parseInt(args[3]);
    for (int i = 0; i < numLines; i++) {
      String line = reader.readLine();
      String ruleArgs[] = line.split("\\s+");

      if (ruleArgs.length < 5) {
        if (strict) {
          throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
        }
        continue;
      }

      HunspellAffix affix = new HunspellAffix();

      affix.setFlag(flagParsingStrategy.parseFlag(ruleArgs[1]));
      affix.setStrip(ruleArgs[2].equals("0") ? "" : ruleArgs[2]);

      String affixArg = ruleArgs[3];

      int flagSep = affixArg.lastIndexOf('/');
      if (flagSep != -1) {
        String flagPart = affixArg.substring(flagSep + 1);

        if (aliasCount > 0) {
          flagPart = getAliasValue(Integer.parseInt(flagPart));
        }

        char appendFlags[] = flagParsingStrategy.parseFlags(flagPart);
        Arrays.sort(appendFlags);
        affix.setAppendFlags(appendFlags);
        affix.setAppend(affixArg.substring(0, flagSep));
      } else {
        affix.setAppend(affixArg);
      }

      String condition = ruleArgs[4];
      affix.setCondition(condition, String.format(Locale.ROOT, conditionPattern, condition));
      affix.setCrossProduct(crossProduct);

      List<HunspellAffix> list = affixes.get(affix.getAppend());
      if (list == null) {
        list = new ArrayList<HunspellAffix>();
        affixes.put(affix.getAppend(), list);
      }

      list.add(affix);
    }
  }

  /**
   * Parses the encoding specified in the affix file readable through the provided InputStream
   *
   * @param affix InputStream for reading the affix file
   * @return Encoding specified in the affix file
   * @throws IOException Can be thrown while reading from the InputStream
   * @throws ParseException Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>}
   */
  private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
    final StringBuilder encoding = new StringBuilder();
    for (;;) {
      encoding.setLength(0);
      int ch;
      while ((ch = affix.read()) >= 0) {
        if (ch == '\n') {
          break;
        }
        if (ch != '\r') {
          encoding.append((char)ch);
        }
      }
      if (
          encoding.length() == 0 || encoding.charAt(0) == '#' ||
          // this test only at the end as ineffective but would allow lines only containing spaces:
          encoding.toString().trim().length() == 0
      ) {
        if (ch < 0) {
          throw new ParseException("Unexpected end of affix file.", 0);
        }
        continue;
      }
      if ("SET ".equals(encoding.substring(0, 4))) {
        // cleanup the encoding string, too (whitespace)
        return encoding.substring(4).trim();
      }
      throw new ParseException("The first non-comment line in the affix file must "+
          "be a 'SET charset', was: '" + encoding +"'", 0);
    }
  }

  /**
   * Retrieves the CharsetDecoder for the given encoding. Note, This isn't perfect as I think ISCII-DEVANAGARI and
   * MICROSOFT-CP1251 etc are allowed...
   *
   * @param encoding Encoding to retrieve the CharsetDecoder for
   * @return CharSetDecoder for the given encoding
   */
  private CharsetDecoder getJavaEncoding(String encoding) {
    Charset charset = Charset.forName(encoding);
    return charset.newDecoder();
  }

  /**
   * Determines the appropriate {@link FlagParsingStrategy} based on the FLAG definition line taken from the affix file
   *
   * @param flagLine Line containing the flag information
   * @return FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition
   */
  private FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
    String flagType = flagLine.substring(5);

    if (NUM_FLAG_TYPE.equals(flagType)) {
      return new NumFlagParsingStrategy();
    } else if (UTF8_FLAG_TYPE.equals(flagType)) {
      return new SimpleFlagParsingStrategy();
    } else if (LONG_FLAG_TYPE.equals(flagType)) {
      return new DoubleASCIIFlagParsingStrategy();
    }

    throw new IllegalArgumentException("Unknown flag type: " + flagType);
  }

  /**
   * Reads the dictionary file through the provided InputStream, building up the words map
   *
   * @param dictionary InputStream to read the dictionary file through
   * @param decoder CharsetDecoder used to decode the contents of the file
   * @throws IOException Can be thrown while reading from the file
   */
  private void readDictionaryFile(InputStream dictionary, CharsetDecoder decoder) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(dictionary, decoder));
    // TODO: don't create millions of strings.
    String line = reader.readLine(); // first line is number of entries
    int numEntries = Integer.parseInt(line);

    // TODO: the flags themselves can be double-chars (long) or also numeric
    // either way the trick is to encode them as char... but they must be parsed differently
    while ((line = reader.readLine()) != null) {
      String entry;
      HunspellWord wordForm;

      int flagSep = line.lastIndexOf('/');
      if (flagSep == -1) {
        wordForm = NOFLAGS;
        entry = line;
      } else {
        // note, there can be comments (morph description) after a flag.
        // we should really look for any whitespace
        int end = line.indexOf('\t', flagSep);
        if (end == -1)
          end = line.length();

        String flagPart = line.substring(flagSep + 1, end);
        if (aliasCount > 0) {
          flagPart = getAliasValue(Integer.parseInt(flagPart));
        }

        wordForm = new HunspellWord(flagParsingStrategy.parseFlags(flagPart));
        Arrays.sort(wordForm.getFlags());
        entry = line.substring(0, flagSep);
      }
      if(ignoreCase) {
        entry = entry.toLowerCase(Locale.ROOT);
      }

      List<HunspellWord> entries = new ArrayList<HunspellWord>();
      entries.add(wordForm);
      words.put(entry, entries);
    }
  }

  public Version getVersion() {
    return version;
  }

  private void parseAlias(String line) {
    String ruleArgs[] = line.split("\\s+");
    if (aliases == null) {
      //first line should be the aliases count
      final int count = Integer.parseInt(ruleArgs[1]);
      aliases = new String[count];
    } else {
      aliases[aliasCount++] = ruleArgs[1];
    }
  }

  private String getAliasValue(int id) {
    try {
      return aliases[id - 1];
    } catch (IndexOutOfBoundsException ex) {
      throw new IllegalArgumentException("Bad flag alias number:" + id, ex);
    }
  }

  /**
   * Abstraction of the process of parsing flags taken from the affix and dic files
   */
  private static abstract class FlagParsingStrategy {

    /**
     * Parses the given String into a single flag
     *
     * @param rawFlag String to parse into a flag
     * @return Parsed flag
     */
    char parseFlag(String rawFlag) {
      return parseFlags(rawFlag)[0];
    }

    /**
     * Parses the given String into multiple flags
     *
     * @param rawFlags String to parse into flags
     * @return Parsed flags
     */
    abstract char[] parseFlags(String rawFlags);
  }

  /**
   * Simple implementation of {@link FlagParsingStrategy} that treats the chars in each String as a individual flags.
   * Can be used with both the ASCII and UTF-8 flag types.
   */
  private static class SimpleFlagParsingStrategy extends FlagParsingStrategy {
    /**
     * {@inheritDoc}
     */
    @Override
    public char[] parseFlags(String rawFlags) {
      return rawFlags.toCharArray();
    }
  }

  /**
   * Implementation of {@link FlagParsingStrategy} that assumes each flag is encoded in its numerical form. In the case
   * of multiple flags, each number is separated by a comma.
   */
  private static class NumFlagParsingStrategy extends FlagParsingStrategy {
    /**
     * {@inheritDoc}
     */
    @Override
    public char[] parseFlags(String rawFlags) {
      String[] rawFlagParts = rawFlags.trim().split(",");
      char[] flags = new char[rawFlagParts.length];

      for (int i = 0; i < rawFlagParts.length; i++) {
        // note, removing the trailing X/leading I for nepali... what is the rule here?!
        flags[i] = (char) Integer.parseInt(rawFlagParts[i].replaceAll("[^0-9]", ""));
      }

      return flags;
    }
  }

  /**
   * Implementation of {@link FlagParsingStrategy} that assumes each flag is encoded as two ASCII characters whose codes
   * must be combined into a single character.
   *
   * TODO (rmuir) test
   */
  private static class DoubleASCIIFlagParsingStrategy extends FlagParsingStrategy {

    /**
     * {@inheritDoc}
     */
    @Override
    public char[] parseFlags(String rawFlags) {
      if (rawFlags.length() == 0) {
        return new char[0];
      }

      StringBuilder builder = new StringBuilder();
      for (int i = 0; i < rawFlags.length(); i+=2) {
        char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
        builder.append(cookedFlag);
      }

      char flags[] = new char[builder.length()];
      builder.getChars(0, builder.length(), flags, 0);
      return flags;
    }
  }

  public boolean isIgnoreCase() {
    return ignoreCase;
  }
}
@@ -18,14 +18,16 @@ package org.apache.lucene.analysis.hunspell;
 */

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.HunspellStemmer.Stem;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.CharsRef;

/**
 * TokenFilter that uses hunspell affix rules and words to stem tokens. Since hunspell supports a word having multiple
@@ -41,71 +43,83 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
 * </p>
 *
 *
 * @lucene.experimental
 */
public final class HunspellStemFilter extends TokenFilter {

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
  private final HunspellStemmer stemmer;
  private final Stemmer stemmer;

  private List<Stem> buffer;
  private List<CharsRef> buffer;
  private State savedState;

  private final boolean dedup;
  private final boolean longestOnly;

  /** Create a {@link HunspellStemFilter} which deduplicates stems and has a maximum
   *  recursion level of 2.
   *  @see #HunspellStemFilter(TokenStream, HunspellDictionary, int) */
  public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary) {
   *  @see #HunspellStemFilter(TokenStream, Dictionary, int) */
  public HunspellStemFilter(TokenStream input, Dictionary dictionary) {
    this(input, dictionary, 2);
  }

  /**
   * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
   * HunspellDictionary
   * Creates a new Hunspell2StemFilter that will stem tokens from the given TokenStream using affix rules in the provided
   * Dictionary
   *
   * @param input TokenStream whose tokens will be stemmed
   * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
   * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
   */
  public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, int recursionCap) {
  public HunspellStemFilter(TokenStream input, Dictionary dictionary, int recursionCap) {
    this(input, dictionary, true, recursionCap);
  }

  /** Create a {@link HunspellStemFilter} which has a maximum recursion level of 2.
   *  @see #HunspellStemFilter(TokenStream, HunspellDictionary, boolean, int) */
  public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, boolean dedup) {
   *  @see #HunspellStemFilter(TokenStream, Dictionary, boolean, int) */
  public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup) {
    this(input, dictionary, dedup, 2);
  }

  /**
   * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
   * HunspellDictionary
   * Dictionary
   *
   * @param input TokenStream whose tokens will be stemmed
   * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
   * @param dedup true if only unique terms should be output.
   * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
   */
  public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, boolean dedup, int recursionCap) {
    super(input);
    this.dedup = dedup;
    this.stemmer = new HunspellStemmer(dictionary, recursionCap);
  public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, int recursionCap) {
    this(input, dictionary, dedup, recursionCap, false);
  }

  /**
   * {@inheritDoc}
   * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
   * Dictionary
   *
   * @param input TokenStream whose tokens will be stemmed
   * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
   * @param dedup true if only unique terms should be output.
   * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
   * @param longestOnly true if only the longest term should be output.
   */
  public HunspellStemFilter(TokenStream input, Dictionary dictionary, boolean dedup, int recursionCap, boolean longestOnly) {
    super(input);
    this.dedup = dedup && longestOnly == false; // don't waste time deduping if longestOnly is set
    this.stemmer = new Stemmer(dictionary, recursionCap);
    this.longestOnly = longestOnly;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (buffer != null && !buffer.isEmpty()) {
      Stem nextStem = buffer.remove(0);
      CharsRef nextStem = buffer.remove(0);
      restoreState(savedState);
      posIncAtt.setPositionIncrement(0);
      termAtt.copyBuffer(nextStem.getStem(), 0, nextStem.getStemLength());
      termAtt.setLength(nextStem.getStemLength());
      termAtt.setEmpty().append(nextStem);
      return true;
    }

@@ -122,24 +136,41 @@ public final class HunspellStemFilter extends TokenFilter {
    if (buffer.isEmpty()) { // we do not know this word, return it unchanged
      return true;
    }

    if (longestOnly && buffer.size() > 1) {
      Collections.sort(buffer, lengthComparator);
    }

    Stem stem = buffer.remove(0);
    termAtt.copyBuffer(stem.getStem(), 0, stem.getStemLength());
    termAtt.setLength(stem.getStemLength());
    CharsRef stem = buffer.remove(0);
    termAtt.setEmpty().append(stem);

    if (!buffer.isEmpty()) {
      savedState = captureState();
    if (longestOnly) {
      buffer.clear();
    } else {
      if (!buffer.isEmpty()) {
        savedState = captureState();
      }
    }

    return true;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void reset() throws IOException {
    super.reset();
    buffer = null;
  }

  static final Comparator<CharsRef> lengthComparator = new Comparator<CharsRef>() {
    @Override
    public int compare(CharsRef o1, CharsRef o2) {
      int cmp = Integer.compare(o2.length, o1.length);
      if (cmp == 0) {
        // tie break on text
        return o2.compareTo(o1);
      } else {
        return cmp;
      }
    }
  };
}
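For reference, a small sketch of the ordering the new longestOnly option relies on (mirroring lengthComparator above): longer stems sort first, with a reversed tie-break on the text, so buffer.remove(0) takes the longest candidate and the rest are cleared. The demo class and the same-package access to the package-private lengthComparator are assumptions for illustration:

package org.apache.lucene.analysis.hunspell;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.util.CharsRef;

public class LengthComparatorDemo {
  public static void main(String[] args) {
    List<CharsRef> stems = new ArrayList<CharsRef>();
    stems.add(new CharsRef("walk"));
    stems.add(new CharsRef("walking"));
    // Sort exactly as incrementToken() does when longestOnly is set.
    Collections.sort(stems, HunspellStemFilter.lengthComparator);
    System.out.println(stems.get(0)); // prints "walking": the longest stem wins
  }
}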
@@ -31,89 +31,75 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.IOUtils;

/**
 * TokenFilterFactory that creates instances of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter}.
 * Example config for British English including a custom dictionary, case insensitive matching:
 * TokenFilterFactory that creates instances of {@link HunspellStemFilter}.
 * Example config for British English:
 * <pre class="prettyprint">
 * <filter class="solr.HunspellStemFilterFactory"
 *    dictionary="en_GB.dic,my_custom.dic"
 *    affix="en_GB.aff"
 *    ignoreCase="true" /></pre>
 *    dictionary="en_GB.dic,my_custom.dic"
 *    affix="en_GB.aff"
 *    ignoreCase="false"
 *    longestOnly="false" /></pre>
 * Both parameters dictionary and affix are mandatory.
 * <br/>
 * The parameter ignoreCase (true/false) controls whether matching is case sensitive or not. Default false.
 * <br/>
 * The parameter strictAffixParsing (true/false) controls whether the affix parsing is strict or not. Default true.
 * If strict an error while reading an affix rule causes a ParseException, otherwise is ignored.
 * <br/>
 * Dictionaries for many languages are available through the OpenOffice project.
 *
 * See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
 * @lucene.experimental
 */
public class HunspellStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
  private static final String PARAM_DICTIONARY = "dictionary";
  private static final String PARAM_AFFIX = "affix";
  private static final String PARAM_IGNORE_CASE = "ignoreCase";
  private static final String PARAM_STRICT_AFFIX_PARSING = "strictAffixParsing";
  private static final String PARAM_DICTIONARY = "dictionary";
  private static final String PARAM_AFFIX = "affix";
  private static final String PARAM_RECURSION_CAP = "recursionCap";
  private static final String PARAM_IGNORE_CASE = "ignoreCase";
  private static final String PARAM_LONGEST_ONLY = "longestOnly";

  private final String dictionaryArg;
  private final String dictionaryFiles;
  private final String affixFile;
  private final boolean ignoreCase;
  private final boolean strictAffixParsing;
  private HunspellDictionary dictionary;
  private final boolean longestOnly;
  private Dictionary dictionary;
  private int recursionCap;

  /** Creates a new HunspellStemFilterFactory */
  public HunspellStemFilterFactory(Map<String,String> args) {
    super(args);
    assureMatchVersion();
    dictionaryArg = require(args, PARAM_DICTIONARY);
    dictionaryFiles = require(args, PARAM_DICTIONARY);
    affixFile = get(args, PARAM_AFFIX);
    ignoreCase = getBoolean(args, PARAM_IGNORE_CASE, false);
    strictAffixParsing = getBoolean(args, PARAM_STRICT_AFFIX_PARSING, true);
    recursionCap = getInt(args, PARAM_RECURSION_CAP, 2);
    longestOnly = getBoolean(args, PARAM_LONGEST_ONLY, false);
    // this isnt necessary: we properly load all dictionaries.
    // but recognize and ignore for back compat
    getBoolean(args, "strictAffixParsing", true);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  /**
   * Loads the hunspell dictionary and affix files defined in the configuration
   *
   * @param loader ResourceLoader used to load the files
   */
  @Override
  public void inform(ResourceLoader loader) throws IOException {
    String dictionaryFiles[] = dictionaryArg.split(",");
    String dicts[] = dictionaryFiles.split(",");

    InputStream affix = null;
    List<InputStream> dictionaries = new ArrayList<InputStream>();

    try {
      dictionaries = new ArrayList<InputStream>();
      for (String file : dictionaryFiles) {
      for (String file : dicts) {
        dictionaries.add(loader.openResource(file));
      }
      affix = loader.openResource(affixFile);

      this.dictionary = new HunspellDictionary(affix, dictionaries, luceneMatchVersion, ignoreCase, strictAffixParsing);
      this.dictionary = new Dictionary(affix, dictionaries, ignoreCase);
    } catch (ParseException e) {
      throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaryArg + ",affix=" + affixFile + "]", e);
      throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaries + ",affix=" + affixFile + "]", e);
    } finally {
      IOUtils.closeWhileHandlingException(affix);
      IOUtils.closeWhileHandlingException(dictionaries);
    }
  }

  /**
   * Creates an instance of {@link org.apache.lucene.analysis.hunspell.HunspellStemFilter} that will filter the given
   * TokenStream
   *
   * @param tokenStream TokenStream that will be filtered
   * @return HunspellStemFilter that filters the TokenStream
   */
  @Override
  public TokenStream create(TokenStream tokenStream) {
    return new HunspellStemFilter(tokenStream, dictionary, recursionCap);
    return new HunspellStemFilter(tokenStream, dictionary, true, recursionCap, longestOnly);
  }
}
@ -1,392 +0,0 @@
|
|||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.CharacterUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* HunspellStemmer uses the affix rules declared in the HunspellDictionary to generate one or more stems for a word. It
|
||||
* conforms to the algorithm in the original hunspell algorithm, including recursive suffix stripping.
|
||||
*/
|
||||
public class HunspellStemmer {
|
||||
private final int recursionCap;
|
||||
private final HunspellDictionary dictionary;
|
||||
private final StringBuilder segment = new StringBuilder();
|
||||
private CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT);
|
||||
|
||||
/**
|
||||
* Constructs a new HunspellStemmer which will use the provided HunspellDictionary to create its stems. Uses the
|
||||
* default recursion cap of <code>2</code> (based on Hunspell documentation).
|
||||
*
|
||||
* @param dictionary HunspellDictionary that will be used to create the stems
|
||||
*/
|
||||
public HunspellStemmer(HunspellDictionary dictionary) {
|
||||
this(dictionary, 2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new HunspellStemmer which will use the provided HunspellDictionary to create its stems
|
||||
*
|
||||
* @param dictionary HunspellDictionary that will be used to create the stems
|
||||
* @param recursionCap maximum level of recursion stemmer can go into
|
||||
*/
|
||||
public HunspellStemmer(HunspellDictionary dictionary, int recursionCap) {
|
||||
this.dictionary = dictionary;
|
||||
this.recursionCap = recursionCap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the stem(s) of the provided word
|
||||
*
|
||||
* @param word Word to find the stems for
|
||||
* @return List of stems for the word
|
||||
*/
|
||||
public List<Stem> stem(String word) {
|
||||
return stem(word.toCharArray(), word.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the stem(s) of the provided word
|
||||
*
|
||||
* @param word Word to find the stems for
|
||||
* @return List of stems for the word
|
||||
*/
|
||||
public List<Stem> stem(char word[], int length) {
|
||||
List<Stem> stems = new ArrayList<Stem>();
|
||||
if (dictionary.lookupWord(word, 0, length) != null) {
|
||||
stems.add(new Stem(word, length));
|
||||
}
|
||||
stems.addAll(stem(word, length, null, 0));
|
||||
return stems;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the unique stem(s) of the provided word
|
||||
*
|
||||
* @param word Word to find the stems for
|
||||
* @return List of stems for the word
|
||||
*/
|
||||
public List<Stem> uniqueStems(char word[], int length) {
|
||||
List<Stem> stems = new ArrayList<Stem>();
|
||||
CharArraySet terms = new CharArraySet(dictionary.getVersion(), 8, dictionary.isIgnoreCase());
|
||||
if (dictionary.lookupWord(word, 0, length) != null) {
|
||||
stems.add(new Stem(word, length));
|
||||
terms.add(word);
|
||||
}
|
||||
List<Stem> otherStems = stem(word, length, null, 0);
|
||||
for (Stem s : otherStems) {
|
||||
if (!terms.contains(s.stem)) {
|
||||
stems.add(s);
|
||||
terms.add(s.stem);
|
||||
}
|
||||
}
|
||||
return stems;
|
||||
}
|
||||
|
||||
// ================================================= Helper Methods ================================================
|
||||
|
||||
/**
|
||||
* Generates a list of stems for the provided word
|
||||
*
|
||||
* @param word Word to generate the stems for
|
||||
* @param flags Flags from a previous stemming step that need to be cross-checked with any affixes in this recursive step
|
||||
* @param recursionDepth Level of recursion this stemming step is at
|
||||
* @return List of stems, pr an empty if no stems are found
|
||||
*/
|
||||
private List<Stem> stem(char word[], int length, char[] flags, int recursionDepth) {
|
||||
List<Stem> stems = new ArrayList<Stem>();
|
||||
|
||||
for (int i = 0; i < length; i++) {
|
||||
List<HunspellAffix> suffixes = dictionary.lookupSuffix(word, i, length - i);
|
||||
if (suffixes == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (HunspellAffix suffix : suffixes) {
|
||||
if (hasCrossCheckedFlag(suffix.getFlag(), flags)) {
|
||||
int deAffixedLength = length - suffix.getAppend().length();
|
||||
// TODO: can we do this in-place?
|
||||
String strippedWord = new StringBuilder().append(word, 0, deAffixedLength).append(suffix.getStrip()).toString();
|
||||
|
||||
List<Stem> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), suffix, recursionDepth);
|
||||
for (Stem stem : stemList) {
|
||||
stem.addSuffix(suffix);
|
||||
}
|
||||
|
||||
stems.addAll(stemList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = length - 1; i >= 0; i--) {
|
||||
List<HunspellAffix> prefixes = dictionary.lookupPrefix(word, 0, i);
|
||||
if (prefixes == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (HunspellAffix prefix : prefixes) {
|
||||
if (hasCrossCheckedFlag(prefix.getFlag(), flags)) {
|
||||
int deAffixedStart = prefix.getAppend().length();
|
||||
int deAffixedLength = length - deAffixedStart;
|
||||
|
||||
String strippedWord = new StringBuilder().append(prefix.getStrip())
|
||||
.append(word, deAffixedStart, deAffixedLength)
|
||||
.toString();
|
||||
|
||||
List<Stem> stemList = applyAffix(strippedWord.toCharArray(), strippedWord.length(), prefix, recursionDepth);
|
||||
for (Stem stem : stemList) {
|
||||
stem.addPrefix(prefix);
|
||||
}
|
||||
|
||||
stems.addAll(stemList);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stems;
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the affix rule to the given word, producing a list of stems if any are found
|
||||
*
|
||||
* @param strippedWord Word the affix has been removed and the strip added
|
||||
* @param affix HunspellAffix representing the affix rule itself
|
||||
* @param recursionDepth Level of recursion this stemming step is at
|
||||
* @return List of stems for the word, or an empty list if none are found
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public List<Stem> applyAffix(char strippedWord[], int length, HunspellAffix affix, int recursionDepth) {
|
||||
if(dictionary.isIgnoreCase()) {
|
||||
charUtils.toLowerCase(strippedWord, 0, strippedWord.length);
|
||||
}
|
||||
segment.setLength(0);
|
||||
segment.append(strippedWord, 0, length);
|
||||
if (!affix.checkCondition(segment)) {
|
||||
return Collections.EMPTY_LIST;
|
||||
}
|
||||
|
||||
List<Stem> stems = new ArrayList<Stem>();
|
||||
|
||||
List<HunspellWord> words = dictionary.lookupWord(strippedWord, 0, length);
|
||||
if (words != null) {
|
||||
for (HunspellWord hunspellWord : words) {
|
||||
if (hunspellWord.hasFlag(affix.getFlag())) {
|
||||
stems.add(new Stem(strippedWord, length));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (affix.isCrossProduct() && recursionDepth < recursionCap) {
|
||||
stems.addAll(stem(strippedWord, length, affix.getAppendFlags(), ++recursionDepth));
|
||||
}
|
||||
|
||||
return stems;
|
||||
}
|
||||
|
||||
  /**
   * Checks if the given flag cross checks with the given array of flags
   *
   * @param flag Flag to cross check with the array of flags
   * @param flags Array of flags to cross check against. Can be {@code null}
   * @return {@code true} if the flag is found in the array or the array is {@code null}, {@code false} otherwise
   */
  private boolean hasCrossCheckedFlag(char flag, char[] flags) {
    return flags == null || Arrays.binarySearch(flags, flag) >= 0;
  }

  /**
   * Stem represents all information known about a stem of a word. This includes the stem, and the prefixes and suffixes
   * that were used to change the word into the stem.
   */
  public static class Stem {

    private final List<HunspellAffix> prefixes = new ArrayList<HunspellAffix>();
    private final List<HunspellAffix> suffixes = new ArrayList<HunspellAffix>();
    private final char stem[];
    private final int stemLength;

    /**
     * Creates a new Stem wrapping the given word stem
     *
     * @param stem Stem of a word
     */
    public Stem(char stem[], int stemLength) {
      this.stem = stem;
      this.stemLength = stemLength;
    }

    /**
     * Adds a prefix to the list of prefixes used to generate this stem. Because it is assumed that prefixes are added
     * depth first, the prefix is added to the front of the list
     *
     * @param prefix Prefix to add to the list of prefixes for this stem
     */
    public void addPrefix(HunspellAffix prefix) {
      prefixes.add(0, prefix);
    }

    /**
     * Adds a suffix to the list of suffixes used to generate this stem. Because it is assumed that suffixes are added
     * depth first, the suffix is added to the end of the list
     *
     * @param suffix Suffix to add to the list of suffixes for this stem
     */
    public void addSuffix(HunspellAffix suffix) {
      suffixes.add(suffix);
    }

    /**
     * Returns the list of prefixes used to generate the stem
     *
     * @return List of prefixes used to generate the stem or an empty list if no prefixes were required
     */
    public List<HunspellAffix> getPrefixes() {
      return prefixes;
    }

    /**
     * Returns the list of suffixes used to generate the stem
     *
     * @return List of suffixes used to generate the stem or an empty list if no suffixes were required
     */
    public List<HunspellAffix> getSuffixes() {
      return suffixes;
    }

    /**
     * Returns the actual word stem itself
     *
     * @return Word stem itself
     */
    public char[] getStem() {
      return stem;
    }

    /**
     * @return the stemLength
     */
    public int getStemLength() {
      return stemLength;
    }

    public String getStemString() {
      return new String(stem, 0, stemLength);
    }

  }


  // ================================================= Entry Point ===================================================

  /*
   * HunspellStemmer entry point. Accepts two arguments: location of affix file and location of dic file
   *
   * @param args Program arguments. Should contain location of affix file and location of dic file
   * @throws IOException Can be thrown while reading from the files
   * @throws ParseException Can be thrown while parsing the files
  public static void main(String[] args) throws IOException, ParseException {
    boolean ignoreCase = false;
    int offset = 0;

    if (args.length < 2) {
      System.out.println("usage: HunspellStemmer [-i] <affix location> <dic location>");
      System.exit(1);
    }

    if (args[offset].equals("-i")) {
      ignoreCase = true;
      System.out.println("Ignoring case. All stems will be returned lowercased");
      offset++;
    }

    InputStream affixInputStream = new FileInputStream(args[offset++]);
    InputStream dicInputStream = new FileInputStream(args[offset++]);

    // :Post-Release-Update-Version.LUCENE_XY:
    HunspellDictionary dictionary = new HunspellDictionary(affixInputStream, dicInputStream, Version.LUCENE_50, ignoreCase);

    affixInputStream.close();
    dicInputStream.close();

    HunspellStemmer stemmer = new HunspellStemmer(dictionary);

    Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name());

    System.out.print("> ");
    while (scanner.hasNextLine()) {
      String word = scanner.nextLine();

      if ("exit".equals(word)) {
        break;
      }

      printStemResults(word, stemmer.stem(word.toCharArray(), word.length()));

      System.out.print("> ");
    }
  }

   * Prints the results of the stemming of a word
   *
   * @param originalWord Word that has been stemmed
   * @param stems Stems of the word
  private static void printStemResults(String originalWord, List<Stem> stems) {
    StringBuilder builder = new StringBuilder().append("stem(").append(originalWord).append(")").append("\n");

    for (Stem stem : stems) {
      builder.append("- ").append(stem.getStem()).append(": ");

      for (HunspellAffix prefix : stem.getPrefixes()) {
        builder.append(prefix.getAppend()).append("+");

        if (hasText(prefix.getStrip())) {
          builder.append(prefix.getStrip()).append("-");
        }
      }

      builder.append(stem.getStem());

      for (HunspellAffix suffix : stem.getSuffixes()) {
        if (hasText(suffix.getStrip())) {
          builder.append("-").append(suffix.getStrip());
        }

        builder.append("+").append(suffix.getAppend());
      }
      builder.append("\n");
    }

    System.out.println(builder);
  }

   * Simple utility to check if the given String has any text
   *
   * @param str String to check if it has any text
   * @return {@code true} if the String has text, {@code false} otherwise
  private static boolean hasText(String str) {
    return str != null && str.length() > 0;
  }
  */
}
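
The suffix branch of stem() above is easiest to trace with a concrete rule. A minimal standalone sketch, using the rule "SFX A 0 e n" and the entry "lucen/A" from the test .aff/.dic files further down in this diff (the class and variable names here are illustrative only, not part of the commit):

public class SuffixStripSketch {
  public static void main(String[] args) {
    String word = "lucene";
    String append = "e"; // what the rule appends to the stem ("SFX A 0 e n")
    String strip = "";   // what it strips first ("0" means nothing)
    if (word.endsWith(append)) {
      int deAffixedLength = word.length() - append.length();
      String candidate = word.substring(0, deAffixedLength) + strip; // "lucen"
      // "lucen" ends in 'n', so the rule's condition holds; the dictionary
      // lookup then finds "lucen" carrying flag A and accepts it as a stem
      System.out.println(candidate);
    }
  }
}
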
@@ -1,63 +0,0 @@
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Arrays;

/**
 * A dictionary (.dic) entry with its associated flags.
 */
public class HunspellWord {

  private final char flags[]; // sorted, can we represent more concisely?

  /**
   * Creates a new HunspellWord with no associated flags
   */
  public HunspellWord() {
    flags = null;
  }

  /**
   * Constructs a new HunspellWord with the given flags
   *
   * @param flags Flags to associate with the word
   */
  public HunspellWord(char[] flags) {
    this.flags = flags;
  }

  /**
   * Checks whether the word has the given flag associated with it
   *
   * @param flag Flag to check whether it is associated with the word
   * @return {@code true} if the flag is associated, {@code false} otherwise
   */
  public boolean hasFlag(char flag) {
    return flags != null && Arrays.binarySearch(flags, flag) >= 0;
  }

  /**
   * Returns the flags associated with the word
   *
   * @return Flags associated with the word
   */
  public char[] getFlags() {
    return flags;
  }
}

@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -24,6 +24,7 @@ import java.util.List;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;

@@ -37,9 +38,10 @@ import org.apache.lucene.util.Version;
final class Stemmer {
  private final int recursionCap;
  private final Dictionary dictionary;
  private BytesRef scratch = new BytesRef();
  private final BytesRef scratch = new BytesRef();
  private final StringBuilder segment = new StringBuilder();
  private final ByteArrayDataInput affixReader;
  private final CharacterUtils charUtils = CharacterUtils.getInstance(Version.LUCENE_CURRENT);

  /**
   * Constructs a new Stemmer which will use the provided Dictionary to create its stems. Uses the

@@ -80,6 +82,9 @@ final class Stemmer {
   * @return List of stems for the word
   */
  public List<CharsRef> stem(char word[], int length) {
    if (dictionary.ignoreCase) {
      charUtils.toLowerCase(word, 0, length);
    }
    List<CharsRef> stems = new ArrayList<CharsRef>();
    if (dictionary.lookupWord(word, 0, length, scratch) != null) {
      stems.add(new CharsRef(word, 0, length));

@@ -95,20 +100,19 @@ final class Stemmer {
   * @return List of stems for the word
   */
  public List<CharsRef> uniqueStems(char word[], int length) {
    List<CharsRef> stems = new ArrayList<CharsRef>();
    CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, false);
    if (dictionary.lookupWord(word, 0, length, scratch) != null) {
      stems.add(new CharsRef(word, 0, length));
      terms.add(word);
    List<CharsRef> stems = stem(word, length);
    if (stems.size() < 2) {
      return stems;
    }
    List<CharsRef> otherStems = stem(word, length, Dictionary.NOFLAGS, 0);
    for (CharsRef s : otherStems) {
    CharArraySet terms = new CharArraySet(Version.LUCENE_CURRENT, 8, dictionary.ignoreCase);
    List<CharsRef> deduped = new ArrayList<>();
    for (CharsRef s : stems) {
      if (!terms.contains(s)) {
        stems.add(s);
        deduped.add(s);
        terms.add(s);
      }
    }
    return stems;
    return deduped;
  }

  // ================================================= Helper Methods ================================================

@@ -188,7 +192,7 @@ final class Stemmer {
   * @param recursionDepth Level of recursion this stemming step is at
   * @return List of stems for the word, or an empty list if none are found
   */
  public List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int recursionDepth) {
  List<CharsRef> applyAffix(char strippedWord[], int length, int affix, int recursionDepth) {
    segment.setLength(0);
    segment.append(strippedWord, 0, length);

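
The uniqueStems() rewrite above now gets all stems from stem() and deduplicates afterwards, honoring the dictionary's ignoreCase setting. A standalone sketch of just that dedup step, with java.util.HashSet standing in for Lucene's CharArraySet (names are illustrative, not part of the commit):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

public class DedupSketch {
  // Keep the first occurrence of each stem; fold case when the dictionary ignores it.
  static List<String> dedup(List<String> stems, boolean ignoreCase) {
    Set<String> seen = new HashSet<>();
    List<String> deduped = new ArrayList<>();
    for (String s : stems) {
      String key = ignoreCase ? s.toLowerCase(Locale.ROOT) : s;
      if (seen.add(key)) { // add() returns false if the key was already present
        deduped.add(s);
      }
    }
    return deduped;
  }

  public static void main(String[] args) {
    // With ignoreCase=true, "Lucen" collapses onto "lucen": prints [lucen, lucene]
    System.out.println(dedup(Arrays.asList("lucen", "Lucen", "lucene"), true));
  }
}
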
@@ -1,137 +0,0 @@
package org.apache.lucene.analysis.hunspell2;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.CharsRef;

/**
 * TokenFilter that uses hunspell affix rules and words to stem tokens. Since hunspell supports a word having multiple
 * stems, this filter can emit multiple tokens for each consumed token
 *
 * <p>
 * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
 * certain terms from being passed to the stemmer
 * {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
 * in a previous {@link TokenStream}.
 *
 * Note: For including the original term as well as the stemmed version, see
 * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
 * </p>
 *
 * @lucene.experimental
 */
public final class Hunspell2StemFilter extends TokenFilter {

  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
  private final Stemmer stemmer;

  private List<CharsRef> buffer;
  private State savedState;

  private final boolean dedup;

  /** Create a {@link Hunspell2StemFilter} which deduplicates stems and has a maximum
   *  recursion level of 2.
   *  @see #Hunspell2StemFilter(TokenStream, Dictionary, int) */
  public Hunspell2StemFilter(TokenStream input, Dictionary dictionary) {
    this(input, dictionary, 2);
  }

  /**
   * Creates a new Hunspell2StemFilter that will stem tokens from the given TokenStream using affix rules in the provided
   * Dictionary
   *
   * @param input TokenStream whose tokens will be stemmed
   * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
   * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
   */
  public Hunspell2StemFilter(TokenStream input, Dictionary dictionary, int recursionCap) {
    this(input, dictionary, true, recursionCap);
  }

  /** Create a {@link Hunspell2StemFilter} which has a maximum recursion level of 2.
   *  @see #Hunspell2StemFilter(TokenStream, Dictionary, boolean, int) */
  public Hunspell2StemFilter(TokenStream input, Dictionary dictionary, boolean dedup) {
    this(input, dictionary, dedup, 2);
  }

  /**
   * Creates a new HunspellStemFilter that will stem tokens from the given TokenStream using affix rules in the provided
   * Dictionary
   *
   * @param input TokenStream whose tokens will be stemmed
   * @param dictionary HunspellDictionary containing the affix rules and words that will be used to stem the tokens
   * @param dedup true if only unique terms should be output.
   * @param recursionCap maximum level of recursion stemmer can go into, defaults to <code>2</code>
   */
  public Hunspell2StemFilter(TokenStream input, Dictionary dictionary, boolean dedup, int recursionCap) {
    super(input);
    this.dedup = dedup;
    this.stemmer = new Stemmer(dictionary, recursionCap);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (buffer != null && !buffer.isEmpty()) {
      CharsRef nextStem = buffer.remove(0);
      restoreState(savedState);
      posIncAtt.setPositionIncrement(0);
      termAtt.setEmpty().append(nextStem);
      return true;
    }

    if (!input.incrementToken()) {
      return false;
    }

    if (keywordAtt.isKeyword()) {
      return true;
    }

    buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());

    if (buffer.isEmpty()) { // we do not know this word, return it unchanged
      return true;
    }

    CharsRef stem = buffer.remove(0);
    termAtt.setEmpty().append(stem);

    if (!buffer.isEmpty()) {
      savedState = captureState();
    }

    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    buffer = null;
  }
}

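
incrementToken() above emits the first stem in place of the consumed token and stacks any remaining stems on the same position by restoring the saved state and setting the position increment to 0. A self-contained sketch of that buffering scheme outside the TokenStream API (the token values mirror the tests later in this diff):

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;

public class StackedEmitSketch {
  public static void main(String[] args) {
    Deque<String> buffer = new ArrayDeque<>(Arrays.asList("lucene", "lucen"));
    int position = 0;
    position += 1; // the first stem advances the position as usual
    System.out.println(position + " " + buffer.poll()); // 1 lucene
    while (!buffer.isEmpty()) {
      // increment 0: subsequent stems occupy the same position as the first
      System.out.println(position + " " + buffer.poll()); // 1 lucen
    }
  }
}
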
@@ -1,80 +0,0 @@
package org.apache.lucene.analysis.hunspell2;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;

/**
 * TokenFilterFactory that creates instances of {@link Hunspell2StemFilter}.
 * Example config for British English:
 * <pre class="prettyprint">
 * &lt;filter class="solr.Hunspell2StemFilterFactory"
 *          dictionary="en_GB.dic"
 *          affix="en_GB.aff" /&gt;</pre>
 * Both parameters dictionary and affix are mandatory.
 * Dictionaries for many languages are available through the OpenOffice project.
 *
 * See <a href="http://wiki.apache.org/solr/Hunspell">http://wiki.apache.org/solr/Hunspell</a>
 * @lucene.experimental
 */
public class Hunspell2StemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
  private static final String PARAM_DICTIONARY = "dictionary";
  private static final String PARAM_AFFIX = "affix";
  private static final String PARAM_RECURSION_CAP = "recursionCap";

  private final String dictionaryFile;
  private final String affixFile;
  private Dictionary dictionary;
  private int recursionCap;

  /** Creates a new Hunspell2StemFilterFactory */
  public Hunspell2StemFilterFactory(Map<String,String> args) {
    super(args);
    dictionaryFile = require(args, PARAM_DICTIONARY);
    affixFile = get(args, PARAM_AFFIX);
    recursionCap = getInt(args, PARAM_RECURSION_CAP, 2);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  @Override
  public void inform(ResourceLoader loader) throws IOException {
    try (InputStream affix = loader.openResource(affixFile);
         InputStream dictionary = loader.openResource(dictionaryFile)) {
      try {
        this.dictionary = new Dictionary(affix, dictionary);
      } catch (ParseException e) {
        throw new RuntimeException(e);
      }
    }
  }

  @Override
  public TokenStream create(TokenStream tokenStream) {
    return new Hunspell2StemFilter(tokenStream, dictionary, recursionCap);
  }
}

@@ -1,26 +0,0 @@
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<body>
Stemming TokenFilter using a Java implementation of the <a href="http://www.ldc.upenn.edu/Catalog/docs/LDC2008T01/acta04.pdf">
Hunspell stemming algorithm</a>.
<p>
Dictionaries can be found on <a href="http://wiki.services.openoffice.org/wiki/Dictionaries">
OpenOffice's wiki</a>
</p>
</body>
</html>

@@ -51,7 +51,6 @@ org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory
org.apache.lucene.analysis.hi.HindiStemFilterFactory
org.apache.lucene.analysis.hu.HungarianLightStemFilterFactory
org.apache.lucene.analysis.hunspell.HunspellStemFilterFactory
org.apache.lucene.analysis.hunspell2.Hunspell2StemFilterFactory
org.apache.lucene.analysis.id.IndonesianStemFilterFactory
org.apache.lucene.analysis.in.IndicNormalizationFilterFactory
org.apache.lucene.analysis.it.ItalianLightStemFilterFactory

@@ -62,8 +62,8 @@ import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellDictionaryTest;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;

@@ -406,13 +406,13 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
        return new IdentityEncoder(); // the other encoders will throw exceptions if tokens aren't numbers?
      }
    });
    put(HunspellDictionary.class, new ArgProducer() {
    put(Dictionary.class, new ArgProducer() {
      @Override public Object create(Random random) {
        // TODO: make nastier
        InputStream affixStream = HunspellDictionaryTest.class.getResourceAsStream("test.aff");
        InputStream dictStream = HunspellDictionaryTest.class.getResourceAsStream("test.dic");
        InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
        InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
        try {
          return new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT);
          return new Dictionary(affixStream, dictStream);
        } catch (Exception ex) {
          Rethrow.rethrow(ex);
          return null; // unreachable code

@@ -1,201 +0,0 @@
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.util.LuceneTestCase;
import org.junit.Assert;
import org.junit.Test;

public class HunspellDictionaryTest extends LuceneTestCase {

  private class CloseCheckInputStream extends InputStream {
    private InputStream delegate;

    private boolean closed = false;

    public CloseCheckInputStream(InputStream delegate) {
      super();
      this.delegate = delegate;
    }

    @Override
    public int read() throws IOException {
      return delegate.read();
    }

    @Override
    public int hashCode() {
      return delegate.hashCode();
    }

    @Override
    public int read(byte[] b) throws IOException {
      return delegate.read(b);
    }

    @Override
    public boolean equals(Object obj) {
      return delegate.equals(obj);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      return delegate.read(b, off, len);
    }

    @Override
    public long skip(long n) throws IOException {
      return delegate.skip(n);
    }

    @Override
    public String toString() {
      return delegate.toString();
    }

    @Override
    public int available() throws IOException {
      return delegate.available();
    }

    @Override
    public void close() throws IOException {
      this.closed = true;
      delegate.close();
    }

    @Override
    public void mark(int readlimit) {
      delegate.mark(readlimit);
    }

    @Override
    public void reset() throws IOException {
      delegate.reset();
    }

    @Override
    public boolean markSupported() {
      return delegate.markSupported();
    }

    public boolean isClosed() {
      return this.closed;
    }

  }

  @Test
  public void testResourceCleanup() throws IOException, ParseException {
    CloseCheckInputStream affixStream = new CloseCheckInputStream(getClass().getResourceAsStream("testCompressed.aff"));
    CloseCheckInputStream dictStream = new CloseCheckInputStream(getClass().getResourceAsStream("testCompressed.dic"));

    new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT);

    assertFalse(affixStream.isClosed());
    assertFalse(dictStream.isClosed());

    affixStream.close();
    dictStream.close();

    assertTrue(affixStream.isClosed());
    assertTrue(dictStream.isClosed());
  }

  @Test
  public void testHunspellDictionary_loadDicAff() throws IOException, ParseException {
    InputStream affixStream = getClass().getResourceAsStream("test.aff");
    InputStream dictStream = getClass().getResourceAsStream("test.dic");

    HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT);
    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
    assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
    assertEquals("Wrong number of flags for lucen", 1, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);

    affixStream.close();
    dictStream.close();
  }

  @Test
  public void testHunspellDictionary_multipleDictWithOverride() throws IOException, ParseException {
    InputStream affixStream = getClass().getResourceAsStream("test.aff");
    List<InputStream> dictStreams = new ArrayList<InputStream>();
    dictStreams.add(getClass().getResourceAsStream("test.dic"));
    dictStreams.add(getClass().getResourceAsStream("testOverride.dic"));

    HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStreams, TEST_VERSION_CURRENT, false);
    assertEquals("Wrong number of flags for lucen", 3, dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5).get(0).getFlags().length);
    assertEquals("Wrong number of flags for bar", 1, dictionary.lookupWord(new char[]{'b', 'a', 'r'}, 0, 3).get(0).getFlags().length);

    affixStream.close();
    for (InputStream dstream : dictStreams) {
      dstream.close();
    }
  }

  @Test
  public void testCompressedHunspellDictionary_loadDicAff() throws IOException, ParseException {
    InputStream affixStream = getClass().getResourceAsStream("testCompressed.aff");
    InputStream dictStream = getClass().getResourceAsStream("testCompressed.dic");

    HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT);
    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
    assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());

    affixStream.close();
    dictStream.close();
  }

  @Test
  public void testHunspellDictionary_loadDicWrongAff() throws IOException, ParseException {
    InputStream affixStream = getClass().getResourceAsStream("testWrongAffixRule.aff");
    InputStream dictStream = getClass().getResourceAsStream("test.dic");

    HunspellDictionary dictionary = new HunspellDictionary(affixStream, Arrays.asList(dictStream), TEST_VERSION_CURRENT, false, false);
    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size());
    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size());
    assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3).size());
    //strict parsing disabled: malformed rule is not loaded
    assertNull(dictionary.lookupPrefix(new char[]{'a'}, 0, 1));
    affixStream.close();
    dictStream.close();

    affixStream = getClass().getResourceAsStream("testWrongAffixRule.aff");
    dictStream = getClass().getResourceAsStream("test.dic");
    //strict parsing enabled: malformed rule causes ParseException
    try {
      dictionary = new HunspellDictionary(affixStream, Arrays.asList(dictStream), TEST_VERSION_CURRENT, false, true);
      Assert.fail();
    } catch (ParseException e) {
      Assert.assertEquals("The affix file contains a rule with less than five elements", e.getMessage());
      Assert.assertEquals(23, e.getErrorOffset());
    }

    affixStream.close();
    dictStream.close();
  }
}

@@ -1,92 +0,0 @@
package org.apache.lucene.analysis.hunspell;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;

public class HunspellStemFilterTest extends BaseTokenStreamTestCase {

  private static HunspellDictionary DICTIONARY;
  @BeforeClass
  public static void beforeClass() throws IOException, ParseException {
    DICTIONARY = createDict(true);
  }
  @AfterClass
  public static void afterClass() {
    DICTIONARY = null;
  }
  public static HunspellDictionary createDict(boolean ignoreCase) throws IOException, ParseException {
    InputStream affixStream = HunspellStemmerTest.class.getResourceAsStream("test.aff");
    InputStream dictStream = HunspellStemmerTest.class.getResourceAsStream("test.dic");

    return new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT, ignoreCase);
  }

  /**
   * Simple test for KeywordAttribute
   */
  public void testKeywordAttribute() throws IOException {
    MockTokenizer tokenizer = whitespaceMockTokenizer("lucene is awesome");
    tokenizer.setEnableChecks(true);
    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, DICTIONARY, TestUtil.nextInt(random(), 1, 3));
    assertTokenStreamContents(filter, new String[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});

    // assert with keyword marker
    tokenizer = whitespaceMockTokenizer("lucene is awesome");
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
    filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), DICTIONARY, TestUtil.nextInt(random(), 1, 3));
    assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    Analyzer analyzer = new Analyzer() {

      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY, TestUtil.nextInt(random(), 1, 3)));
      }
    };
    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
  }

  public void testEmptyTerm() throws IOException {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, DICTIONARY, TestUtil.nextInt(random(), 1, 3)));
      }
    };
    checkOneTerm(a, "", "");
  }
}

@@ -1,137 +0,0 @@
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.List;

import static junit.framework.Assert.assertEquals;

public class HunspellStemmerTest extends LuceneTestCase {

  private static HunspellStemmer stemmer;

  @BeforeClass
  public static void beforeClass() throws IOException, ParseException {
    createStemmer(true);
  }

  @AfterClass
  public static void afterClass() {
    stemmer = null;
  }

  @Test
  public void testStem_simpleSuffix() {
    List<HunspellStemmer.Stem> stems = stemmer.stem("lucene");

    assertEquals(2, stems.size());
    assertEquals("lucene", stems.get(0).getStemString());
    assertEquals("lucen", stems.get(1).getStemString());

    stems = stemmer.stem("mahoute");
    assertEquals(1, stems.size());
    assertEquals("mahout", stems.get(0).getStemString());
  }

  @Test
  public void testStem_simplePrefix() {
    List<HunspellStemmer.Stem> stems = stemmer.stem("solr");

    assertEquals(1, stems.size());
    assertEquals("olr", stems.get(0).getStemString());
  }

  @Test
  public void testStem_recursiveSuffix() {
    List<HunspellStemmer.Stem> stems = stemmer.stem("abcd");

    assertEquals(1, stems.size());
    assertEquals("ab", stems.get(0).getStemString());
  }

  @Test
  public void testStem_ignoreCase() throws IOException, ParseException {
    List<HunspellStemmer.Stem> stems;
    createStemmer(true);

    stems = stemmer.stem("apache");
    assertEquals(1, stems.size());
    assertEquals("apach", stems.get(0).getStemString());

    stems = stemmer.stem("APACHE");
    assertEquals(1, stems.size());
    assertEquals("apach", stems.get(0).getStemString());

    stems = stemmer.stem("Apache");
    assertEquals(1, stems.size());
    assertEquals("apach", stems.get(0).getStemString());

    stems = stemmer.stem("foos");
    assertEquals(1, stems.size());
    assertEquals("foo", stems.get(0).getStemString());

    stems = stemmer.stem("mood");
    assertEquals(1, stems.size());
    assertEquals("moo", stems.get(0).getStemString());

    stems = stemmer.stem("Foos");
    assertEquals(1, stems.size());
    assertEquals("foo", stems.get(0).getStemString());

    // The "Foo" rule gets overridden by the "foo" rule, and we don't merge
    stems = stemmer.stem("Food");
    assertEquals(0, stems.size());

    stems = stemmer.stem("Mood");
    assertEquals(1, stems.size());
    assertEquals("moo", stems.get(0).getStemString());
  }

  @Test
  public void testStem_caseSensitive() throws IOException, ParseException {
    createStemmer(false);
    List<HunspellStemmer.Stem> stems = stemmer.stem("apache");
    assertEquals(0, stems.size());

    stems = stemmer.stem("Apache");
    assertEquals(1, stems.size());
    assertEquals("Apach", stems.get(0).getStemString());
  }


  private static void createStemmer(boolean ignoreCase) throws IOException, ParseException {
    InputStream affixStream = HunspellStemmerTest.class.getResourceAsStream("test.aff");
    InputStream dictStream = HunspellStemmerTest.class.getResourceAsStream("test.dic");

    HunspellDictionary dictionary = new HunspellDictionary(affixStream, dictStream, TEST_VERSION_CURRENT, ignoreCase);
    stemmer = new HunspellStemmer(dictionary);

    affixStream.close();
    dictStream.close();
  }

}

@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -22,7 +22,7 @@ import java.io.InputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;

@@ -33,7 +33,7 @@ import org.junit.Ignore;
 * wget --mirror -np http://archive.services.openoffice.org/pub/mirror/OpenOffice.org/contrib/dictionaries/
 * Note some of the files differ only in case. This may be a problem on your operating system!
 */
//@Ignore("enable manually")
@Ignore("enable manually")
public class TestAllDictionaries extends LuceneTestCase {

  // set this to the location of where you downloaded all the files

@@ -162,21 +162,11 @@ public class TestAllDictionaries extends LuceneTestCase {
      assert dicEntry != null;
      ZipEntry affEntry = zip.getEntry(tests[i+2]);
      assert affEntry != null;

      // get ram from previous impl
      String oldRAM = "FAIL";
      try (InputStream dictionary = zip.getInputStream(dicEntry);
           InputStream affix = zip.getInputStream(affEntry)) {
        try {
          HunspellDictionary dic = new HunspellDictionary(affix, dictionary, TEST_VERSION_CURRENT);
          oldRAM = RamUsageEstimator.humanSizeOf(dic);
        } catch (Throwable t) {}
      }

      try (InputStream dictionary = zip.getInputStream(dicEntry);
           InputStream affix = zip.getInputStream(affEntry)) {
        Dictionary dic = new Dictionary(affix, dictionary);
        System.out.println(tests[i] + "\t" + oldRAM + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" +
        System.out.println(tests[i] + "\t" + RamUsageEstimator.humanSizeOf(dic) + "\t(" +
            "words=" + RamUsageEstimator.humanSizeOf(dic.words) + ", " +
            "flags=" + RamUsageEstimator.humanSizeOf(dic.flagLookup) + ", " +
            "strips=" + RamUsageEstimator.humanSizeOf(dic.stripLookup) + ", " +

@@ -204,7 +194,7 @@ public class TestAllDictionaries extends LuceneTestCase {

      try (InputStream dictionary = zip.getInputStream(dicEntry);
           InputStream affix = zip.getInputStream(affEntry)) {
        Dictionary dic = new Dictionary(affix, dictionary);
        new Dictionary(affix, dictionary);
      }
    }
  }

@@ -0,0 +1,110 @@
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.Stemmer;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class TestCaseInsensitive extends LuceneTestCase {
  private static Stemmer stemmer;

  @BeforeClass
  public static void beforeClass() throws Exception {
    try (InputStream affixStream = TestCaseInsensitive.class.getResourceAsStream("simple.aff");
         InputStream dictStream = TestCaseInsensitive.class.getResourceAsStream("mixedcase.dic")) {
      Dictionary dictionary = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
      stemmer = new Stemmer(dictionary);
    }
  }

  @AfterClass
  public static void afterClass() {
    stemmer = null;
  }

  public void testCaseInsensitivity() {
    assertStemsTo("lucene", "lucene", "lucen");
    assertStemsTo("LuCeNe", "lucene", "lucen");
    assertStemsTo("mahoute", "mahout");
    assertStemsTo("MaHoUte", "mahout");
  }

  public void testSimplePrefix() {
    assertStemsTo("solr", "olr");
  }

  public void testRecursiveSuffix() {
    assertStemsTo("abcd", "ab");
  }

  // all forms unmunched from dictionary
  public void testAllStems() {
    assertStemsTo("ab", "ab");
    assertStemsTo("abc", "ab");
    assertStemsTo("apach", "apach");
    assertStemsTo("apache", "apach");
    assertStemsTo("foo", "foo");
    assertStemsTo("food", "foo");
    assertStemsTo("foos", "foo");
    assertStemsTo("lucen", "lucen");
    assertStemsTo("lucene", "lucen", "lucene");
    assertStemsTo("mahout", "mahout");
    assertStemsTo("mahoute", "mahout");
    assertStemsTo("moo", "moo");
    assertStemsTo("mood", "moo");
    assertStemsTo("olr", "olr");
    assertStemsTo("solr", "olr");
  }

  // some bogus stuff that should not stem (empty lists)!
  public void testBogusStems() {
    assertStemsTo("abs");
    assertStemsTo("abe");
    assertStemsTo("sab");
    assertStemsTo("sapach");
    assertStemsTo("sapache");
    assertStemsTo("apachee");
    assertStemsTo("sfoo");
    assertStemsTo("sfoos");
    assertStemsTo("fooss");
    assertStemsTo("lucenee");
    assertStemsTo("solre");
  }

  private void assertStemsTo(String s, String... expected) {
    Arrays.sort(expected);

    List<CharsRef> stems = stemmer.stem(s);
    String actual[] = new String[stems.size()];
    for (int i = 0; i < actual.length; i++) {
      actual[i] = stems.get(i).toString();
    }
    Arrays.sort(actual);

    assertArrayEquals(expected, actual);
  }
}

@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;

import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;

@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -26,13 +26,15 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;

public class TestHunspell2StemFilter extends BaseTokenStreamTestCase {
public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
  private static Dictionary dictionary;

  @BeforeClass

@@ -52,13 +54,21 @@ public class TestHunspell2StemFilter extends BaseTokenStreamTestCase {
  public void testKeywordAttribute() throws IOException {
    MockTokenizer tokenizer = whitespaceMockTokenizer("lucene is awesome");
    tokenizer.setEnableChecks(true);
    Hunspell2StemFilter filter = new Hunspell2StemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3));
    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3));
    assertTokenStreamContents(filter, new String[]{"lucene", "lucen", "is", "awesome"}, new int[] {1, 0, 1, 1});

    // assert with keyword marker
    tokenizer = whitespaceMockTokenizer("lucene is awesome");
    CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList("Lucene"), true);
    filter = new Hunspell2StemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary, TestUtil.nextInt(random(), 1, 3));
    filter = new HunspellStemFilter(new SetKeywordMarkerFilter(tokenizer, set), dictionary, TestUtil.nextInt(random(), 1, 3));
    assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
  }

  /** simple test for longestOnly option */
  public void testLongestOnly() throws IOException {
    MockTokenizer tokenizer = whitespaceMockTokenizer("lucene is awesome");
    tokenizer.setEnableChecks(true);
    HunspellStemFilter filter = new HunspellStemFilter(tokenizer, dictionary, true, TestUtil.nextInt(random(), 1, 3), true);
    assertTokenStreamContents(filter, new String[]{"lucene", "is", "awesome"}, new int[] {1, 1, 1});
  }

@@ -68,7 +78,7 @@ public class TestHunspell2StemFilter extends BaseTokenStreamTestCase {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new Hunspell2StemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3)));
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3)));
      }
    };
    checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);

@@ -79,7 +89,7 @@ public class TestHunspell2StemFilter extends BaseTokenStreamTestCase {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new Hunspell2StemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3)));
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary, TestUtil.nextInt(random(), 1, 3)));
      }
    };
    checkOneTerm(a, "", "");

@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.hunspell;
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;

@@ -31,17 +30,17 @@ public class TestHunspellStemFilterFactory extends BaseTokenStreamFactoryTestCas
  public void testStemming() throws Exception {
    Reader reader = new StringReader("abc");
    TokenStream stream = whitespaceMockTokenizer(reader);
    stream = tokenFilterFactory("HunspellStem",
        "dictionary", "test.dic",
        "affix", "test.aff").create(stream);
    stream = tokenFilterFactory("Hunspell2Stem",
        "dictionary", "simple.dic",
        "affix", "simple.aff").create(stream);
    assertTokenStreamContents(stream, new String[] { "ab" });
  }

  /** Test that bogus arguments result in exception */
  public void testBogusArguments() throws Exception {
    try {
      tokenFilterFactory("HunspellStem",
          "dictionary", "test.dic",
      tokenFilterFactory("Hunspell2Stem",
          "dictionary", "simple.dic",
          "bogusArg", "bogusValue");
      fail();
    } catch (IllegalArgumentException expected) {

@@ -1,4 +1,4 @@
package org.apache.lucene.analysis.hunspell2;
package org.apache.lucene.analysis.hunspell;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.hunspell2;
 * limitations under the License.
 */

import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.Stemmer;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;

@@ -0,0 +1,10 @@
9
Ab/C
apach/A
Foo/D
foo/E
Lucen/A
Lucene
mahout/A
Moo/E
olr/B

@@ -1,20 +0,0 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

SFX A Y 3
SFX A 0 e n
SFX A 0 e t
SFX A 0 e h

SFX C Y 2
SFX C 0 d/C c
SFX C 0 c b

SFX D Y 1
SFX D 0 s o

SFX E Y 1
SFX E 0 d o

PFX B Y 1
PFX B 0 s o

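
For readers unfamiliar with the .aff syntax above, here is one rule group from that file annotated with standard Hunspell semantics (the # comments are mine, not part of the file):

SFX A Y 3      # suffix group for flag A; Y = may cross-product with prefixes; 3 rules follow
SFX A 0 e n    # strip nothing ("0"), append "e", condition: the stem ends in "n";
               # a .dic entry "lucen/A" therefore also matches the surface form "lucene"
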
@@ -1,10 +0,0 @@
9
lucen/A
lucene
mahout/A
olr/B
ab/C
Apach/A
Foo/E
foo/D
Moo/E

@@ -1,29 +0,0 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

FLAG long

AF 5
AF AA
AF BB
AF CC
AF DD
AF EE

SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h

SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b

SFX DD Y 1
SFX DD 0 s o

SFX EE Y 1
SFX EE 0 d o

PFX BB Y 1
PFX BB 0 s o

@@ -1,9 +0,0 @@
6
lucen/1
lucene
mahout/1
olr/2
ab/3
Apach/1
foo/4
Foo/5

@@ -1,3 +0,0 @@
2
lucen/ABC
bar/A

@@ -1,24 +0,0 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

SFX A Y 3
SFX A 0 e n
SFX A 0 e t
SFX A 0 e h

SFX C Y 2
SFX C 0 d/C c
SFX C 0 c b

SFX D Y 1
SFX D 0 s o

SFX E Y 1
SFX E 0 d o

PFX B Y 1
PFX B 0 s o

#wrong rule (only 4 elements)
PFX A0 Y 1
PFX A0 0 a

@@ -1,50 +0,0 @@
package org.apache.lucene.analysis.hunspell2;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;

/**
 * Simple tests to ensure the Hunspell stemmer loads from factory
 */
public class TestHunspell2StemFilterFactory extends BaseTokenStreamFactoryTestCase {
  public void testStemming() throws Exception {
    Reader reader = new StringReader("abc");
    TokenStream stream = whitespaceMockTokenizer(reader);
    stream = tokenFilterFactory("Hunspell2Stem",
        "dictionary", "simple.dic",
        "affix", "simple.aff").create(stream);
    assertTokenStreamContents(stream, new String[] { "ab" });
  }

  /** Test that bogus arguments result in exception */
  public void testBogusArguments() throws Exception {
    try {
      tokenFilterFactory("Hunspell2Stem",
          "dictionary", "simple.dic",
          "bogusArg", "bogusValue");
      fail();
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getMessage().contains("Unknown parameters"));
    }
  }
}