LUCENE-3233: improve ram/perf of SynonymFilter, add wordnet parsing, nuke contrib/wordnet

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1145158 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2011-07-11 12:58:52 +00:00
parent 19fd2508c6
commit 015ecfa0a0
55 changed files with 2945 additions and 2205 deletions

View File

@ -230,7 +230,6 @@
<packageset dir="contrib/misc/src/java"/>
<packageset dir="contrib/queries/src/java"/>
<packageset dir="contrib/spatial/src/java"/>
<packageset dir="contrib/wordnet/src/java"/>
<packageset dir="contrib/xml-query-parser/src/java"/>
<packageset dir="contrib/queryparser/src/java"/>
<!-- end alpha sort -->
@ -250,7 +249,6 @@
<group title="contrib: Queries" packages="org.apache.lucene.search.similar*:org.apache.lucene.search.regex*:org.apache.regexp*"/>
<group title="contrib: Query Parser" packages="org.apache.lucene.queryParser.*"/>
<group title="contrib: Spatial" packages="org.apache.lucene.spatial*"/>
<group title="contrib: WordNet" packages="org.apache.lucene.wordnet*"/>
<group title="contrib: XML Query Parser" packages="org.apache.lucene.xmlparser*"/>
</sources>

View File

@ -5,11 +5,6 @@ http://s.apache.org/luceneversions
======================= Trunk (not yet released) =======================
Changes in runtime behavior
* LUCENE-3250: Wordnet's SynExpand requires a non-null Analyzer (it no longer
treats null as StandardAnalyzer). (Robert Muir)
Build
* LUCENE-2845: Moved contrib/benchmark to modules.
@ -78,6 +73,10 @@ New Features
documents must be indexed as a document block, using
IndexWriter.add/UpdateDocuments (Mark Harwood, Mike McCandless)
* LUCENE-3233: Added SynonymFilter for applying multi-word synonyms
during indexing or querying (with parsers for wordnet and solr formats).
Removed contrib/wordnet. (Robert Muir, Mike McCandless)
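  A minimal usage sketch of the new filter (a hedged illustration, not part of
  this entry: class and method names follow the LUCENE-3233 API, but exact
  signatures may differ, and the tokenizer/Reader wiring is assumed):

    SynonymMap.Builder builder = new SynonymMap.Builder(true);          // dedup duplicate rules
    builder.add(new CharsRef("woods"), new CharsRef("forest"), true);   // keep the original token
    SynonymMap map = builder.build();
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
    ts = new SynonymFilter(ts, map, true);                              // ignoreCase = true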
API Changes
Bug Fixes

View File

@ -1,5 +0,0 @@
As of 2002-11-13, the WordNet Lucene contribution contains a single Java class:
org.apache.lucene.wordnet.Syns2Index.
This class creates a Lucene index with synonyms for English words, read from
a Prolog file that is part of the WordNet database.

View File

@ -1,70 +0,0 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="wordnet" default="default">
<description>
WordNet
</description>
<property name="prolog.file" location="prologwn/wn_s.pl"/>
<property name="synindex.dir" location="index"/>
<available property="synindex.exists" file="${synindex.dir}" type="dir"/>
<import file="../contrib-build.xml"/>
<target name="index" depends="compile" description="Build WordNet index">
<fail if="synindex.exists">
Index already exists - must remove first.
</fail>
<java classname="org.apache.lucene.wordnet.Syns2Index">
<classpath>
<path refid="compile.classpath"/>
<pathelement location="${build.dir}/classes"/>
</classpath>
<arg file="${prolog.file}"/>
<arg file="${synindex.dir}"/>
</java>
</target>
<target name="synonym" description="Find synonyms for word">
<fail unless="synindex.exists">
Index does not exist.
</fail>
<fail unless="word">
Must specify 'word' property.
</fail>
<java classname="org.apache.lucene.wordnet.SynLookup">
<classpath>
<path refid="compile.classpath"/>
<pathelement location="${build.dir}/classes"/>
</classpath>
<arg file="${synindex.dir}"/>
<arg value="${word}"/>
</java>
</target>
</project>

View File

@ -1,142 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
/**
* Expand a query by looking up synonyms for every term.
* You need to invoke {@link Syns2Index} first to build the synonym index.
*
* @see Syns2Index
*/
public final class SynExpand {
/**
* Perform synonym expansion on a query.
*
* @param query the user's query, assumed to contain no "special" query syntax; plain words such as "big dog" make sense here, whereas something like "title:foo^1.2" should instead be handed to the regular query parser.
*
* @param syns an IndexSearcher opened over the Lucene index you previously created with {@link Syns2Index}. The searcher is not closed or otherwise altered.
*
* @param a the analyzer used to parse the user's query.
*
* @param f optional field name to search in, or null to use the default of "contents".
*
* @param boost optional boost applied to synonym clauses; values of 0 or less apply no boost.
*
* @return the expanded Query
*/
public static Query expand( String query,
IndexSearcher syns,
Analyzer a,
String f,
final float boost)
throws IOException
{
final Set<String> already = new HashSet<String>(); // avoid dups
List<String> top = new LinkedList<String>(); // needs to be separately listed..
final String field = ( f == null) ? "contents" : f;
// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
String word = termAtt.toString();
if ( already.add( word))
top.add( word);
}
ts.end();
ts.close();
final BooleanQuery tmp = new BooleanQuery();
// [2] form query
Iterator<String> it = top.iterator();
while ( it.hasNext())
{
// [2a] add the top-level word in
String word = it.next();
TermQuery tq = new TermQuery( new Term( field, word));
tmp.add( tq, BooleanClause.Occur.SHOULD);
syns.search(new TermQuery( new Term(Syns2Index.F_WORD, word)), new Collector() {
IndexReader reader;
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
@Override
public void collect(int doc) throws IOException {
Document d = reader.document(doc);
String[] values = d.getValues( Syns2Index.F_SYN);
for ( int j = 0; j < values.length; j++)
{
String syn = values[ j];
if ( already.add( syn)) // avoid dups of top level words and synonyms
{
TermQuery tq = new TermQuery( new Term( field, syn));
if ( boost > 0) // else keep normal 1.0
tq.setBoost( boost);
tmp.add( tq, BooleanClause.Occur.SHOULD);
}
}
}
@Override
public void setNextReader(AtomicReaderContext context)
throws IOException {
this.reader = context.reader;
}
@Override
public void setScorer(Scorer scorer) throws IOException {}
});
// [2b] add in unique synonyms
}
return tmp;
}
}
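For reference, a hedged sketch of driving the expand() method shown above (the index path, analyzer choice and query string are assumptions; the searcher and directory calls mirror SynLookup and TestWordnet elsewhere in this commit):

import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.wordnet.SynExpand;

public class SynExpandExample {
  public static void main(String[] args) throws Exception {
    // Open the synonym index previously built with Syns2Index (path is illustrative).
    FSDirectory dir = FSDirectory.open(new File("synindex"));
    IndexSearcher syns = new IndexSearcher(dir, true);   // read-only, as in SynLookup
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    // Expand a plain user query against the default "contents" field;
    // a boost of 0 or less leaves the synonym clauses unboosted.
    Query expanded = SynExpand.expand("big dog", syns, analyzer, null, 0f);
    System.out.println(expanded);
    syns.close();
    dir.close();
  }
}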

View File

@ -1,170 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.FSDirectory;
/**
* Test program to look up synonyms.
*/
public class SynLookup {
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.out.println(
"java org.apache.lucene.wordnet.SynLookup <index path> <word>");
return; // bail out; args[0] and args[1] are required below
}
FSDirectory directory = FSDirectory.open(new File(args[0]));
IndexSearcher searcher = new IndexSearcher(directory, true);
String word = args[1];
Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
TotalHitCountCollector countingCollector = new TotalHitCountCollector();
searcher.search(query, countingCollector);
if (countingCollector.getTotalHits() == 0) {
System.out.println("No synonyms found for " + word);
} else {
System.out.println("Synonyms found for \"" + word + "\":");
}
ScoreDoc[] hits = searcher.search(query, countingCollector.getTotalHits()).scoreDocs;
for (int i = 0; i < hits.length; i++) {
Document doc = searcher.doc(hits[i].doc);
String[] values = doc.getValues(Syns2Index.F_SYN);
for (int j = 0; j < values.length; j++) {
System.out.println(values[j]);
}
}
searcher.close();
directory.close();
}
/**
* Perform synonym expansion on a query.
*
* @param query the user's query (plain words, no special query syntax)
* @param syns an IndexSearcher opened over the synonym index created with {@link Syns2Index}
* @param a the analyzer used to parse the user's query
* @param field the field name to search in
* @param boost boost applied to synonym clauses; values of 0 or less apply no boost
*/
public static Query expand( String query,
IndexSearcher syns,
Analyzer a,
final String field,
final float boost)
throws IOException
{
final Set<String> already = new HashSet<String>(); // avoid dups
List<String> top = new LinkedList<String>(); // needs to be separately listed..
// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
String word = termAtt.toString();
if ( already.add( word))
top.add( word);
}
final BooleanQuery tmp = new BooleanQuery();
// [2] form query
Iterator<String> it = top.iterator();
while ( it.hasNext())
{
// [2a] add the top-level word in
String word = it.next();
TermQuery tq = new TermQuery( new Term( field, word));
tmp.add( tq, BooleanClause.Occur.SHOULD);
// [2b] add in unique synonyms
syns.search(new TermQuery( new Term(Syns2Index.F_WORD, word)), new Collector() {
IndexReader reader;
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
@Override
public void collect(int doc) throws IOException {
Document d = reader.document(doc);
String[] values = d.getValues( Syns2Index.F_SYN);
for ( int j = 0; j < values.length; j++)
{
String syn = values[ j];
if ( already.add( syn))
{
TermQuery tq = new TermQuery( new Term( field, syn));
if ( boost > 0) // else keep normal 1.0
tq.setBoost( boost);
tmp.add( tq, BooleanClause.Occur.SHOULD);
}
}
}
@Override
public void setNextReader(AtomicReaderContext context)
throws IOException {
this.reader = context.reader;
}
@Override
public void setScorer(Scorer scorer) throws IOException {}
});
}
return tmp;
}
}

View File

@ -1,400 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
/**
* Loads the <a target="_blank"
* href="http://www.cogsci.princeton.edu/~wn/">WordNet </a> prolog file <a
* href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">wn_s.pl </a>
* into a thread-safe main-memory hash map that can be used for fast
* high-frequency lookups of synonyms for any given (lowercase) word string.
* <p>
* The following holds: if B is a synonym for A (A -> B), then A is also a synonym for B (B -> A).
* It does not necessarily hold that if A -> B and B -> C, then A -> C.
* <p>
* Loading typically takes about 1.5 seconds, so it should be done only once per
* (server) program execution, using a singleton pattern. Once loaded, a
* synonym lookup via {@link #getSynonyms(String)} takes constant time O(1).
* A loaded default synonym map consumes about 10 MB main memory.
* An instance is immutable, hence thread-safe.
* <p>
* This implementation borrows some ideas from the Lucene Syns2Index demo that
* Dave Spencer originally contributed to Lucene. Dave's approach
* involved a persistent Lucene index which is suitable for occasional
* lookups or very large synonym tables, but considered unsuitable for
* high-frequency lookups of medium size synonym tables.
* <p>
* Example Usage:
* <pre class="prettyprint">
* String[] words = new String[] { "hard", "woods", "forest", "wolfish", "xxxx"};
* SynonymMap map = new SynonymMap(new FileInputStream("samples/fulltext/wn_s.pl"));
* for (int i = 0; i &lt; words.length; i++) {
* String[] synonyms = map.getSynonyms(words[i]);
* System.out.println(words[i] + ":" + java.util.Arrays.asList(synonyms).toString());
* }
* </pre>
* <br>
* Example output:
* <pre class="prettyprint">
* hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, severe, severely, strong, toilsome, tough]
* woods:[forest, wood]
* forest:[afforest, timber, timberland, wood, woodland, woods]
* wolfish:[edacious, esurient, rapacious, ravening, ravenous, voracious, wolflike]
* xxxx:[]
* </pre>
*
* <p>
* <b>See also:</b><br>
* <a target="_blank"
* href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb
* man page </a><br>
* <a target="_blank" href="http://www.hostmon.com/rfc/advanced.jsp">Dave's synonym demo site</a>
*/
public class SynonymMap {
/** the index data; Map<String word, String[] synonyms> */
private final HashMap<String,String[]> table;
private static final String[] EMPTY = new String[0];
private static final boolean DEBUG = false;
/**
* Constructs an instance, loading WordNet synonym data from the given input
* stream. Finally closes the stream. The words in the stream must be in
* UTF-8 or a compatible subset such as plain ASCII.
*
* @param input
* the stream to read from (null indicates an empty synonym map)
* @throws IOException
* if an error occurred while reading the stream.
*/
public SynonymMap(InputStream input) throws IOException {
this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
}
/**
* Returns the synonym set for the given word, sorted ascending.
*
* @param word
* the word to lookup (must be in lowercase).
* @return the synonyms; a set of zero or more words, sorted ascending, each
* word containing lowercase characters that satisfy
* <code>Character.isLetter()</code>.
*/
public String[] getSynonyms(String word) {
String[] synonyms = table.get(word);
if (synonyms == null) return EMPTY;
String[] copy = new String[synonyms.length]; // copy for guaranteed immutability
System.arraycopy(synonyms, 0, copy, 0, synonyms.length);
return copy;
}
/**
* Returns a String representation of the index data for debugging purposes.
*
* @return a String representation
*/
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
Iterator<String> iter = new TreeMap<String,String[]>(table).keySet().iterator();
int count = 0;
int f0 = 0;
int f1 = 0;
int f2 = 0;
int f3 = 0;
while (iter.hasNext()) {
String word = iter.next();
buf.append(word + ":");
String[] synonyms = getSynonyms(word);
buf.append(Arrays.asList(synonyms));
buf.append("\n");
count += synonyms.length;
if (synonyms.length == 0) f0++;
if (synonyms.length == 1) f1++;
if (synonyms.length == 2) f2++;
if (synonyms.length == 3) f3++;
}
buf.append("\n\nkeys=" + table.size() + ", synonyms=" + count + ", f0=" + f0 +", f1=" + f1 + ", f2=" + f2 + ", f3=" + f3);
return buf.toString();
}
/**
* Analyzes/transforms the given word on input stream loading. This default implementation simply
* lowercases the word. Override this method with a custom stemming
* algorithm or similar, if desired.
*
* @param word
* the word to analyze
* @return the same word, or a different word (or null to indicate that the
* word should be ignored)
*/
protected String analyze(String word) {
return word.toLowerCase();
}
protected boolean isValid(String str) {
for (int i=str.length(); --i >= 0; ) {
if (!Character.isLetter(str.charAt(i))) return false;
}
return true;
}
private HashMap<String,String[]> read(byte[] data) {
int WORDS = (int) (76401 / 0.7); // presizing
int GROUPS = (int) (88022 / 0.7); // presizing
HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS); // Map<String word, int[] groups>
HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
Charset charset = Charset.forName("UTF-8");
int lastNum = -1;
Integer lastGroup = null;
int len = data.length;
int i=0;
while (i < len) { // until EOF
/* Part A: Parse a line */
// scan to beginning of group
while (i < len && data[i] != '(') i++;
if (i >= len) break; // EOF
i++;
// parse group
int num = 0;
while (i < len && data[i] != ',') {
num = 10*num + (data[i] - 48);
i++;
}
i++;
// if (DEBUG) System.err.println("num="+ num);
// scan to beginning of word
while (i < len && data[i] != '\'') i++;
i++;
// scan to end of word
int start = i;
do {
while (i < len && data[i] != '\'') i++;
i++;
} while (i < len && data[i] != ','); // word must end with "',"
if (i >= len) break; // EOF
String word = charset.decode(ByteBuffer.wrap(data, start, i-start-1)).toString();
// String word = new String(data, 0, start, i-start-1); // ASCII
/*
* Part B: ignore phrases (with spaces and hyphens) and
* non-alphabetic words, and let user customize word (e.g. do some
* stemming)
*/
if (!isValid(word)) continue; // ignore
word = analyze(word);
if (word == null || word.length() == 0) continue; // ignore
/* Part C: Add (group,word) to tables */
// ensure compact string representation, minimizing memory overhead
String w = internedWords.get(word);
if (w == null) {
word = new String(word); // ensure compact string
internedWords.put(word, word);
} else {
word = w;
}
Integer group = lastGroup;
if (num != lastNum) {
group = Integer.valueOf(num);
lastGroup = group;
lastNum = num;
}
// add word --> group
ArrayList<Integer> groups = word2Groups.get(word);
if (groups == null) {
groups = new ArrayList<Integer>(1);
word2Groups.put(word, groups);
}
groups.add(group);
// add group --> word
ArrayList<String> words = group2Words.get(group);
if (words == null) {
words = new ArrayList<String>(1);
group2Words.put(group, words);
}
words.add(word);
}
/* Part D: compute index data structure */
HashMap<String,String[]> word2Syns = createIndex(word2Groups, group2Words);
/* Part E: minimize memory consumption by a factor 3 (or so) */
// if (true) return word2Syns;
word2Groups = null; // help gc
//TODO: word2Groups.clear(); would be more appropriate ?
group2Words = null; // help gc
//TODO: group2Words.clear(); would be more appropriate ?
return optimize(word2Syns, internedWords);
}
private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();
for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
ArrayList<Integer> group = entry.getValue();
String word = entry.getKey();
// HashSet synonyms = new HashSet();
TreeSet<String> synonyms = new TreeSet<String>();
for (int i=group.size(); --i >= 0; ) { // for each groupID of word
ArrayList<String> words = group2Words.get(group.get(i));
for (int j=words.size(); --j >= 0; ) { // add all words
String synonym = words.get(j); // note that w and word are interned
if (synonym != word) { // a word is implicitly its own synonym
synonyms.add(synonym);
}
}
}
int size = synonyms.size();
if (size > 0) {
String[] syns = new String[size];
if (size == 1)
syns[0] = synonyms.first();
else
synonyms.toArray(syns);
// if (syns.length > 1) Arrays.sort(syns);
// if (DEBUG) System.err.println("word=" + word + ":" + Arrays.asList(syns));
word2Syns.put(word, syns);
}
}
return word2Syns;
}
private HashMap<String,String[]> optimize(HashMap<String,String[]> word2Syns, HashMap<String,String> internedWords) {
if (DEBUG) {
System.err.println("before gc");
for (int i=0; i < 10; i++) System.gc();
System.err.println("after gc");
}
// collect entries
int len = 0;
int size = word2Syns.size();
String[][] allSynonyms = new String[size][];
String[] words = new String[size];
Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
for (int j=0; j < size; j++) {
Map.Entry<String,String[]> entry = iter.next();
allSynonyms[j] = entry.getValue();
words[j] = entry.getKey();
len += words[j].length();
}
// assemble large string containing all words
StringBuilder buf = new StringBuilder(len);
for (int j=0; j < size; j++) buf.append(words[j]);
String allWords = new String(buf.toString()); // ensure compact string across JDK versions
buf = null;
// intern words at app level via memory-overlaid substrings
for (int p=0, j=0; j < size; j++) {
String word = words[j];
internedWords.put(word, allWords.substring(p, p + word.length()));
p += word.length();
}
// replace words with interned words
for (int j=0; j < size; j++) {
String[] syns = allSynonyms[j];
for (int k=syns.length; --k >= 0; ) {
syns[k] = internedWords.get(syns[k]);
}
word2Syns.remove(words[j]);
word2Syns.put(internedWords.get(words[j]), syns);
}
if (DEBUG) {
words = null;
allSynonyms = null;
internedWords = null;
allWords = null;
System.err.println("before gc");
for (int i=0; i < 10; i++) System.gc();
System.err.println("after gc");
}
return word2Syns;
}
// the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
private static byte[] toByteArray(InputStream input) throws IOException {
try {
// safe and fast even if input.available() behaves weird or buggy
int len = Math.max(256, input.available());
byte[] buffer = new byte[len];
byte[] output = new byte[len];
len = 0;
int n;
while ((n = input.read(buffer)) >= 0) {
if (len + n > output.length) { // grow capacity
byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
System.arraycopy(output, 0, tmp, 0, len);
System.arraycopy(buffer, 0, tmp, len, n);
buffer = output; // use larger buffer for future larger bulk reads
output = tmp;
} else {
System.arraycopy(buffer, 0, output, len, n);
}
len += n;
}
if (len == output.length) return output;
buffer = null; // help gc
buffer = new byte[len];
System.arraycopy(output, 0, buffer, 0, len);
return buffer;
} finally {
input.close();
}
}
}

View File

@ -1,148 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
/**
* Injects additional tokens for synonyms of token terms fetched from the
* underlying child stream; the child stream must deliver lowercase tokens
* for synonyms to be found.
*
*/
public class SynonymTokenFilter extends TokenFilter {
/** The Token.type used to indicate a synonym to higher level filters. */
public static final String SYNONYM_TOKEN_TYPE = "SYNONYM";
private final SynonymMap synonyms;
private final int maxSynonyms;
private String[] stack = null;
private int index = 0;
private AttributeSource.State current = null;
private int todo = 0;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Creates an instance for the given underlying stream and synonym table.
*
* @param input
* the underlying child token stream
* @param synonyms
* the map used to extract synonyms for terms
* @param maxSynonyms
* the maximum number of synonym tokens to return per underlying
* token word (a value of Integer.MAX_VALUE indicates unlimited)
*/
public SynonymTokenFilter(TokenStream input, SynonymMap synonyms, int maxSynonyms) {
super(input);
if (input == null)
throw new IllegalArgumentException("input must not be null");
if (synonyms == null)
throw new IllegalArgumentException("synonyms must not be null");
if (maxSynonyms < 0)
throw new IllegalArgumentException("maxSynonyms must not be negative");
this.synonyms = synonyms;
this.maxSynonyms = maxSynonyms;
}
/** Advances to the next token in the stream; returns false at EOS. */
@Override
public final boolean incrementToken() throws IOException {
while (todo > 0 && index < stack.length) { // pop from stack
if (createToken(stack[index++], current)) {
todo--;
return true;
}
}
if (!input.incrementToken()) return false; // EOS; iterator exhausted
stack = synonyms.getSynonyms(termAtt.toString()); // push onto stack
if (stack.length > maxSynonyms) randomize(stack);
index = 0;
current = captureState();
todo = maxSynonyms;
return true;
}
/**
* Creates and returns a token for the given synonym of the current input
* token; Override for custom (stateless or stateful) behavior, if desired.
*
* @param synonym
* a synonym for the current token's term
* @param current
* the current token from the underlying child stream
* @return true to emit a token for the given synonym, or false to indicate
* that the synonym should be ignored
*/
protected boolean createToken(String synonym, AttributeSource.State current) {
restoreState(current);
termAtt.setEmpty().append(synonym);
typeAtt.setType(SYNONYM_TOKEN_TYPE);
posIncrAtt.setPositionIncrement(0);
return true;
}
/**
* Randomize synonyms to later sample a subset. Uses constant random seed
* for reproducibility. Uses "DRand", a simple, fast, uniform pseudo-random
* number generator with medium statistical quality (multiplicative
* congruential method), producing integers in the range [Integer.MIN_VALUE,
* Integer.MAX_VALUE].
*/
private static void randomize(Object[] arr) {
int seed = 1234567; // constant
int randomState = 4*seed + 1;
// Random random = new Random(seed); // unnecessary overhead
int len = arr.length;
for (int i=0; i < len-1; i++) {
randomState *= 0x278DDE6D; // z(i+1)=a*z(i) (mod 2**32)
int r = randomState % (len-i);
if (r < 0) r = -r; // e.g. -9 % 2 == -1
// int r = random.nextInt(len-i);
// swap arr[i, i+r]
Object tmp = arr[i];
arr[i] = arr[i + r];
arr[i + r] = tmp;
}
}
@Override
public void reset() throws IOException {
super.reset();
stack = null;
index = 0;
current = null;
todo = 0;
}
}

View File

@ -1,329 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
* Convert the prolog file wn_s.pl from the <a href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">WordNet prolog download</a>
* into a Lucene index suitable for looking up synonyms and performing query expansion ({@link SynExpand#expand SynExpand.expand(...)}).
*
* This has been tested with WordNet 2.0.
*
* The index has fields named "word" ({@link #F_WORD})
* and "syn" ({@link #F_SYN}).
* <p>
* The source word (such as 'big') can be looked up in the
* "word" field, and if present there will be fields named "syn"
* for every synonym. What's tricky here is that there could be <b>multiple</b>
* fields with the same name, in the general case for words that have multiple synonyms.
* That's not a problem for Lucene; you just use {@link org.apache.lucene.document.Document#getValues}.
* </p>
* <p>
* While the WordNet file distinguishes groups of synonyms with
* related meanings we don't do that here.
* </p>
*
* Building the index can take around 4 minutes on a "fast" system, and the resulting index takes up almost 3 MB.
*
* @see <a href="http://www.cogsci.princeton.edu/~wn/">WordNet home page</a>
* @see <a href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb man page</a>
* @see <a href="http://www.hostmon.com/rfc/advanced.jsp">sample site that uses it</a>
*/
public class Syns2Index
{
/**
*
*/
private static final PrintStream o = System.out;
/**
*
*/
private static final PrintStream err = System.err;
/**
*
*/
public static final String F_SYN = "syn";
/**
*
*/
public static final String F_WORD = "word";
/**
* We don't actually analyze any text (the field is NOT_ANALYZED), but the
* analyzer can't be null: the doc inverter still asks it for the offset gap.
*/
private static final Analyzer ana = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return null;
}
};
/**
* Takes arg of prolog file name and index directory.
*/
public static void main(String[] args)
throws Throwable
{
// get command line arguments
String prologFilename = null; // name of file "wn_s.pl"
String indexDir = null;
if (args.length == 2)
{
prologFilename = args[0];
indexDir = args[1];
}
else
{
usage();
System.exit(1);
}
// ensure that the prolog file is readable
if (! (new File(prologFilename)).canRead())
{
err.println("Error: cannot read Prolog file: " + prologFilename);
System.exit(1);
}
// exit if the target index directory already exists
if ((new File(indexDir)).isDirectory())
{
err.println("Error: index directory already exists: " + indexDir);
err.println("Please specify a name of a non-existent directory");
System.exit(1);
}
o.println("Opening Prolog file " + prologFilename);
final FileInputStream fis = new FileInputStream(prologFilename);
final BufferedReader br = new BufferedReader(new InputStreamReader(fis));
String line;
// maps a word to all the "groups" it's in
final Map<String,List<String>> word2Nums = new TreeMap<String,List<String>>();
// maps a group to all the words in it
final Map<String,List<String>> num2Words = new TreeMap<String,List<String>>();
// number of rejected words
int ndecent = 0;
// status output
int mod = 1;
int row = 1;
// parse prolog file
o.println( "[1/2] Parsing " + prologFilename);
while ((line = br.readLine()) != null)
{
// occasional progress
if ((++row) % mod == 0) // periodically print out line we read in
{
mod *= 2;
o.println("\t" + row + " " + line + " " + word2Nums.size()
+ " " + num2Words.size() + " ndecent=" + ndecent);
}
// syntax check
if (! line.startsWith("s("))
{
err.println("OUCH: " + line);
System.exit(1);
}
// parse line
line = line.substring(2);
int comma = line.indexOf(',');
String num = line.substring(0, comma);
int q1 = line.indexOf('\'');
line = line.substring(q1 + 1);
int q2 = line.lastIndexOf('\'');
String word = line.substring(0, q2).toLowerCase().replace("''", "'");
// make sure is a normal word
if (! isDecent(word))
{
ndecent++;
continue; // don't store words w/ spaces
}
// 1/2: word2Nums map
// append to entry or add new one
List<String> lis = word2Nums.get(word);
if (lis == null)
{
lis = new LinkedList<String>();
lis.add(num);
word2Nums.put(word, lis);
}
else
lis.add(num);
// 2/2: num2Words map
lis = num2Words.get(num);
if (lis == null)
{
lis = new LinkedList<String>();
lis.add(word);
num2Words.put(num, lis);
}
else
lis.add(word);
}
// close the streams
fis.close();
br.close();
// create the index
o.println( "[2/2] Building index to store synonyms, " +
" map sizes are " + word2Nums.size() + " and " + num2Words.size());
index(indexDir, word2Nums, num2Words);
}
/**
* Checks to see if a word contains only alphabetic characters by
* checking it one character at a time.
*
* @param s string to check
* @return <code>true</code> if the string is decent
*/
private static boolean isDecent(String s)
{
int len = s.length();
for (int i = 0; i < len; i++)
{
if (!Character.isLetter(s.charAt(i)))
{
return false;
}
}
return true;
}
/**
* Forms a Lucene index based on the 2 maps.
*
* @param indexDir the directory where the index should be created
* @param word2Nums
* @param num2Words
*/
private static void index(String indexDir, Map<String,List<String>> word2Nums, Map<String,List<String>> num2Words)
throws Throwable
{
int row = 0;
int mod = 1;
FSDirectory dir = FSDirectory.open(new File(indexDir));
try {
// overwrite the index if it already exists
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
Version.LUCENE_CURRENT, ana).setOpenMode(OpenMode.CREATE));
((TieredMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
Iterator<String> i1 = word2Nums.keySet().iterator();
while (i1.hasNext()) // for each word
{
String g = i1.next();
Document doc = new Document();
int n = index(word2Nums, num2Words, g, doc);
if (n > 0)
{
doc.add( new Field( F_WORD, g, Field.Store.YES, Field.Index.NOT_ANALYZED));
if ((++row % mod) == 0)
{
o.println("\trow=" + row + "/" + word2Nums.size() + " doc= " + doc);
mod *= 2;
}
writer.addDocument(doc);
} // else degenerate
}
o.println( "Optimizing..");
writer.optimize();
writer.close();
} finally {
dir.close();
}
}
/**
* Given the two maps, fills a document for one word.
*/
private static int index(Map<String,List<String>> word2Nums, Map<String,List<String>> num2Words, String g, Document doc)
throws Throwable
{
List<String> keys = word2Nums.get(g); // get list of key#'s
Iterator<String> i2 = keys.iterator();
Set<String> already = new TreeSet<String>(); // keep them sorted
// pass 1: fill up 'already' with all words
while (i2.hasNext()) // for each key#
{
already.addAll(num2Words.get(i2.next())); // get list of words
}
int num = 0;
already.remove(g); // of course a word is it's own syn
Iterator<String> it = already.iterator();
while (it.hasNext())
{
String cur = it.next();
// don't store things like 'pit bull' -> 'american pit bull'
if (!isDecent(cur))
{
continue;
}
num++;
doc.add( new Field( F_SYN, cur, Field.Store.YES, Field.Index.NO));
}
return num;
}
/**
*
*/
private static void usage()
{
o.println("\n\n" +
"java org.apache.lucene.wordnet.Syns2Index <prolog file> <index dir>\n\n");
}
}

View File

@ -1,57 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>WordNet Lucene Synonyms Integration</title>
</head>
<body>
This package uses synonyms defined by <a href="http://www.cogsci.princeton.edu/~wn/">WordNet</a>.
There are two methods: query expansion and analysis.
Both methods first require you to download the <a href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">WordNet prolog database</a>.
Inside this archive is a file named wn_s.pl, which contains the WordNet synonyms.
<h1>Query Expansion Method</h1>
This method creates a Lucene index storing the synonyms, which in turn can be used for query expansion.
You normally run {@link org.apache.lucene.wordnet.Syns2Index} once to build the query index/"database", and then call
{@link org.apache.lucene.wordnet.SynExpand#expand SynExpand.expand(...)} to expand a query.
<p>
<h3> Instructions </h3>
<ol>
<li> Invoke Syns2Index to build a synonym index.
It takes two arguments: the path to wn_s.pl from the WordNet download, and the index directory.
<li> Update your search UI to call SynExpand.expand(...) where appropriate, so that user queries are expanded with synonyms.
</ol>
<h1>Analysis Method</h1>
This method injects additional synonym tokens for tokens from a child {@link org.apache.lucene.analysis.TokenStream}.
<h3> Instructions </h3>
<ol>
<li>Create a {@link org.apache.lucene.wordnet.SynonymMap}, passing in the path to wn_s.pl
<li>Add a {@link org.apache.lucene.wordnet.SynonymTokenFilter} to your analyzer. Note: SynonymTokenFilter should be after LowerCaseFilter,
because it expects terms to already be in lowercase.
</ol>
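<p>
A minimal sketch of the analysis method described above (hedged illustration only:
the file path, tokenizer, Reader and synonym limit are assumptions, not part of this package):
<pre>
  SynonymMap map = new SynonymMap(new FileInputStream("wn_s.pl"));
  TokenStream ts = new LowerCaseFilter(Version.LUCENE_CURRENT,
      new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader));
  ts = new SynonymTokenFilter(ts, map, 10); // inject at most 10 synonyms per token
</pre>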
</body>
</html>

View File

@ -1,119 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
final String testFile = "testSynonyms.txt";
public void testSynonyms() throws Exception {
SynonymMap map = new SynonymMap(getClass().getResourceAsStream(testFile));
/* all expansions */
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE);
assertAnalyzesTo(analyzer, "Lost in the woods",
new String[] { "lost", "in", "the", "woods", "forest", "wood" },
new int[] { 0, 5, 8, 12, 12, 12 },
new int[] { 4, 7, 11, 17, 17, 17 },
new int[] { 1, 1, 1, 1, 0, 0 });
}
public void testSynonymsSingleQuote() throws Exception {
SynonymMap map = new SynonymMap(getClass().getResourceAsStream(testFile));
/* all expansions */
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, Integer.MAX_VALUE);
assertAnalyzesTo(analyzer, "king",
new String[] { "king", "baron" });
}
public void testSynonymsLimitedAmount() throws Exception {
SynonymMap map = new SynonymMap(getClass().getResourceAsStream(testFile));
/* limit to one synonym expansion */
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, 1);
assertAnalyzesTo(analyzer, "Lost in the woods",
/* wood comes before forest due to
* the input file, not lexicographic order
*/
new String[] { "lost", "in", "the", "woods", "wood" },
new int[] { 0, 5, 8, 12, 12 },
new int[] { 4, 7, 11, 17, 17 },
new int[] { 1, 1, 1, 1, 0 });
}
public void testReusableTokenStream() throws Exception {
SynonymMap map = new SynonymMap(getClass().getResourceAsStream(testFile));
/* limit to one synonym expansion */
Analyzer analyzer = new SynonymWhitespaceAnalyzer(map, 1);
assertAnalyzesToReuse(analyzer, "Lost in the woods",
new String[] { "lost", "in", "the", "woods", "wood" },
new int[] { 0, 5, 8, 12, 12 },
new int[] { 4, 7, 11, 17, 17 },
new int[] { 1, 1, 1, 1, 0 });
assertAnalyzesToReuse(analyzer, "My wolfish dog went to the forest",
new String[] { "my", "wolfish", "ravenous", "dog", "went", "to",
"the", "forest", "woods" },
new int[] { 0, 3, 3, 11, 15, 20, 23, 27, 27 },
new int[] { 2, 10, 10, 14, 19, 22, 26, 33, 33 },
new int[] { 1, 1, 0, 1, 1, 1, 1, 1, 0 });
}
private class SynonymWhitespaceAnalyzer extends Analyzer {
private SynonymMap synonyms;
private int maxSynonyms;
public SynonymWhitespaceAnalyzer(SynonymMap synonyms, int maxSynonyms) {
this.synonyms = synonyms;
this.maxSynonyms = maxSynonyms;
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
return ts;
}
private class SavedStreams {
Tokenizer source;
TokenStream result;
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
streams.result = new SynonymTokenFilter(streams.source, synonyms, maxSynonyms);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);
}
return streams.result;
}
}
}

View File

@ -1,94 +0,0 @@
package org.apache.lucene.wordnet;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestWordnet extends LuceneTestCase {
private IndexSearcher searcher;
private Directory dir;
String storePathName = new File(TEMP_DIR,"testLuceneWordnet").getAbsolutePath();
@Override
public void setUp() throws Exception {
super.setUp();
// create a temporary synonym index
File testFile = getDataFile("testSynonyms.txt");
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
_TestUtil.rmDir(new File(storePathName));
try {
Syns2Index.main(commandLineArgs);
} catch (Throwable t) { throw new RuntimeException(t); }
dir = newFSDirectory(new File(storePathName));
searcher = new IndexSearcher(dir, true);
}
public void testExpansion() throws IOException {
assertExpandsTo("woods", new String[] { "woods", "forest", "wood" });
}
public void testExpansionSingleQuote() throws IOException {
assertExpandsTo("king", new String[] { "king", "baron" });
}
private void assertExpandsTo(String term, String expected[]) throws IOException {
Query expandedQuery = SynExpand.expand(term, searcher, new
MockAnalyzer(random), "field", 1F);
BooleanQuery expectedQuery = new BooleanQuery();
for (String t : expected)
expectedQuery.add(new TermQuery(new Term("field", t)),
BooleanClause.Occur.SHOULD);
assertEquals(expectedQuery, expandedQuery);
}
@Override
public void tearDown() throws Exception {
if (searcher != null) {
searcher.close();
}
if (dir != null) {
dir.close();
}
rmDir(storePathName); // delete our temporary synonym index
super.tearDown();
}
private void rmDir(String directory) {
File dir = new File(directory);
File[] files = dir.listFiles();
for (int i = 0; i < files.length; i++) {
files[i].delete();
}
dir.delete();
}
}

View File

@ -1,9 +0,0 @@
s(100000001,1,'woods',n,1,0).
s(100000001,2,'wood',n,1,0).
s(100000001,3,'forest',n,1,0).
s(100000002,1,'wolfish',n,1,0).
s(100000002,2,'ravenous',n,1,0).
s(100000003,1,'king',n,1,1).
s(100000003,2,'baron',n,1,1).
s(100000004,1,'king''sevil',n,1,1).
s(100000004,2,'meany',n,1,1).

View File

@ -95,9 +95,6 @@ public class MemoryCodec extends Codec {
this.out = out;
this.field = field;
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, outputs);
// The byte[] output we create can easily be > 255 bytes:
builder.setAllowArrayArcs(false);
}
private class PostingsWriter extends PostingsConsumer {

View File

@ -0,0 +1,52 @@
package org.apache.lucene.store;
import org.apache.lucene.util.BytesRef;
/**
* @lucene.experimental
*/
public class ByteArrayDataOutput extends DataOutput {
private byte[] bytes;
private int pos;
private int limit;
public ByteArrayDataOutput(byte[] bytes) {
reset(bytes);
}
public ByteArrayDataOutput(byte[] bytes, int offset, int len) {
reset(bytes, offset, len);
}
public ByteArrayDataOutput() {
reset(BytesRef.EMPTY_BYTES);
}
public void reset(byte[] bytes) {
reset(bytes, 0, bytes.length);
}
public void reset(byte[] bytes, int offset, int len) {
this.bytes = bytes;
pos = offset;
limit = offset + len;
}
public int getPosition() {
return pos;
}
@Override
public void writeByte(byte b) {
assert pos < limit;
bytes[pos++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int length) {
assert pos + length <= limit;
System.arraycopy(b, offset, bytes, pos, length);
pos += length;
}
}
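A brief, hedged usage sketch of the class above (buffer size and values are illustrative; writeVInt comes from the DataOutput base class):

import java.io.IOException;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ByteArrayDataOutputExample {
  public static void main(String[] args) throws IOException {
    byte[] scratch = new byte[16];                              // fixed-size destination buffer
    ByteArrayDataOutput out = new ByteArrayDataOutput(scratch);
    out.writeVInt(1234);                                        // inherited from DataOutput
    out.writeByte((byte) 42);
    System.out.println("bytes written: " + out.getPosition());  // offset was 0
    out.reset(scratch, 0, scratch.length);                      // reuse the same instance
  }
}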

View File

@ -1,5 +1,7 @@
package org.apache.lucene.util;
import java.util.Comparator;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -167,7 +169,11 @@ public final class CharsRef implements Comparable<CharsRef>, CharSequence {
* the {@link CharsRef} to copy
*/
public void copy(CharsRef other) {
if (chars == null) {
chars = new char[other.length];
} else {
chars = ArrayUtil.grow(chars, other.length);
}
System.arraycopy(other.chars, other.offset, chars, 0, other.length);
length = other.length;
offset = 0;
@ -213,4 +219,56 @@ public final class CharsRef implements Comparable<CharsRef>, CharSequence {
public CharSequence subSequence(int start, int end) {
return new CharsRef(chars, offset + start, offset + end - 1);
}
private final static Comparator<CharsRef> utf16SortedAsUTF8SortOrder = new UTF16SortedAsUTF8Comparator();
public static Comparator<CharsRef> getUTF16SortedAsUTF8Comparator() {
return utf16SortedAsUTF8SortOrder;
}
private static class UTF16SortedAsUTF8Comparator implements Comparator<CharsRef> {
// Only singleton
private UTF16SortedAsUTF8Comparator() {};
public int compare(CharsRef a, CharsRef b) {
if (a == b)
return 0;
final char[] aChars = a.chars;
int aUpto = a.offset;
final char[] bChars = b.chars;
int bUpto = b.offset;
final int aStop = aUpto + Math.min(a.length, b.length);
while (aUpto < aStop) {
char aChar = aChars[aUpto++];
char bChar = bChars[bUpto++];
if (aChar != bChar) {
// http://icu-project.org/docs/papers/utf16_code_point_order.html
/* aChar != bChar, fix up each one if they're both in or above the surrogate range, then compare them */
if (aChar >= 0xd800 && bChar >= 0xd800) {
if (aChar >= 0xe000) {
aChar -= 0x800;
} else {
aChar += 0x2000;
}
if (bChar >= 0xe000) {
bChar -= 0x800;
} else {
bChar += 0x2000;
}
}
/* now aChar and bChar are in code point order */
return (int)aChar - (int)bChar; /* int must be 32 bits wide */
}
}
// One is a prefix of the other, or, they are equal:
return a.length - b.length;
}
}
}

View File

@ -71,7 +71,11 @@ public class FST<T> {
// Increment version to change it
private final static String FILE_FORMAT_NAME = "FST";
private final static int VERSION_START = 0;
private final static int VERSION_CURRENT = VERSION_START;
/** Changed numBytesPerArc for array'd case from byte to int. */
private final static int VERSION_INT_NUM_BYTES_PER_ARC = 1;
private final static int VERSION_CURRENT = VERSION_INT_NUM_BYTES_PER_ARC;
// Never serialized; just used to represent the virtual
// final node w/ no arcs:
@ -106,6 +110,8 @@ public class FST<T> {
private boolean allowArrayArcs = true;
private Arc<T> cachedRootArcs[];
public final static class Arc<T> {
public int label;
public T output;
@ -113,7 +119,7 @@ public class FST<T> {
int target;
byte flags;
T nextFinalOutput;
public T nextFinalOutput;
int nextArc;
// This is non-zero if current arcs are fixed array:
@ -176,7 +182,7 @@ public class FST<T> {
public FST(DataInput in, Outputs<T> outputs) throws IOException {
this.outputs = outputs;
writer = null;
CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_START);
CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_INT_NUM_BYTES_PER_ARC, VERSION_INT_NUM_BYTES_PER_ARC);
if (in.readByte() == 1) {
// accepts empty string
int numBytes = in.readVInt();
@ -209,6 +215,8 @@ public class FST<T> {
bytes = new byte[in.readVInt()];
in.readBytes(bytes, 0, bytes.length);
NO_OUTPUT = outputs.getNoOutput();
cacheRootArcs();
}
public INPUT_TYPE getInputType() {
@ -220,7 +228,7 @@ public class FST<T> {
return bytes.length;
}
void finish(int startNode) {
void finish(int startNode) throws IOException {
if (startNode == FINAL_END_NODE && emptyOutput != null) {
startNode = 0;
}
@ -231,6 +239,32 @@ public class FST<T> {
System.arraycopy(bytes, 0, finalBytes, 0, writer.posWrite);
bytes = finalBytes;
this.startNode = startNode;
cacheRootArcs();
}
// Caches first 128 labels
@SuppressWarnings("unchecked")
private void cacheRootArcs() throws IOException {
cachedRootArcs = (FST.Arc<T>[]) new FST.Arc[0x80];
final FST.Arc<T> arc = new FST.Arc<T>();
getFirstArc(arc);
final BytesReader in = getBytesReader(0);
if (targetHasArcs(arc)) {
readFirstRealArc(arc.target, arc);
while(true) {
assert arc.label != END_LABEL;
if (arc.label < cachedRootArcs.length) {
cachedRootArcs[arc.label] = new Arc<T>().copyFrom(arc);
} else {
break;
}
if (arc.isLast()) {
break;
}
readNextRealArc(arc, in);
}
}
}
void setEmptyOutput(T v) throws IOException {
@ -345,8 +379,9 @@ public class FST<T> {
writer.writeByte((byte) BIT_ARCS_AS_FIXED_ARRAY);
writer.writeVInt(node.numArcs);
// placeholder -- we'll come back and write the number
// of bytes per arc here:
writer.writeByte((byte) 0);
// of bytes per arc (int) here:
// TODO: we could make this a vInt instead
writer.writeInt(0);
fixedArrayStart = writer.posWrite;
//System.out.println(" do fixed arcs array arcsStart=" + fixedArrayStart);
} else {
@ -421,15 +456,21 @@ public class FST<T> {
}
}
// TODO: if arc'd arrays will be "too wasteful" by some
// measure, eg if arcs have vastly different sized
// outputs, then we should selectively disable array for
// such cases
if (doFixedArray) {
assert maxBytesPerArc > 0;
// 2nd pass just "expands" all arcs to take up a fixed
// byte size
final int sizeNeeded = fixedArrayStart + node.numArcs * maxBytesPerArc;
bytes = ArrayUtil.grow(bytes, sizeNeeded);
if (maxBytesPerArc > 255) {
throw new IllegalStateException("max arc size is too large (" + maxBytesPerArc + "); disable array arcs by calling Builder.setAllowArrayArcs(false)");
}
// TODO: we could make this a vInt instead
bytes[fixedArrayStart-4] = (byte) (maxBytesPerArc >> 24);
bytes[fixedArrayStart-3] = (byte) (maxBytesPerArc >> 16);
bytes[fixedArrayStart-2] = (byte) (maxBytesPerArc >> 8);
bytes[fixedArrayStart-1] = (byte) maxBytesPerArc;
// expand the arcs in place, backwards
@ -502,7 +543,7 @@ public class FST<T> {
if (arc.flag(BIT_ARCS_AS_FIXED_ARRAY)) {
// array: jump straight to end
arc.numArcs = in.readVInt();
arc.bytesPerArc = in.readByte() & 0xFF;
arc.bytesPerArc = in.readInt();
//System.out.println(" array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc);
arc.posArcsStart = in.pos;
arc.arcIdx = arc.numArcs - 2;
@ -528,7 +569,7 @@ public class FST<T> {
}
arc.nextArc = in.pos+1;
}
readNextRealArc(arc);
readNextRealArc(arc, in);
assert arc.isLast();
return arc;
}
@ -572,7 +613,7 @@ public class FST<T> {
//System.out.println(" fixedArray");
// this is first arc in a fixed-array
arc.numArcs = in.readVInt();
arc.bytesPerArc = in.readByte() & 0xFF;
arc.bytesPerArc = in.readInt();
arc.arcIdx = -1;
arc.nextArc = arc.posArcsStart = in.pos;
//System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos);
@ -580,7 +621,7 @@ public class FST<T> {
arc.nextArc = address;
arc.bytesPerArc = 0;
}
return readNextRealArc(arc);
return readNextRealArc(arc, in);
}
/**
@ -609,7 +650,7 @@ public class FST<T> {
}
return readFirstRealArc(arc.nextArc, arc);
} else {
return readNextRealArc(arc);
return readNextRealArc(arc, getBytesReader(0));
}
}
@ -627,7 +668,7 @@ public class FST<T> {
//System.out.println(" nextArc fake array");
in.pos--;
in.readVInt();
in.readByte();
in.readInt();
}
} else {
if (arc.bytesPerArc != 0) {
@ -645,17 +686,16 @@ public class FST<T> {
return readLabel(in);
}
Arc<T> readNextRealArc(Arc<T> arc) throws IOException {
Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
// this is a continuing arc in a fixed array
final BytesReader in;
if (arc.bytesPerArc != 0) {
// arcs are at fixed entries
arc.arcIdx++;
assert arc.arcIdx < arc.numArcs;
in = getBytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc);
in.pos = arc.posArcsStart - arc.arcIdx*arc.bytesPerArc;
} else {
// arcs are packed
in = getBytesReader(arc.nextArc);
in.pos = arc.nextArc;
}
arc.flags = in.readByte();
arc.label = readLabel(in);
@ -701,6 +741,17 @@ public class FST<T> {
/** Finds an arc leaving the incoming arc, replacing the arc in place.
* This returns null if the arc was not found, else the incoming arc. */
public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc) throws IOException {
assert cachedRootArcs != null;
// Short-circuit if this arc is in the root arc cache:
if (follow.target == startNode && labelToMatch != END_LABEL && labelToMatch < cachedRootArcs.length) {
final Arc<T> result = cachedRootArcs[labelToMatch];
if (result == null) {
return result;
} else {
arc.copyFrom(result);
return arc;
}
}
if (labelToMatch == END_LABEL) {
if (follow.isFinal()) {
@ -726,14 +777,18 @@ public class FST<T> {
// reusable stuff eg BytesReader:
final BytesReader in = getBytesReader(follow.target);
// System.out.println("fta label=" + (char) labelToMatch);
if ((in.readByte() & BIT_ARCS_AS_FIXED_ARRAY) != 0) {
// Arcs are full array; do binary search:
arc.numArcs = in.readVInt();
arc.bytesPerArc = in.readByte() & 0xFF;
//System.out.println(" bs " + arc.numArcs);
arc.bytesPerArc = in.readInt();
arc.posArcsStart = in.pos;
int low = 0;
int high = arc.numArcs-1;
while (low <= high) {
//System.out.println(" cycle");
int mid = (low + high) >>> 1;
in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
int midLabel = readLabel(in);
@ -744,7 +799,8 @@ public class FST<T> {
high = mid - 1;
else {
arc.arcIdx = mid-1;
return readNextRealArc(arc);
//System.out.println(" found!");
return readNextRealArc(arc, in);
}
}
@ -754,7 +810,12 @@ public class FST<T> {
// Linear scan
readFirstTargetArc(follow, arc);
while(true) {
//System.out.println(" non-bs cycle");
// TODO: we should fix this code to not have to create
// object for the output of every arc we scan... only
// for the matching arc, if found
if (arc.label == labelToMatch) {
//System.out.println(" found!");
return arc;
} else if (arc.label > labelToMatch) {
return null;
@ -863,7 +924,7 @@ public class FST<T> {
}
// Non-static: reads byte[] from FST
class BytesReader extends DataInput {
final class BytesReader extends DataInput {
int pos;
public BytesReader(int pos) {

View File

@ -170,7 +170,7 @@ abstract class FSTEnum<T> {
if (found) {
// Match
arc.arcIdx = mid-1;
fst.readNextRealArc(arc);
fst.readNextRealArc(arc, in);
assert arc.arcIdx == mid;
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
output[upto] = fst.outputs.add(output[upto-1], arc.output);
@ -185,7 +185,7 @@ abstract class FSTEnum<T> {
} else if (low == arc.numArcs) {
// Dead end
arc.arcIdx = arc.numArcs-2;
fst.readNextRealArc(arc);
fst.readNextRealArc(arc, in);
assert arc.isLast();
// Dead end (target is after the last arc);
// rollback to last fork then push
@ -205,7 +205,7 @@ abstract class FSTEnum<T> {
}
} else {
arc.arcIdx = (low > high ? low : high)-1;
fst.readNextRealArc(arc);
fst.readNextRealArc(arc, in);
assert arc.label > targetLabel;
pushFirst();
return;
@ -309,7 +309,7 @@ abstract class FSTEnum<T> {
// Match -- recurse
//System.out.println(" match! arcIdx=" + mid);
arc.arcIdx = mid-1;
fst.readNextRealArc(arc);
fst.readNextRealArc(arc, in);
assert arc.arcIdx == mid;
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
output[upto] = fst.outputs.add(output[upto-1], arc.output);
@ -352,7 +352,7 @@ abstract class FSTEnum<T> {
// There is a floor arc:
arc.arcIdx = (low > high ? high : low)-1;
//System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
fst.readNextRealArc(arc);
fst.readNextRealArc(arc, in);
assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel;
assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
pushLast();

View File

@ -35,6 +35,7 @@ final class NodeHash<T> {
}
private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address) throws IOException {
final FST<T>.BytesReader in = fst.getBytesReader(0);
fst.readFirstRealArc(address, scratchArc);
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
return false;
@ -56,7 +57,7 @@ final class NodeHash<T> {
return false;
}
}
fst.readNextRealArc(scratchArc);
fst.readNextRealArc(scratchArc, in);
}
return false;
@ -87,6 +88,7 @@ final class NodeHash<T> {
// hash code for a frozen node
private int hash(int node) throws IOException {
final int PRIME = 31;
final FST<T>.BytesReader in = fst.getBytesReader(0);
//System.out.println("hash frozen");
int h = 0;
fst.readFirstRealArc(node, scratchArc);
@ -102,7 +104,7 @@ final class NodeHash<T> {
if (scratchArc.isLast()) {
break;
}
fst.readNextRealArc(scratchArc);
fst.readNextRealArc(scratchArc, in);
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
return h & Integer.MAX_VALUE;

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -166,9 +166,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -166,9 +166,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>
@ -359,12 +356,6 @@ document.write("Last Published: " + document.lastModified);
</li>
</ul>
<ul>
<li>
<a href="api/contrib-wordnet/index.html">Wordnet</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>javadoc-contrib-wordnet</em>
</li>
</ul>
<ul>
<li>
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>javadoc-contrib-xml-query-parser</em>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="../api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="../api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="../api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>
@ -263,9 +260,6 @@ document.write("Last Published: " + document.lastModified);
<a href="#spellchecker">spellchecker</a>
</li>
<li>
<a href="#wordnet">wordnet</a>
</li>
<li>
<a href="#xml-query-parser">xml-query-parser</a>
</li>
</ul>
@ -375,12 +369,7 @@ document.write("Last Published: " + document.lastModified);
<p>Provides tools for spellchecking and suggestions with Lucene.</p>
<p>See <a href="../api/contrib-spellchecker/index.html">spellchecker javadoc</a>
</p>
<a name="N100DE"></a><a name="wordnet"></a>
<h3 class="boxed">wordnet</h3>
<p>Tools to help utilize wordnet synonyms with Lucene</p>
<p>See <a href="../api/contrib-wordnet/index.html">wordnet javadoc</a>
</p>
<a name="N100ED"></a><a name="xml-query-parser"></a>
<a name="N100DE"></a><a name="xml-query-parser"></a>
<h3 class="boxed">xml-query-parser</h3>
<p>A QueryParser that can read queries written in an XML format.</p>
<p>See <a href="../api/contrib-wordnet/index.html">xml-query-parser javadoc</a>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -168,9 +168,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -166,9 +166,6 @@ document.write("Last Published: " + document.lastModified);
<a href="api/contrib-spellchecker/index.html">Spellchecker</a>
</div>
<div class="menuitem">
<a href="api/contrib-wordnet/index.html">Wordnet</a>
</div>
<div class="menuitem">
<a href="api/contrib-xml-query-parser/index.html">XML Query Parser</a>
</div>
</div>

View File

@ -106,11 +106,6 @@
<p>See <a href="../api/contrib-spellchecker/index.html">spellchecker javadoc</a></p>
</section>
<section id="wordnet"><title>wordnet</title>
<p>Tools to help utilize wordnet synonyms with Lucene</p>
<p>See <a href="../api/contrib-wordnet/index.html">wordnet javadoc</a></p>
</section>
<section id="xml-query-parser"><title>xml-query-parser</title>
<p>A QueryParser that can read queries written in an XML format.</p>
<p>See <a href="../api/contrib-wordnet/index.html">xml-query-parser javadoc</a></p>

View File

@ -66,7 +66,6 @@ See http://forrest.apache.org/docs/linking.html for more info
<javadoc-contrib-remote label="Remote" href="ext:javadocs-contrib-remote"/>
<javadoc-contrib-spatial label="Spatial" href="ext:javadocs-contrib-spatial"/>
<javadoc-contrib-spellchecker label="Spellchecker" href="ext:javadocs-contrib-spellchecker"/>
<javadoc-contrib-wordnet label="Wordnet" href="ext:javadocs-contrib-wordnet"/>
<javadoc-contrib-xml-query-parser label="XML Query Parser" href="ext:javadocs-contrib-xml-query-parser"/>
</javadoc-contrib>
</javadoc>
@ -106,7 +105,6 @@ See http://forrest.apache.org/docs/linking.html for more info
<javadocs-contrib-remote href="api/contrib-remote/index.html"/>
<javadocs-contrib-spatial href="api/contrib-spatial/index.html"/>
<javadocs-contrib-spellchecker href="api/contrib-spellchecker/index.html"/>
<javadocs-contrib-wordnet href="api/contrib-wordnet/index.html"/>
<javadocs-contrib-xml-query-parser href="api/contrib-xml-query-parser/index.html"/>
<forrest href="http://forrest.apache.org/">

View File

@ -261,6 +261,10 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
text = _TestUtil.randomUnicodeString(random, maxWordLength);
}
if (VERBOSE) {
System.out.println("NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
}
TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text));
assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
@ -286,6 +290,9 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
ts.close();
// verify reusing is "reproducible" and also get the normal tokenstream sanity checks
if (!tokens.isEmpty()) {
if (VERBOSE) {
System.out.println("NOTE: BaseTokenStreamTestCase: re-run analysis");
}
if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
// offset + pos + type
assertAnalyzesToReuse(a, text,

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
@ -166,6 +167,13 @@ public class TestIndexWriterCommit extends LuceneTestCase {
* measure max temp disk space used.
*/
public void testCommitOnCloseDiskUsage() throws IOException {
// MemoryCodec, since it uses FST, is not necessarily
// "additive", ie if you add up N small FSTs, then merge
// them, the merged result can easily be larger than the
// sum because the merged FST may use array encoding for
// some arcs (which uses more space):
assumeFalse("This test cannot run with Memory codec", CodecProvider.getDefault().getFieldCodec("id").equals("Memory"));
assumeFalse("This test cannot run with Memory codec", CodecProvider.getDefault().getFieldCodec("content").equals("Memory"));
MockDirectoryWrapper dir = newDirectory();
Analyzer analyzer;
if (random.nextBoolean()) {

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
@ -142,6 +143,14 @@ public class TestIndexWriterOnDiskFull extends LuceneTestCase {
*/
public void testAddIndexOnDiskFull() throws IOException
{
// MemoryCodec, since it uses FST, is not necessarily
// "additive", ie if you add up N small FSTs, then merge
// them, the merged result can easily be larger than the
// sum because the merged FST may use array encoding for
// some arcs (which uses more space):
assumeFalse("This test cannot run with Memory codec", CodecProvider.getDefault().getFieldCodec("id").equals("Memory"));
assumeFalse("This test cannot run with Memory codec", CodecProvider.getDefault().getFieldCodec("content").equals("Memory"));
int START_COUNT = 57;
int NUM_DIR = TEST_NIGHTLY ? 50 : 5;
int END_COUNT = START_COUNT + NUM_DIR* (TEST_NIGHTLY ? 25 : 5);

View File

@ -0,0 +1,41 @@
package org.apache.lucene.util;
import java.util.Arrays;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestCharsRef extends LuceneTestCase {
public void testUTF16InUTF8Order() {
final int numStrings = atLeast(1000);
BytesRef utf8[] = new BytesRef[numStrings];
CharsRef utf16[] = new CharsRef[numStrings];
for (int i = 0; i < numStrings; i++) {
String s = _TestUtil.randomUnicodeString(random);
utf8[i] = new BytesRef(s);
utf16[i] = new CharsRef(s);
}
Arrays.sort(utf8);
Arrays.sort(utf16, CharsRef.getUTF16SortedAsUTF8Comparator());
for (int i = 0; i < numStrings; i++) {
assertEquals(utf8[i].utf8ToString(), utf16[i].toString());
}
}
}

View File

@ -0,0 +1,179 @@
package org.apache.lucene.analysis.synonym;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.text.ParseException;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.CharsRef;
/**
* Parser for the Solr synonyms format.
* <ol>
* <li> Blank lines and lines starting with '#' are comments.
* <li> Explicit mappings match any token sequence on the LHS of "=>"
* and replace with all alternatives on the RHS. These types of mappings
* ignore the expand parameter in the constructor.
* Example:
* <blockquote>i-pod, i pod => ipod</blockquote>
* <li> Equivalent synonyms may be separated with commas and give
* no explicit mapping. In this case the mapping behavior will
* be taken from the expand parameter in the constructor. This allows
* the same synonym file to be used in different synonym handling strategies.
* Example:
* <blockquote>ipod, i-pod, i pod</blockquote>
*
* <li> Multiple synonym mapping entries are merged.
* Example:
* <blockquote>
* foo => foo bar<br>
* foo => baz<br><br>
* is equivalent to<br><br>
* foo => foo bar, baz
* </blockquote>
* </ol>
* @lucene.experimental
*/
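// Illustrative usage sketch (not part of this patch): load rules in the format
// described above and build a SynonymMap; "analyzer" and "tokens" are assumed to
// exist (the same Analyzer you use at index/query time, and your input TokenStream),
// and java.io.StringReader is assumed imported.
//
//   SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);   // dedup=true, expand=true
//   parser.add(new StringReader("i-pod, i pod => ipod\nfoo => foo bar, baz\n"));
//   SynonymMap map = parser.build();
//   TokenStream syns = new SynonymFilter(tokens, map, false);                 // then wrap your stream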
public class SolrSynonymParser extends SynonymMap.Builder {
private final boolean expand;
private final Analyzer analyzer;
public SolrSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
super(dedup);
this.expand = expand;
this.analyzer = analyzer;
}
public void add(Reader in) throws IOException, ParseException {
LineNumberReader br = new LineNumberReader(in);
try {
addInternal(br);
} catch (IllegalArgumentException e) {
ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
ex.initCause(e);
throw ex;
} finally {
br.close();
}
}
private void addInternal(BufferedReader in) throws IOException {
String line = null;
while ((line = in.readLine()) != null) {
if (line.length() == 0 || line.charAt(0) == '#') {
continue; // ignore empty lines and comments
}
CharsRef inputs[];
CharsRef outputs[];
// TODO: we could process this more efficiently.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
if (sides.length != 2) {
throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
}
String inputStrings[] = split(sides[0], ",");
inputs = new CharsRef[inputStrings.length];
for (int i = 0; i < inputs.length; i++) {
inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
}
String outputStrings[] = split(sides[1], ",");
outputs = new CharsRef[outputStrings.length];
for (int i = 0; i < outputs.length; i++) {
outputs[i] = analyze(analyzer, unescape(outputStrings[i]).trim(), new CharsRef());
}
} else {
String inputStrings[] = split(line, ",");
inputs = new CharsRef[inputStrings.length];
for (int i = 0; i < inputs.length; i++) {
inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
}
if (expand) {
outputs = inputs;
} else {
outputs = new CharsRef[1];
outputs[0] = inputs[0];
}
}
// currently we include the term itself in the map,
// and use includeOrig = false always.
// this is how the existing filter does it, but it's actually a bug,
// especially if combined with ignoreCase = true
for (int i = 0; i < inputs.length; i++) {
for (int j = 0; j < outputs.length; j++) {
add(inputs[i], outputs[j], false);
}
}
}
}
private static String[] split(String s, String separator) {
ArrayList<String> list = new ArrayList<String>(2);
StringBuilder sb = new StringBuilder();
int pos=0, end=s.length();
while (pos < end) {
if (s.startsWith(separator,pos)) {
if (sb.length() > 0) {
list.add(sb.toString());
sb=new StringBuilder();
}
pos+=separator.length();
continue;
}
char ch = s.charAt(pos++);
if (ch=='\\') {
sb.append(ch);
if (pos>=end) break; // ERROR, or let it go?
ch = s.charAt(pos++);
}
sb.append(ch);
}
if (sb.length() > 0) {
list.add(sb.toString());
}
return list.toArray(new String[list.size()]);
}
private String unescape(String s) {
if (s.indexOf("\\") >= 0) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < s.length(); i++) {
char ch = s.charAt(i);
if (ch == '\\' && i < s.length() - 1) {
sb.append(s.charAt(++i));
} else {
sb.append(ch);
}
}
return sb.toString();
}
return s;
}
}

View File

@ -1,3 +1,5 @@
package org.apache.lucene.analysis.synonym;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -15,245 +17,550 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
import java.io.IOException;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.FST;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
/** SynonymFilter handles multi-token synonyms with variable position increment offsets.
* <p>
* The matched tokens from the input stream may be optionally passed through (includeOrig=true)
* or discarded. If the original tokens are included, the position increments may be modified
* to retain absolute positions after merging with the synonym tokenstream.
* <p>
* Generated synonyms will start at the same position as the first matched source token.
/**
* Matches single or multi word synonyms in a token stream.
* This token stream cannot properly handle position
* increments != 1, ie, you should place this filter before
* filtering out stop words.
*
* <p>Note that with the current implementation, parsing is
* greedy, so whenever multiple parses would apply, the rule
* starting the earliest and parsing the most tokens wins.
* For example if you have these rules:
*
* <pre>
* a -> x
* a b -> y
* b c d -> z
* </pre>
*
* Then input <code>a b c d e</code> parses to <code>y b c
* d</code>, ie the 2nd rule "wins" because it started
* earliest and matched more input tokens than the other rules
* starting at that point.</p>
*
* <p>A future improvement to this filter could allow
* non-greedy parsing, such that the 3rd rule would win, and
* also separately allow multiple parses, such that all 3
* rules would match, perhaps even on a rule by rule
* basis.</p>
*
* <p><b>NOTE</b>: when a match occurs, the output tokens
* associated with the matching rule are "stacked" on top of
* the input stream (if the rule had
* <code>keepOrig=true</code>) and also on top of another
* matched rule's output tokens. This is not a correct
* solution, as really the output should be an arbitrary
* graph/lattice. For example, with the above match, you
* would expect an exact <code>PhraseQuery</code> <code>"y b
* c"</code> to match the parsed tokens, but it will fail to
* do so. This limitation is necessary because Lucene's
* TokenStream (and index) cannot yet represent an arbitrary
* graph.</p>
*
* <p><b>NOTE</b>: If multiple incoming tokens arrive on the
* same position, only the first token at that position is
* used for parsing. Subsequent tokens simply pass through
* and are not parsed. A future improvement would be to
* allow these tokens to also be matched.</p>
*/
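// Illustrative usage sketch (not part of this patch): builds the three example
// rules above with SynonymMap.Builder and wraps an existing TokenStream named
// "tokens"; the usual org.apache.lucene.analysis and org.apache.lucene.util
// imports are assumed.
//
//   SynonymMap.Builder builder = new SynonymMap.Builder(true);        // dedup identical rules
//   builder.add(new CharsRef("a"), new CharsRef("x"), true);          // a -> x
//   builder.add(SynonymMap.Builder.join(new String[] {"a", "b"}, new CharsRef()),
//               new CharsRef("y"), true);                             // a b -> y
//   builder.add(SynonymMap.Builder.join(new String[] {"b", "c", "d"}, new CharsRef()),
//               new CharsRef("z"), true);                             // b c d -> z
//   SynonymMap map = builder.build();
//   TokenStream syns = new SynonymFilter(tokens, map, true);          // ignoreCase=true
//   // feeding "a b c d e" through syns then exercises the greedy matching described above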
// TODO: maybe we should resolve token -> wordID then run
// FST on wordIDs, for better perf?
// TODO: a more efficient approach would be Aho/Corasick's
// algorithm
// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm
// It improves over the current approach here
// because it does not fully re-start matching at every
// token. For example, if one pattern is "a b c x"
// and another is "b c d" and the input is "a b c d", on
// trying to parse "a b c x" but failing when you got to x,
// rather than starting over again you really should
// immediately recognize that "b c d" matches at the next
// input. I suspect this won't matter that much in
// practice, but it's possible on some set of synonyms it
// will. We'd have to modify Aho/Corasick to enforce our
// conflict resolving (eg greedy matching) because that algo
// finds all matches.
public final class SynonymFilter extends TokenFilter {
private final SynonymMap map; // Map<String, SynonymMap>
private Iterator<AttributeSource> replacement; // iterator over generated tokens
public static final String TYPE_SYNONYM = "SYNONYM";
public SynonymFilter(TokenStream in, SynonymMap map) {
super(in);
if (map == null)
throw new IllegalArgumentException("map is required");
private final SynonymMap synonyms;
this.map = map;
// just ensuring these attributes exist...
addAttribute(CharTermAttribute.class);
addAttribute(PositionIncrementAttribute.class);
addAttribute(OffsetAttribute.class);
addAttribute(TypeAttribute.class);
private final boolean ignoreCase;
private final int rollBufferSize;
private int captureCount;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
// How many future input tokens have already been matched
// to a synonym; because the matching is "greedy" we don't
// try to do any more matching for such tokens:
private int inputSkipCount;
// Hold all buffered (read ahead) stacked input tokens for
// a future position. When multiple tokens are at the
// same position, we only store (and match against) the
// term for the first token at the position, but capture
// state for (and enumerate) all other tokens at this
// position:
private static class PendingInput {
final CharsRef term = new CharsRef();
AttributeSource.State state;
boolean keepOrig;
boolean consumed = true;
int startOffset;
int endOffset;
public void reset() {
state = null;
consumed = true;
keepOrig = false;
}
};
// Rolling buffer, holding pending input tokens we had to
// clone because we needed to look ahead, indexed by
// position:
private final PendingInput[] futureInputs;
// Holds pending output synonyms for one future position:
private static class PendingOutputs {
CharsRef[] outputs;
int upto;
int count;
int posIncr = 1;
public PendingOutputs() {
outputs = new CharsRef[1];
}
/*
* Need to worry about multiple scenarios:
* - need to go for the longest match
* a b => foo #shouldn't match if "a b" is followed by "c d"
* a b c d => bar
* - need to backtrack - retry matches for tokens already read
* a b c d => foo
* b c => bar
* If the input stream is "a b c x", one will consume "a b c d"
* trying to match the first rule... all but "a" should be
* pushed back so a match may be made on "b c".
* - don't try and match generated tokens (thus need separate queue)
* matching is not recursive.
* - handle optional generation of original tokens in all these cases,
* merging token streams to preserve token positions.
* - preserve original positionIncrement of first matched token
*/
@Override
public boolean incrementToken() throws IOException {
while (true) {
// if there are any generated tokens, return them... don't try any
// matches against them, as we specifically don't want recursion.
if (replacement!=null && replacement.hasNext()) {
copy(this, replacement.next());
return true;
public void reset() {
upto = count = 0;
posIncr = 1;
}
// common case fast-path of first token not matching anything
AttributeSource firstTok = nextTok();
if (firstTok == null) return false;
CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
if (result == null) {
copy(this, firstTok);
return true;
public CharsRef pullNext() {
assert upto < count;
final CharsRef result = outputs[upto++];
posIncr = 0;
if (upto == count) {
reset();
}
// fast-path failed, clone ourselves if needed
if (firstTok == this)
firstTok = cloneAttributes();
// OK, we matched a token, so find the longest match.
matched = new LinkedList<AttributeSource>();
result = match(result);
if (result==null) {
// no match, simply return the first token read.
copy(this, firstTok);
return true;
}
// reuse, or create new one each time?
ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(result.synonyms.length + matched.size() + 1);
//
// there was a match... let's generate the new tokens, merging
// in the matched tokens (position increments need adjusting)
//
AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
boolean includeOrig = result.includeOrig();
AttributeSource origTok = includeOrig ? firstTok : null;
PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
int repPos=0; // curr position in replacement token stream
int pos=0; // current position in merged token stream
for (int i=0; i<result.synonyms.length; i++) {
Token repTok = result.synonyms[i];
AttributeSource newTok = firstTok.cloneAttributes();
CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to original
// if necessary, insert original tokens and adjust position increment
while (origTok != null && origPos <= repPos) {
PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPosInc.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origPosInc.getPositionIncrement();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) {
origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPos += origPosInc.getPositionIncrement();
}
}
newPosIncAtt.setPositionIncrement(repPos - pos);
generated.add(newTok);
pos += newPosIncAtt.getPositionIncrement();
}
// finish up any leftover original tokens
while (origTok!=null) {
PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPosInc.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origPosInc.getPositionIncrement();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) {
origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPos += origPosInc.getPositionIncrement();
}
}
// what if we replaced a longer sequence with a shorter one?
// a/0 b/5 => foo/0
// should I re-create the gap on the next buffered token?
replacement = generated.iterator();
// Now return to the top of the loop to read and return the first
// generated token.. The reason this is done is that we may have generated
// nothing at all, and may need to continue with more matching logic.
}
}
//
// Defer creation of the buffer until the first time it is used to
// optimize short fields with no matches.
//
private LinkedList<AttributeSource> buffer;
private LinkedList<AttributeSource> matched;
private boolean exhausted;
private AttributeSource nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst();
} else {
if (!exhausted && input.incrementToken()) {
return this;
} else {
exhausted = true;
return null;
}
}
}
private void pushTok(AttributeSource t) {
if (buffer==null) buffer=new LinkedList<AttributeSource>();
buffer.addFirst(t);
}
private SynonymMap match(SynonymMap map) throws IOException {
SynonymMap result = null;
if (map.submap != null) {
AttributeSource tok = nextTok();
if (tok != null) {
// clone ourselves.
if (tok == this)
tok = cloneAttributes();
// check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
if (subMap != null) {
// recurse
result = match(subMap);
}
if (result != null) {
matched.addFirst(tok);
} else {
// push back unmatched token
pushTok(tok);
}
}
}
// if no longer sequence matched, so if this node has synonyms, it's the match.
if (result==null && map.synonyms!=null) {
result = map;
}
return result;
}
private void copy(AttributeSource target, AttributeSource source) {
if (target != source)
source.copyTo(target);
public void add(char[] output, int offset, int len) {
if (count == outputs.length) {
final CharsRef[] next = new CharsRef[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(outputs, 0, next, 0, count);
outputs = next;
}
if (outputs[count] == null) {
outputs[count] = new CharsRef();
}
outputs[count].copy(output, offset, len);
count++;
}
};
private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
// Rolling buffer, holding stack of pending synonym
// outputs, indexed by position:
private final PendingOutputs[] futureOutputs;
// Where (in rolling buffers) to write next input saved state:
private int nextWrite;
// Where (in rolling buffers) to read next input saved state:
private int nextRead;
// True once we've read last token
private boolean finished;
private final FST.Arc<BytesRef> scratchArc;
private final FST<BytesRef> fst;
private final BytesRef scratchBytes = new BytesRef();
private final CharsRef scratchChars = new CharsRef();
/**
* @param input input tokenstream
* @param synonyms synonym map
* @param ignoreCase case-folds input for matching with {@link Character#toLowerCase(int)}.
* Note, if you set this to true, its your responsibility to lowercase
* the input entries when you create the {@link SynonymMap}
*/
public SynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
super(input);
this.synonyms = synonyms;
this.ignoreCase = ignoreCase;
this.fst = synonyms.fst;
if (fst == null) {
throw new IllegalArgumentException("fst must be non-null");
}
// Must be 1+ so that when roll buffer is at full
// lookahead we can distinguish this full buffer from
// the empty buffer:
rollBufferSize = 1+synonyms.maxHorizontalContext;
futureInputs = new PendingInput[rollBufferSize];
futureOutputs = new PendingOutputs[rollBufferSize];
for(int pos=0;pos<rollBufferSize;pos++) {
futureInputs[pos] = new PendingInput();
futureOutputs[pos] = new PendingOutputs();
}
//System.out.println("FSTFilt maxH=" + synonyms.maxHorizontalContext);
scratchArc = new FST.Arc<BytesRef>();
}
private void capture() {
captureCount++;
//System.out.println(" capture slot=" + nextWrite);
final PendingInput input = futureInputs[nextWrite];
input.state = captureState();
input.consumed = false;
input.term.copy(termAtt.buffer(), 0, termAtt.length());
nextWrite = rollIncr(nextWrite);
// Buffer head should never catch up to tail:
assert nextWrite != nextRead;
}
/*
This is the core of this TokenFilter: it locates the
synonym matches and buffers up the results into
futureInputs/Outputs.
NOTE: this calls input.incrementToken and does not
capture the state if no further tokens were checked. So
caller must then forward state to our caller, or capture:
*/
private void parse() throws IOException {
//System.out.println("\nS: parse");
assert inputSkipCount == 0;
int curNextRead = nextRead;
// Holds the longest match we've seen so far:
BytesRef matchOutput = null;
int matchInputLength = 0;
BytesRef pendingOutput = fst.outputs.getNoOutput();
fst.getFirstArc(scratchArc);
assert scratchArc.output == fst.outputs.getNoOutput();
int tokenCount = 0;
byToken:
while(true) {
// Pull next token's chars:
final char[] buffer;
final int bufferLen;
//System.out.println(" cycle nextRead=" + curNextRead + " nextWrite=" + nextWrite);
if (curNextRead == nextWrite) {
// We used up our lookahead buffer of input tokens
// -- pull next real input token:
if (finished) {
break;
} else {
//System.out.println(" input.incrToken");
assert futureInputs[nextWrite].consumed;
// Not correct: a syn match whose output is longer
// than its input can set future inputs keepOrig
// to true:
//assert !futureInputs[nextWrite].keepOrig;
if (input.incrementToken()) {
buffer = termAtt.buffer();
bufferLen = termAtt.length();
final PendingInput input = futureInputs[nextWrite];
input.startOffset = offsetAtt.startOffset();
input.endOffset = offsetAtt.endOffset();
//System.out.println(" new token=" + new String(buffer, 0, bufferLen));
if (nextRead != nextWrite) {
capture();
} else {
input.consumed = false;
}
} else {
// No more input tokens
//System.out.println(" set end");
finished = true;
break;
}
}
} else {
// Still in our lookahead
buffer = futureInputs[curNextRead].term.chars;
bufferLen = futureInputs[curNextRead].term.length;
//System.out.println(" old token=" + new String(buffer, 0, bufferLen));
}
tokenCount++;
// Run each char in this token through the FST:
int bufUpto = 0;
while(bufUpto < bufferLen) {
final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc) == null) {
//System.out.println(" stop");
break byToken;
}
// Accum the output
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
//System.out.println(" char=" + buffer[bufUpto] + " output=" + pendingOutput + " arc.output=" + scratchArc.output);
bufUpto += Character.charCount(codePoint);
}
// OK, entire token matched; now see if this is a final
// state:
if (scratchArc.isFinal()) {
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
matchInputLength = tokenCount;
//System.out.println(" found matchLength=" + matchInputLength + " output=" + matchOutput);
}
// See if the FST wants to continue matching (ie, needs to
// see the next input token):
if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc) == null) {
// No further rules can match here; we're done
// searching for matching rules starting at the
// current input position.
break;
} else {
// More matching is possible -- accum the output (if
// any) of the WORD_SEP arc:
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
if (nextRead == nextWrite) {
capture();
}
}
curNextRead = rollIncr(curNextRead);
}
if (nextRead == nextWrite && !finished) {
//System.out.println(" skip write slot=" + nextWrite);
nextWrite = rollIncr(nextWrite);
}
if (matchOutput != null) {
//System.out.println(" add matchLength=" + matchInputLength + " output=" + matchOutput);
inputSkipCount = matchInputLength;
addOutput(matchOutput);
} else if (nextRead != nextWrite) {
// Even though we had no match here, we set to 1
// because we need to skip current input token before
// trying to match again:
inputSkipCount = 1;
} else {
assert finished;
}
//System.out.println(" parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
}
// Interleaves all output tokens onto the futureOutputs:
private void addOutput(BytesRef bytes) {
bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);
final int code = bytesReader.readVInt();
final boolean keepOrig = (code & 0x1) == 0;
final int count = code >>> 1;
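// e.g. code==6 (binary 110) decodes to keepOrig=true (low bit clear) and count=3 output words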
//System.out.println(" addOutput count=" + count + " keepOrig=" + keepOrig);
for(int outputIDX=0;outputIDX<count;outputIDX++) {
synonyms.words.get(bytesReader.readVInt(),
scratchBytes);
//System.out.println(" outIDX=" + outputIDX + " bytes=" + scratchBytes.length);
UnicodeUtil.UTF8toUTF16(scratchBytes, scratchChars);
int lastStart = scratchChars.offset;
final int chEnd = lastStart + scratchChars.length;
int outputUpto = nextRead;
for(int chIDX=lastStart;chIDX<=chEnd;chIDX++) {
if (chIDX == chEnd || scratchChars.chars[chIDX] == SynonymMap.WORD_SEPARATOR) {
final int outputLen = chIDX - lastStart;
// Caller is not allowed to have empty string in
// the output:
assert outputLen > 0: "output contains empty string: " + scratchChars;
futureOutputs[outputUpto].add(scratchChars.chars, lastStart, outputLen);
//System.out.println(" " + new String(scratchChars.chars, lastStart, outputLen) + " outputUpto=" + outputUpto);
lastStart = 1+chIDX;
futureInputs[outputUpto].keepOrig |= keepOrig;
//System.out.println(" slot=" + outputUpto + " keepOrig=" + keepOrig);
outputUpto = rollIncr(outputUpto);
assert futureOutputs[outputUpto].posIncr == 1: "outputUpto=" + outputUpto + " vs nextWrite=" + nextWrite;
}
}
}
}
// ++ mod rollBufferSize
private int rollIncr(int count) {
count++;
if (count == rollBufferSize) {
return 0;
} else {
return count;
}
}
// for testing
int getCaptureCount() {
return captureCount;
}
@Override
public boolean incrementToken() throws IOException {
//System.out.println("\nS: incrToken inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
while(true) {
// First play back any buffered future inputs/outputs
// w/o running parsing again:
while (inputSkipCount != 0) {
// At each position, we first output the original
// token
// TODO: maybe just a PendingState class, holding
// both input & outputs?
final PendingInput input = futureInputs[nextRead];
final PendingOutputs outputs = futureOutputs[nextRead];
//System.out.println(" cycle nextRead=" + nextRead + " nextWrite=" + nextWrite + " inputSkipCount="+ inputSkipCount + " input.keepOrig=" + input.keepOrig + " input.consumed=" + input.consumed + " input.state=" + input.state);
if (!input.consumed && (input.keepOrig || outputs.count == 0)) {
if (input.state != null) {
// Return a previously saved token (because we
// had to lookahead):
restoreState(input.state);
} else {
// Pass-through case: return token we just pulled
// but didn't capture:
assert inputSkipCount == 1: "inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead;
}
input.reset();
if (outputs.count > 0) {
outputs.posIncr = 0;
} else {
nextRead = rollIncr(nextRead);
inputSkipCount--;
}
//System.out.println(" return token=" + termAtt.toString());
return true;
} else if (outputs.upto < outputs.count) {
// Still have pending outputs to replay at this
// position
input.reset();
final int posIncr = outputs.posIncr;
final CharsRef output = outputs.pullNext();
clearAttributes();
termAtt.copyBuffer(output.chars, output.offset, output.length);
typeAtt.setType(TYPE_SYNONYM);
offsetAtt.setOffset(input.startOffset, input.endOffset);
posIncrAtt.setPositionIncrement(posIncr);
if (outputs.count == 0) {
// Done with the buffered input and all outputs at
// this position
nextRead = rollIncr(nextRead);
inputSkipCount--;
}
//System.out.println(" return token=" + termAtt.toString());
return true;
} else {
// Done with the buffered input and all outputs at
// this position
input.reset();
nextRead = rollIncr(nextRead);
inputSkipCount--;
}
}
if (finished && nextRead == nextWrite) {
// End case: if any output syns went beyond end of
// input stream, enumerate them now:
final PendingOutputs outputs = futureOutputs[nextRead];
if (outputs.upto < outputs.count) {
final int posIncr = outputs.posIncr;
final CharsRef output = outputs.pullNext();
futureInputs[nextRead].reset();
if (outputs.count == 0) {
nextWrite = nextRead = rollIncr(nextRead);
}
clearAttributes();
termAtt.copyBuffer(output.chars, output.offset, output.length);
typeAtt.setType(TYPE_SYNONYM);
//System.out.println(" set posIncr=" + outputs.posIncr + " outputs=" + outputs);
posIncrAtt.setPositionIncrement(posIncr);
//System.out.println(" return token=" + termAtt.toString());
return true;
} else {
return false;
}
}
// Find new synonym matches:
parse();
}
}
@Override
public void reset() throws IOException {
super.reset();
captureCount = 0;
finished = false;
// In normal usage these resets would not be needed,
// since they reset-as-they-are-consumed, but the app
// may not consume all input tokens in which case we
// have leftover state here:
for (PendingInput input : futureInputs) {
input.reset();
replacement = null;
exhausted = false;
}
for (PendingOutputs output : futureOutputs) {
output.reset();
}
}
}

View File

@ -1,3 +1,5 @@
package org.apache.lucene.analysis.synonym;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -15,146 +17,301 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import java.util.*;
/** Mapping rules for use with {@link SynonymFilter}
/**
* A map of synonyms, keys and values are phrases.
* @lucene.experimental
*/
public class SynonymMap {
/** @lucene.internal */
public CharArrayMap<SynonymMap> submap; // recursive: Map<String, SynonymMap>
/** @lucene.internal */
public Token[] synonyms;
int flags;
/** for multiword support, you must separate words with this separator */
public static final char WORD_SEPARATOR = 0;
/** map<input word, list<ord>> */
public final FST<BytesRef> fst;
/** map<ord, outputword> */
public final BytesRefHash words;
/** maxHorizontalContext: maximum context we need on the tokenstream */
public final int maxHorizontalContext;
static final int INCLUDE_ORIG=0x01;
static final int IGNORE_CASE=0x02;
public SynonymMap() {}
public SynonymMap(boolean ignoreCase) {
if (ignoreCase) flags |= IGNORE_CASE;
public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext) {
this.fst = fst;
this.words = words;
this.maxHorizontalContext = maxHorizontalContext;
}
public boolean includeOrig() { return (flags & INCLUDE_ORIG) != 0; }
public boolean ignoreCase() { return (flags & IGNORE_CASE) != 0; }
/**
* @param singleMatch List<String>, the sequence of strings to match
* @param replacement List<Token> the list of tokens to use on a match
* @param includeOrig sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
* @param mergeExisting merge the replacement tokens with any other mappings that exist
* Builds a SynonymMap.
* <p>
* Call add() until you have added all the mappings, then call build() to get a SynonymMap
* @lucene.experimental
*/
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
SynonymMap currMap = this;
for (String str : singleMatch) {
if (currMap.submap==null) {
// for now hardcode at 4.0, as its what the old code did.
// would be nice to fix, but shouldn't store a version in each submap!!!
currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_40, 1, ignoreCase());
public static class Builder {
private final HashMap<CharsRef,MapEntry> workingSet = new HashMap<CharsRef,MapEntry>();
private final BytesRefHash words = new BytesRefHash();
private final BytesRef utf8Scratch = new BytesRef(8);
private int maxHorizontalContext;
private final boolean dedup;
/** If dedup is true then identical rules (same input,
* same output) will be added only once. */
public Builder(boolean dedup) {
this.dedup = dedup;
}
SynonymMap map = currMap.submap.get(str);
if (map==null) {
map = new SynonymMap();
map.flags |= flags & IGNORE_CASE;
currMap.submap.put(str, map);
private static class MapEntry {
boolean includeOrig;
// we could sort for better sharing ultimately, but it could confuse people
ArrayList<Integer> ords = new ArrayList<Integer>();
}
currMap = map;
/** Sugar: just joins the provided terms with {@link
* SynonymMap#WORD_SEPARATOR}. reuse and its chars
* must not be null. */
public static CharsRef join(String[] words, CharsRef reuse) {
int upto = 0;
char[] buffer = reuse.chars;
for(String word : words) {
if (upto > 0) {
if (upto >= buffer.length) {
reuse.grow(upto);
buffer = reuse.chars;
}
buffer[upto++] = SynonymMap.WORD_SEPARATOR;
}
if (currMap.synonyms != null && !mergeExisting) {
throw new RuntimeException("SynonymFilter: there is already a mapping for " + singleMatch);
}
List<Token> superset = currMap.synonyms==null ? replacement :
mergeTokens(Arrays.asList(currMap.synonyms), replacement);
currMap.synonyms = superset.toArray(new Token[superset.size()]);
if (includeOrig) currMap.flags |= INCLUDE_ORIG;
final int wordLen = word.length();
final int needed = upto + wordLen;
if (needed > buffer.length) {
reuse.grow(needed);
buffer = reuse.chars;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("<");
if (synonyms!=null) {
sb.append("[");
for (int i=0; i<synonyms.length; i++) {
if (i!=0) sb.append(',');
sb.append(synonyms[i]);
}
if ((flags & INCLUDE_ORIG)!=0) {
sb.append(",ORIG");
}
sb.append("],");
}
sb.append(submap);
sb.append(">");
return sb.toString();
word.getChars(0, wordLen, buffer, upto);
upto += wordLen;
}
/** Produces a List<Token> from a List<String> */
public static List<Token> makeTokens(List<String> strings) {
List<Token> ret = new ArrayList<Token>(strings.size());
for (String str : strings) {
//Token newTok = new Token(str,0,0,"SYNONYM");
Token newTok = new Token(str, 0,0,"SYNONYM");
ret.add(newTok);
}
return ret;
return reuse;
}
/** Sugar: analyzes the text with the analyzer and
* separates by {@link SynonymMap#WORD_SEPARATOR}.
* reuse and its chars must not be null. */
public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException {
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(text));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
ts.reset();
reuse.length = 0;
while (ts.incrementToken()) {
int length = termAtt.length();
if (length == 0) {
throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
}
if (posIncAtt.getPositionIncrement() != 1) {
throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
}
reuse.grow(reuse.length + length + 1); /* current + word + separator */
int end = reuse.offset + reuse.length;
if (reuse.length > 0) {
reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
reuse.length++;
}
System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
reuse.length += length;
}
ts.end();
ts.close();
if (reuse.length == 0) {
throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
}
return reuse;
}
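// Illustrative sketch (not part of this patch): both helpers above produce the
// WORD_SEPARATOR-delimited phrase form that add() expects; "analyzer" and
// "builder" are assumed to exist.
//
//   CharsRef input  = SynonymMap.Builder.join(new String[] {"wi", "fi"}, new CharsRef());
//   CharsRef output = SynonymMap.Builder.analyze(analyzer, "wireless network", new CharsRef());
//   builder.add(input, output, true);   // "wi fi" -> "wireless network", keeping the original tokens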
/** only used for asserting! */
private boolean hasHoles(CharsRef chars) {
final int end = chars.offset + chars.length;
for(int idx=chars.offset+1;idx<end;idx++) {
if (chars.chars[idx] == SynonymMap.WORD_SEPARATOR && chars.chars[idx-1] == SynonymMap.WORD_SEPARATOR) {
return true;
}
}
if (chars.chars[chars.offset] == '\u0000') {
return true;
}
if (chars.chars[chars.offset + chars.length - 1] == '\u0000') {
return true;
}
return false;
}
// NOTE: while it's tempting to make this public, since
// the caller's parser likely knows the
// numInput/numOutputWords, sneaky exceptions much later
// on will result if these values are wrong; so we always
// recompute them ourselves to be safe:
private void add(CharsRef input, int numInputWords, CharsRef output, int numOutputWords, boolean includeOrig) {
// first convert to UTF-8
if (numInputWords <= 0) {
throw new IllegalArgumentException("numInputWords must be > 0 (got " + numInputWords + ")");
}
if (input.length <= 0) {
throw new IllegalArgumentException("input.length must be > 0 (got " + input.length + ")");
}
if (numOutputWords <= 0) {
throw new IllegalArgumentException("numOutputWords must be > 0 (got " + numOutputWords + ")");
}
if (output.length <= 0) {
throw new IllegalArgumentException("output.length must be > 0 (got " + output.length + ")");
}
assert !hasHoles(input): "input has holes: " + input;
assert !hasHoles(output): "output has holes: " + output;
//System.out.println("fmap.add input=" + input + " numInputWords=" + numInputWords + " output=" + output + " numOutputWords=" + numOutputWords);
final int hashCode = UnicodeUtil.UTF16toUTF8WithHash(output.chars, output.offset, output.length, utf8Scratch);
// lookup in hash
int ord = words.add(utf8Scratch, hashCode);
if (ord < 0) {
// already exists in our hash
ord = (-ord)-1;
//System.out.println(" output=" + output + " old ord=" + ord);
} else {
//System.out.println(" output=" + output + " new ord=" + ord);
}
MapEntry e = workingSet.get(input);
if (e == null) {
e = new MapEntry();
workingSet.put(new CharsRef(input), e); // make a copy, since we will keep around in our map
}
e.ords.add(ord);
e.includeOrig |= includeOrig;
maxHorizontalContext = Math.max(maxHorizontalContext, numInputWords);
maxHorizontalContext = Math.max(maxHorizontalContext, numOutputWords);
}
private int countWords(CharsRef chars) {
int wordCount = 1;
int upto = chars.offset;
final int limit = chars.offset + chars.length;
while(upto < limit) {
if (chars.chars[upto++] == SynonymMap.WORD_SEPARATOR) {
wordCount++;
}
}
return wordCount;
}
/**
* Add a phrase->phrase synonym mapping.
* Phrases are character sequences where words are
* separated with character zero (\u0000). Empty words
* (two \u0000s in a row) are not allowed in the input nor
* the output!
*
* @param input input phrase
* @param output output phrase
* @param includeOrig true if the original should be included
*/
public void add(CharsRef input, CharsRef output, boolean includeOrig) {
add(input, countWords(input), output, countWords(output), includeOrig);
}
/**
* Builds an {@link SynonymMap} and returns it.
*/
public SynonymMap build() throws IOException {
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
// TODO: are we using the best sharing options?
org.apache.lucene.util.fst.Builder<BytesRef> builder =
new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, 0, 0, true, outputs);
BytesRef scratch = new BytesRef(64);
ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
final Set<Integer> dedupSet;
if (dedup) {
dedupSet = new HashSet<Integer>();
} else {
dedupSet = null;
}
final byte[] spare = new byte[5];
Set<CharsRef> keys = workingSet.keySet();
CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]);
Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());
//System.out.println("fmap.build");
for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
CharsRef input = sortedKeys[keyIdx];
MapEntry output = workingSet.get(input);
int numEntries = output.ords.size();
// output size, assume the worst case
int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry
scratch.grow(estimatedSize);
scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.length);
assert scratch.offset == 0;
// now write our output data:
int count = 0;
for (int i = 0; i < numEntries; i++) {
if (dedupSet != null) {
// box once
final Integer ent = output.ords.get(i);
if (dedupSet.contains(ent)) {
continue;
}
dedupSet.add(ent);
}
scratchOutput.writeVInt(output.ords.get(i));
count++;
}
final int pos = scratchOutput.getPosition();
scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
final int pos2 = scratchOutput.getPosition();
final int vIntLen = pos2-pos;
// Move the count + includeOrig to the front of the byte[]:
System.arraycopy(scratch.bytes, pos, spare, 0, vIntLen);
System.arraycopy(scratch.bytes, 0, scratch.bytes, vIntLen, pos);
System.arraycopy(spare, 0, scratch.bytes, 0, vIntLen);
if (dedupSet != null) {
dedupSet.clear();
}
scratch.length = scratchOutput.getPosition() - scratch.offset;
//System.out.println(" add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
builder.add(input, new BytesRef(scratch));
}
FST<BytesRef> fst = builder.finish();
return new SynonymMap(fst, words, maxHorizontalContext);
}
}
}
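For reference, a minimal usage sketch of the Builder API above (not part of this patch; the class name, rule text, and tokenizer choice are illustrative assumptions): it registers one multi-word rule using \u0000 as the word separator and wraps a whitespace token stream with the resulting SynonymFilter.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Version;

public class SynonymMapUsageSketch {
  public static TokenStream wrap() throws Exception {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);        // dedup=true
    // phrases use \u0000 (SynonymMap.WORD_SEPARATOR) between words
    builder.add(new CharsRef("dns"),
                new CharsRef("domain\u0000name\u0000service"), true); // includeOrig=true
    SynonymMap map = builder.build();
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_31,
                                             new StringReader("dns lookup"));
    return new SynonymFilter(ts, map, true);                          // ignoreCase=true
  }
}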

View File

@ -0,0 +1,112 @@
package org.apache.lucene.analysis.synonym;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.text.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.CharsRef;
/**
* Parser for wordnet prolog format
* <p>
* See http://wordnet.princeton.edu/man/prologdb.5WN.html for a description of the format.
* @lucene.experimental
*/
// TODO: allow you to specify syntactic categories (e.g. just nouns, etc)
public class WordnetSynonymParser extends SynonymMap.Builder {
private final boolean expand;
private final Analyzer analyzer;
public WordnetSynonymParser(boolean dedup, boolean expand, Analyzer analyzer) {
super(dedup);
this.expand = expand;
this.analyzer = analyzer;
}
public void add(Reader in) throws IOException, ParseException {
LineNumberReader br = new LineNumberReader(in);
try {
String line = null;
String lastSynSetID = "";
CharsRef synset[] = new CharsRef[8];
int synsetSize = 0;
while ((line = br.readLine()) != null) {
String synSetID = line.substring(2, 11);
if (!synSetID.equals(lastSynSetID)) {
addInternal(synset, synsetSize);
synsetSize = 0;
}
if (synset.length <= synsetSize+1) {
CharsRef larger[] = new CharsRef[synset.length * 2];
System.arraycopy(synset, 0, larger, 0, synsetSize);
synset = larger;
}
synset[synsetSize] = parseSynonym(line, synset[synsetSize]);
synsetSize++;
lastSynSetID = synSetID;
}
// final synset in the file
addInternal(synset, synsetSize);
} catch (IllegalArgumentException e) {
ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
ex.initCause(e);
throw ex;
} finally {
br.close();
}
}
private CharsRef parseSynonym(String line, CharsRef reuse) throws IOException {
if (reuse == null) {
reuse = new CharsRef(8);
}
int start = line.indexOf('\'')+1;
int end = line.lastIndexOf('\'');
String text = line.substring(start, end).replace("''", "'");
return analyze(analyzer, text, reuse);
}
private void addInternal(CharsRef synset[], int size) throws IOException {
if (size <= 1) {
return; // nothing to do
}
if (expand) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
add(synset[i], synset[j], false);
}
}
} else {
for (int i = 0; i < size; i++) {
add(synset[i], synset[0], false);
}
}
}
}
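As a companion to the parser above, a minimal sketch (not part of this commit) of loading a few wordnet prolog s(...) lines into a SynonymMap; the class name, sample lines, and the use of WhitespaceAnalyzer to analyze each synonym are illustrative assumptions.

import java.io.StringReader;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
import org.apache.lucene.util.Version;

public class WordnetParserSketch {
  public static SynonymMap load() throws Exception {
    String prolog =
        "s(100000001,1,'woods',n,1,0).\n" +
        "s(100000001,2,'wood',n,1,0).\n" +
        "s(100000001,3,'forest',n,1,0).\n";
    // dedup=true, expand=true: each synset member maps to every member of the synset
    WordnetSynonymParser parser =
        new WordnetSynonymParser(true, true, new WhitespaceAnalyzer(Version.LUCENE_31));
    parser.add(new StringReader(prolog));
    return parser.build();
  }
}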

View File

@ -1,3 +1,4 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@ -14,13 +15,8 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>
wordnet
</title>
</head>
<body>
wordnet
</body>
<html><head></head>
<body>
Analysis components for Synonyms.
</body>
</html>

View File

@ -0,0 +1,144 @@
package org.apache.lucene.analysis.synonym;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import java.text.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.junit.Test;
/**
* Tests parser for the Solr synonyms format
* @lucene.experimental
*/
public class TestSolrSynonymParser extends BaseTokenStreamTestCase {
/** Tests some simple examples from the solr wiki */
public void testSimple() throws Exception {
String testFile =
"i-pod, ipod, ipoooood\n" +
"foo => foo bar\n" +
"foo => baz\n" +
"this test, that testing";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random));
parser.add(new StringReader(testFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
}
};
assertAnalyzesTo(analyzer, "ball",
new String[] { "ball" },
new int[] { 1 });
assertAnalyzesTo(analyzer, "i-pod",
new String[] { "i-pod", "ipod", "ipoooood" },
new int[] { 1, 0, 0 });
assertAnalyzesTo(analyzer, "foo",
new String[] { "foo", "baz", "bar" },
new int[] { 1, 0, 1 });
assertAnalyzesTo(analyzer, "this test",
new String[] { "this", "that", "test", "testing" },
new int[] { 1, 0, 1, 0 });
}
/** parse a syn file with bad syntax */
@Test(expected=ParseException.class)
public void testInvalidDoubleMap() throws Exception {
String testFile = "a => b => c";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random));
parser.add(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@Test(expected=ParseException.class)
public void testInvalidAnalyzesToNothingOutput() throws Exception {
String testFile = "a => 1";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random, MockTokenizer.SIMPLE, false));
parser.add(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@Test(expected=ParseException.class)
public void testInvalidAnalyzesToNothingInput() throws Exception {
String testFile = "1 => a";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random, MockTokenizer.SIMPLE, false));
parser.add(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@Test(expected=ParseException.class)
public void testInvalidPositionsInput() throws Exception {
String testFile = "testola => the test";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
parser.add(new StringReader(testFile));
}
/** parse a syn file with bad syntax */
@Test(expected=ParseException.class)
public void testInvalidPositionsOutput() throws Exception {
String testFile = "the test => testola";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new EnglishAnalyzer(TEST_VERSION_CURRENT));
parser.add(new StringReader(testFile));
}
/** parse a syn file with some escaped syntax chars */
public void testEscapedStuff() throws Exception {
String testFile =
"a\\=>a => b\\=>b\n" +
"a\\,a => b\\,b";
SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random, MockTokenizer.KEYWORD, false));
parser.add(new StringReader(testFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
}
};
assertAnalyzesTo(analyzer, "ball",
new String[] { "ball" },
new int[] { 1 });
assertAnalyzesTo(analyzer, "a=>a",
new String[] { "b=>b" },
new int[] { 1 });
assertAnalyzesTo(analyzer, "a,a",
new String[] { "b,b" },
new int[] { 1 });
}
}
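The test above exercises the two rule forms of the Solr format (comma-separated equivalences and explicit "=>" mappings). Outside of tests the parser is driven the same way; a minimal standalone sketch, with the class name, rule text, and analyzer choice purely illustrative:

import java.io.StringReader;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.Version;

public class SolrParserSketch {
  public static SynonymMap load() throws Exception {
    String rules =
        "i-pod, ipod\n" +      // equivalence list: expanded when expand=true
        "foo => foo bar\n";    // explicit mapping: "foo" maps to the right-hand side
    SolrSynonymParser parser =
        new SolrSynonymParser(true, true, new WhitespaceAnalyzer(Version.LUCENE_31));
    parser.add(new StringReader(rules));
    return parser.build();
  }
}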

View File

@ -0,0 +1,393 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util._TestUtil;
public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
private SynonymMap.Builder b;
private Tokenizer tokensIn;
private SynonymFilter tokensOut;
private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private OffsetAttribute offsetAtt;
private void add(String input, String output, boolean keepOrig) {
b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
new CharsRef(output.replaceAll(" +", "\u0000")),
keepOrig);
}
private void assertEquals(CharTermAttribute term, String expected) {
assertEquals(expected.length(), term.length());
final char[] buffer = term.buffer();
for(int chIDX=0;chIDX<expected.length();chIDX++) {
assertEquals(expected.charAt(chIDX), buffer[chIDX]);
}
}
private void verify(String input, String output) throws Exception {
if (VERBOSE) {
System.out.println("TEST: verify input=" + input + " expectedOutput=" + output);
}
tokensIn.reset(new StringReader(input));
tokensOut.reset();
final String[] expected = output.split(" ");
int expectedUpto = 0;
while(tokensOut.incrementToken()) {
if (VERBOSE) {
System.out.println(" incr token=" + termAtt.toString() + " posIncr=" + posIncrAtt.getPositionIncrement());
}
assertTrue(expectedUpto < expected.length);
final int startOffset = offsetAtt.startOffset();
final int endOffset = offsetAtt.endOffset();
final String[] expectedAtPos = expected[expectedUpto++].split("/");
for(int atPos=0;atPos<expectedAtPos.length;atPos++) {
if (atPos > 0) {
assertTrue(tokensOut.incrementToken());
if (VERBOSE) {
System.out.println(" incr token=" + termAtt.toString() + " posIncr=" + posIncrAtt.getPositionIncrement());
}
}
assertEquals(termAtt, expectedAtPos[atPos]);
assertEquals(atPos == 0 ? 1 : 0,
posIncrAtt.getPositionIncrement());
// start/end offset of all tokens at same pos should
// be the same:
assertEquals(startOffset, offsetAtt.startOffset());
assertEquals(endOffset, offsetAtt.endOffset());
}
}
tokensOut.end();
tokensOut.close();
if (VERBOSE) {
System.out.println(" incr: END");
}
assertEquals(expectedUpto, expected.length);
}
public void testBasic() throws Exception {
b = new SynonymMap.Builder(true);
add("a", "foo", true);
add("a b", "bar fee", true);
add("b c", "dog collar", true);
add("c d", "dog harness holder extras", true);
add("m c e", "dog barks loudly", false);
add("e f", "foo bar", false);
add("e f", "baz bee", false);
add("z", "boo", false);
add("y", "bee", true);
tokensIn = new MockTokenizer(new StringReader("a"),
MockTokenizer.WHITESPACE,
true);
tokensIn.reset();
assertTrue(tokensIn.incrementToken());
assertFalse(tokensIn.incrementToken());
tokensIn.end();
tokensIn.close();
tokensOut = new SynonymFilter(tokensIn,
b.build(),
true);
termAtt = tokensOut.addAttribute(CharTermAttribute.class);
posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
verify("a b c", "a/bar b/fee c");
// syn output extends beyond input tokens
verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");
verify("a b a", "a/bar b/fee a/foo");
// outputs that add to one another:
verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");
// two outputs for same input
verify("e f", "foo/baz bar/bee");
// mixed keepOrig true/false:
verify("a m c e x", "a/foo dog barks loudly x");
verify("c d m c e x", "c/dog d/harness m/holder/dog c/extras/barks loudly x");
assertTrue(tokensOut.getCaptureCount() > 0);
// no captureStates when no syns matched
verify("p q r s t", "p q r s t");
assertEquals(0, tokensOut.getCaptureCount());
// no captureStates when only single-input syns, w/ no
// lookahead needed, matched
verify("p q z y t", "p q boo y/bee t");
assertEquals(0, tokensOut.getCaptureCount());
}
private String getRandomString(char start, int alphabetSize, int length) {
assert alphabetSize <= 26;
char[] s = new char[2*length];
for(int charIDX=0;charIDX<length;charIDX++) {
s[2*charIDX] = (char) (start + random.nextInt(alphabetSize));
s[2*charIDX+1] = ' ';
}
return new String(s);
}
private static class OneSyn {
String in;
List<String> out;
boolean keepOrig;
}
public String slowSynMatcher(String doc, List<OneSyn> syns, int maxOutputLength) {
assertTrue(doc.length() % 2 == 0);
final int numInputs = doc.length()/2;
boolean[] keepOrigs = new boolean[numInputs];
Arrays.fill(keepOrigs, false);
String[] outputs = new String[numInputs + maxOutputLength];
OneSyn[] matches = new OneSyn[numInputs];
for(OneSyn syn : syns) {
int idx = -1;
while(true) {
idx = doc.indexOf(syn.in, 1+idx);
if (idx == -1) {
break;
}
assertTrue(idx % 2 == 0);
final int matchIDX = idx/2;
assertTrue(syn.in.length() % 2 == 1);
if (matches[matchIDX] == null) {
matches[matchIDX] = syn;
} else if (syn.in.length() > matches[matchIDX].in.length()) {
// Greedy conflict resolution: longer match wins:
matches[matchIDX] = syn;
} else {
assertTrue(syn.in.length() < matches[matchIDX].in.length());
}
}
}
// Greedy conflict resolution: if syn matches a range of inputs,
// it prevents other syns from matching that range
for(int inputIDX=0;inputIDX<numInputs;inputIDX++) {
final OneSyn match = matches[inputIDX];
if (match != null) {
final int synInLength = (1+match.in.length())/2;
for(int nextInputIDX=inputIDX+1;nextInputIDX<numInputs && nextInputIDX<(inputIDX+synInLength);nextInputIDX++) {
matches[nextInputIDX] = null;
}
}
}
// Fill overlapping outputs:
for(int inputIDX=0;inputIDX<numInputs;inputIDX++) {
final OneSyn syn = matches[inputIDX];
if (syn == null) {
continue;
}
for(String synOut : syn.out) {
final String[] synOutputs = synOut.split(" ");
assertEquals(synOutputs.length, (1+synOut.length())/2);
final int matchEnd = inputIDX + synOutputs.length;
int synUpto = 0;
for(int matchIDX=inputIDX;matchIDX<matchEnd;matchIDX++) {
if (outputs[matchIDX] == null) {
outputs[matchIDX] = synOutputs[synUpto++];
} else {
outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
}
if (matchIDX < numInputs) {
keepOrigs[matchIDX] |= syn.keepOrig;
}
}
}
}
StringBuilder sb = new StringBuilder();
String[] inputTokens = doc.split(" ");
final int limit = inputTokens.length + maxOutputLength;
for(int inputIDX=0;inputIDX<limit;inputIDX++) {
boolean posHasOutput = false;
if (inputIDX >= numInputs && outputs[inputIDX] == null) {
break;
}
if (inputIDX < numInputs && (outputs[inputIDX] == null || keepOrigs[inputIDX])) {
sb.append(inputTokens[inputIDX]);
posHasOutput = true;
}
if (outputs[inputIDX] != null) {
if (posHasOutput) {
sb.append('/');
}
sb.append(outputs[inputIDX]);
}
if (inputIDX < limit-1) {
sb.append(' ');
}
}
return sb.toString();
}
public void testRandom() throws Exception {
final int alphabetSize = _TestUtil.nextInt(random, 2, 7);
final int docLen = atLeast(3000);
//final int docLen = 50;
final String document = getRandomString('a', alphabetSize, docLen);
if (VERBOSE) {
System.out.println("TEST: doc=" + document);
}
final int numSyn = atLeast(5);
//final int numSyn = 2;
final Map<String,OneSyn> synMap = new HashMap<String,OneSyn>();
final List<OneSyn> syns = new ArrayList<OneSyn>();
final boolean dedup = random.nextBoolean();
if (VERBOSE) {
System.out.println(" dedup=" + dedup);
}
b = new SynonymMap.Builder(dedup);
for(int synIDX=0;synIDX<numSyn;synIDX++) {
final String synIn = getRandomString('a', alphabetSize, _TestUtil.nextInt(random, 1, 5)).trim();
OneSyn s = synMap.get(synIn);
if (s == null) {
s = new OneSyn();
s.in = synIn;
syns.add(s);
s.out = new ArrayList<String>();
synMap.put(synIn, s);
s.keepOrig = random.nextBoolean();
}
final String synOut = getRandomString('0', 10, _TestUtil.nextInt(random, 1, 5)).trim();
s.out.add(synOut);
add(synIn, synOut, s.keepOrig);
if (VERBOSE) {
System.out.println(" syns[" + synIDX + "] = " + s.in + " -> " + s.out + " keepOrig=" + s.keepOrig);
}
}
tokensIn = new MockTokenizer(new StringReader("a"),
MockTokenizer.WHITESPACE,
true);
tokensIn.reset();
assertTrue(tokensIn.incrementToken());
assertFalse(tokensIn.incrementToken());
tokensIn.end();
tokensIn.close();
tokensOut = new SynonymFilter(tokensIn,
b.build(),
true);
termAtt = tokensOut.addAttribute(CharTermAttribute.class);
posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
if (dedup) {
pruneDups(syns);
}
final String expected = slowSynMatcher(document, syns, 5);
if (VERBOSE) {
System.out.println("TEST: expected=" + expected);
}
verify(document, expected);
}
private void pruneDups(List<OneSyn> syns) {
Set<String> seen = new HashSet<String>();
for(OneSyn syn : syns) {
int idx = 0;
while(idx < syn.out.size()) {
String out = syn.out.get(idx);
if (!seen.contains(out)) {
seen.add(out);
idx++;
} else {
syn.out.remove(idx);
}
}
seen.clear();
}
}
private String randomNonEmptyString() {
while(true) {
final String s = _TestUtil.randomUnicodeString(random).trim();
if (s.length() != 0 && s.indexOf('\u0000') == -1) {
return s;
}
}
}
/** simple random test, doesn't verify correctness.
* does verify it doesn't throw exceptions and that the stream doesn't misbehave
*/
public void testRandom2() throws Exception {
final int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
b = new SynonymMap.Builder(random.nextBoolean());
final int numEntries = atLeast(10);
for (int j = 0; j < numEntries; j++) {
add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
}
final SynonymMap map = b.build();
final boolean ignoreCase = random.nextBoolean();
final Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
}
};
checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
}
}
}

View File

@ -0,0 +1,72 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
public class TestWordnetSynonymParser extends BaseTokenStreamTestCase {
Analyzer analyzer;
String synonymsFile =
"s(100000001,1,'woods',n,1,0).\n" +
"s(100000001,2,'wood',n,1,0).\n" +
"s(100000001,3,'forest',n,1,0).\n" +
"s(100000002,1,'wolfish',n,1,0).\n" +
"s(100000002,2,'ravenous',n,1,0).\n" +
"s(100000003,1,'king',n,1,1).\n" +
"s(100000003,2,'baron',n,1,1).\n" +
"s(100000004,1,'king''s evil',n,1,1).\n" +
"s(100000004,2,'king''s meany',n,1,1).\n";
public void testSynonyms() throws Exception {
WordnetSynonymParser parser = new WordnetSynonymParser(true, true, new MockAnalyzer(random));
parser.add(new StringReader(synonymsFile));
final SynonymMap map = parser.build();
Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
}
};
/* all expansions */
assertAnalyzesTo(analyzer, "Lost in the woods",
new String[] { "Lost", "in", "the", "woods", "wood", "forest" },
new int[] { 0, 5, 8, 12, 12, 12 },
new int[] { 4, 7, 11, 17, 17, 17 },
new int[] { 1, 1, 1, 1, 0, 0 });
/* single quote */
assertAnalyzesTo(analyzer, "king",
new String[] { "king", "baron" });
/* multi words */
assertAnalyzesTo(analyzer, "king's evil",
new String[] { "king's", "king's", "evil", "meany" });
}
}

View File

@ -90,6 +90,10 @@ import org.apache.lucene.store.OutputStreamDataOutput;
*
* <p>"alphabetically" in any of the documentation above indicates utf16 codepoint order,
* nothing else.
*
* <b>NOTE</b>: the FST file format is experimental and
* subject to change suddenly, requiring you to rebuild the
* FST suggest index.
*/
public class FSTLookup extends Lookup {

View File

@ -320,6 +320,9 @@ New Features
Optimizations
----------------------
* LUCENE-3233: Improved memory usage, build time, and performance of
SynonymFilterFactory. (Mike McCandless, Robert Muir)
Bug Fixes
----------------------

View File

@ -0,0 +1,157 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.text.ParseException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
/**
* @deprecated (3.4) use {@link SynonymFilterFactory} instead. this is only a backwards compatibility
* mechanism that will be removed in Lucene 5.0
*/
// NOTE: rename this to "SynonymFilterFactory" and nuke that delegator in Lucene 5.0!
@Deprecated
final class FSTSynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private SynonymMap map;
private boolean ignoreCase;
@Override
public TokenStream create(TokenStream input) {
return new SynonymFilter(input, map, ignoreCase);
}
@Override
public void inform(ResourceLoader loader) {
final boolean ignoreCase = getBoolean("ignoreCase", false);
this.ignoreCase = ignoreCase;
String tf = args.get("tokenizerFactory");
final TokenizerFactory factory = tf == null ? null : loadTokenizerFactory(loader, tf, args);
Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_31, reader) : factory.create(reader);
TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_31, tokenizer) : tokenizer;
return new TokenStreamComponents(tokenizer, stream);
}
};
String format = args.get("format");
try {
if (format == null || format.equals("solr")) {
// TODO: expose dedup as a parameter?
map = loadSolrSynonyms(loader, true, analyzer);
} else if (format.equals("wordnet")) {
map = loadWordnetSynonyms(loader, true, analyzer);
} else {
// TODO: somehow make this more pluggable
throw new RuntimeException("Unrecognized synonyms format: " + format);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Load synonyms from the solr format, "format=solr".
*/
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
final boolean expand = getBoolean("expand", true);
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required argument 'synonyms'.");
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer);
File synonymFile = new File(synonyms);
if (synonymFile.exists()) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
} else {
List<String> files = StrUtils.splitFileNames(synonyms);
for (String file : files) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(file), decoder));
}
}
return parser.build();
}
/**
* Load synonyms from the wordnet format, "format=wordnet".
*/
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
final boolean expand = getBoolean("expand", true);
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required argument 'synonyms'.");
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer);
File synonymFile = new File(synonyms);
if (synonymFile.exists()) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
} else {
List<String> files = StrUtils.splitFileNames(synonyms);
for (String file : files) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(file), decoder));
}
}
return parser.build();
}
private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
TokenizerFactory tokFactory = (TokenizerFactory) loader.newInstance(cname);
tokFactory.init(args);
return tokFactory;
}
}
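The factory above wires the parsed SynonymMap into an analysis chain for Solr; in plain Lucene the same wiring can be done by hand. A minimal sketch mirroring the pattern used throughout this patch (the class name and tokenizer choice are illustrative, not part of the commit):

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;

public class SynonymAnalyzerSketch {
  public static Analyzer wrap(final SynonymMap map, final boolean ignoreCase) {
    return new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_31, reader);
        return new TokenStreamComponents(tokenizer,
                                         new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };
  }
}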

View File

@ -0,0 +1,261 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
/** SynonymFilter handles multi-token synonyms with variable position increment offsets.
* <p>
* The matched tokens from the input stream may be optionally passed through (includeOrig=true)
* or discarded. If the original tokens are included, the position increments may be modified
* to retain absolute positions after merging with the synonym tokenstream.
* <p>
* Generated synonyms will start at the same position as the first matched source token.
* @deprecated (3.4) use {@link SynonymFilterFactory} instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
*/
@Deprecated
final class SlowSynonymFilter extends TokenFilter {
private final SlowSynonymMap map; // Map<String, SynonymMap>
private Iterator<AttributeSource> replacement; // iterator over generated tokens
public SlowSynonymFilter(TokenStream in, SlowSynonymMap map) {
super(in);
if (map == null)
throw new IllegalArgumentException("map is required");
this.map = map;
// just ensuring these attributes exist...
addAttribute(CharTermAttribute.class);
addAttribute(PositionIncrementAttribute.class);
addAttribute(OffsetAttribute.class);
addAttribute(TypeAttribute.class);
}
/*
* Need to worry about multiple scenarios:
* - need to go for the longest match
* a b => foo #shouldn't match if "a b" is followed by "c d"
* a b c d => bar
* - need to backtrack - retry matches for tokens already read
* a b c d => foo
* b c => bar
* If the input stream is "a b c x", one will consume "a b c d"
* trying to match the first rule... all but "a" should be
* pushed back so a match may be made on "b c".
* - don't try and match generated tokens (thus need separate queue)
* matching is not recursive.
* - handle optional generation of original tokens in all these cases,
* merging token streams to preserve token positions.
* - preserve original positionIncrement of first matched token
*/
@Override
public boolean incrementToken() throws IOException {
while (true) {
// if there are any generated tokens, return them... don't try any
// matches against them, as we specifically don't want recursion.
if (replacement!=null && replacement.hasNext()) {
copy(this, replacement.next());
return true;
}
// common case fast-path of first token not matching anything
AttributeSource firstTok = nextTok();
if (firstTok == null) return false;
CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
SlowSynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
if (result == null) {
copy(this, firstTok);
return true;
}
// fast-path failed, clone ourselves if needed
if (firstTok == this)
firstTok = cloneAttributes();
// OK, we matched a token, so find the longest match.
matched = new LinkedList<AttributeSource>();
result = match(result);
if (result==null) {
// no match, simply return the first token read.
copy(this, firstTok);
return true;
}
// reuse, or create new one each time?
ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(result.synonyms.length + matched.size() + 1);
//
// there was a match... let's generate the new tokens, merging
// in the matched tokens (position increments need adjusting)
//
AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
boolean includeOrig = result.includeOrig();
AttributeSource origTok = includeOrig ? firstTok : null;
PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(PositionIncrementAttribute.class);
int origPos = firstPosIncAtt.getPositionIncrement(); // position of origTok in the original stream
int repPos=0; // curr position in replacement token stream
int pos=0; // current position in merged token stream
for (int i=0; i<result.synonyms.length; i++) {
Token repTok = result.synonyms[i];
AttributeSource newTok = firstTok.cloneAttributes();
CharTermAttribute newTermAtt = newTok.addAttribute(CharTermAttribute.class);
OffsetAttribute newOffsetAtt = newTok.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to original
// if necessary, insert original tokens and adjust position increment
while (origTok != null && origPos <= repPos) {
PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPosInc.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origPosInc.getPositionIncrement();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) {
origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPos += origPosInc.getPositionIncrement();
}
}
newPosIncAtt.setPositionIncrement(repPos - pos);
generated.add(newTok);
pos += newPosIncAtt.getPositionIncrement();
}
// finish up any leftover original tokens
while (origTok!=null) {
PositionIncrementAttribute origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPosInc.setPositionIncrement(origPos-pos);
generated.add(origTok);
pos += origPosInc.getPositionIncrement();
origTok = matched.isEmpty() ? null : matched.removeFirst();
if (origTok != null) {
origPosInc = origTok.addAttribute(PositionIncrementAttribute.class);
origPos += origPosInc.getPositionIncrement();
}
}
// what if we replaced a longer sequence with a shorter one?
// a/0 b/5 => foo/0
// should I re-create the gap on the next buffered token?
replacement = generated.iterator();
// Now return to the top of the loop to read and return the first
// generated token.. The reason this is done is that we may have generated
// nothing at all, and may need to continue with more matching logic.
}
}
//
// Defer creation of the buffer until the first time it is used to
// optimize short fields with no matches.
//
private LinkedList<AttributeSource> buffer;
private LinkedList<AttributeSource> matched;
private boolean exhausted;
private AttributeSource nextTok() throws IOException {
if (buffer!=null && !buffer.isEmpty()) {
return buffer.removeFirst();
} else {
if (!exhausted && input.incrementToken()) {
return this;
} else {
exhausted = true;
return null;
}
}
}
private void pushTok(AttributeSource t) {
if (buffer==null) buffer=new LinkedList<AttributeSource>();
buffer.addFirst(t);
}
private SlowSynonymMap match(SlowSynonymMap map) throws IOException {
SlowSynonymMap result = null;
if (map.submap != null) {
AttributeSource tok = nextTok();
if (tok != null) {
// clone ourselves.
if (tok == this)
tok = cloneAttributes();
// check for positionIncrement!=1? if>1, should not match, if==0, check multiple at this level?
CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
SlowSynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
if (subMap != null) {
// recurse
result = match(subMap);
}
if (result != null) {
matched.addFirst(tok);
} else {
// push back unmatched token
pushTok(tok);
}
}
}
// no longer sequence matched, so if this node has synonyms, it's the match.
if (result==null && map.synonyms!=null) {
result = map;
}
return result;
}
private void copy(AttributeSource target, AttributeSource source) {
if (target != source)
source.copyTo(target);
}
@Override
public void reset() throws IOException {
input.reset();
replacement = null;
exhausted = false;
}
}

View File

@ -0,0 +1,188 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Factory for {@link SlowSynonymFilter} (only used with luceneMatchVersion < 3.4)
* <pre class="prettyprint" >
* &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
* expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* @deprecated (3.4) use {@link SynonymFilterFactory} instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
*/
@Deprecated
final class SlowSynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
public void inform(ResourceLoader loader) {
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required argument 'synonyms'.");
boolean ignoreCase = getBoolean("ignoreCase", false);
boolean expand = getBoolean("expand", true);
String tf = args.get("tokenizerFactory");
TokenizerFactory tokFactory = null;
if( tf != null ){
tokFactory = loadTokenizerFactory( loader, tf, args );
}
Iterable<String> wlist=loadRules( synonyms, loader );
synMap = new SlowSynonymMap(ignoreCase);
parseRules(wlist, synMap, "=>", ",", expand,tokFactory);
}
/**
* @return a list of all rules
*/
protected Iterable<String> loadRules( String synonyms, ResourceLoader loader ) {
List<String> wlist=null;
try {
File synonymFile = new File(synonyms);
if (synonymFile.exists()) {
wlist = loader.getLines(synonyms);
} else {
List<String> files = StrUtils.splitFileNames(synonyms);
wlist = new ArrayList<String>();
for (String file : files) {
List<String> lines = loader.getLines(file.trim());
wlist.addAll(lines);
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return wlist;
}
private SlowSynonymMap synMap;
static void parseRules(Iterable<String> rules, SlowSynonymMap map, String mappingSep,
String synSep, boolean expansion, TokenizerFactory tokFactory) {
int count=0;
for (String rule : rules) {
// To use regexes, we need an expression that specifies an odd number of chars.
// This can't really be done with string.split(), and since we need to
// do unescaping at some point anyway, we wouldn't be saving any effort
// by using regexes.
List<String> mapping = StrUtils.splitSmart(rule, mappingSep, false);
List<List<String>> source;
List<List<String>> target;
if (mapping.size() > 2) {
throw new RuntimeException("Invalid Synonym Rule:" + rule);
} else if (mapping.size()==2) {
source = getSynList(mapping.get(0), synSep, tokFactory);
target = getSynList(mapping.get(1), synSep, tokFactory);
} else {
source = getSynList(mapping.get(0), synSep, tokFactory);
if (expansion) {
// expand to all arguments
target = source;
} else {
// reduce to first argument
target = new ArrayList<List<String>>(1);
target.add(source.get(0));
}
}
boolean includeOrig=false;
for (List<String> fromToks : source) {
count++;
for (List<String> toToks : target) {
map.add(fromToks,
SlowSynonymMap.makeTokens(toToks),
includeOrig,
true
);
}
}
}
}
// a , b c , d e f => [[a],[b,c],[d,e,f]]
private static List<List<String>> getSynList(String str, String separator, TokenizerFactory tokFactory) {
List<String> strList = StrUtils.splitSmart(str, separator, false);
// now split on whitespace to get a list of token strings
List<List<String>> synList = new ArrayList<List<String>>();
for (String toks : strList) {
List<String> tokList = tokFactory == null ?
StrUtils.splitWS(toks, true) : splitByTokenizer(toks, tokFactory);
synList.add(tokList);
}
return synList;
}
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory){
StringReader reader = new StringReader( source );
TokenStream ts = loadTokenizer(tokFactory, reader);
List<String> tokList = new ArrayList<String>();
try {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()){
if( termAtt.length() > 0 )
tokList.add( termAtt.toString() );
}
} catch (IOException e) {
throw new RuntimeException(e);
}
finally{
reader.close();
}
return tokList;
}
private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
TokenizerFactory tokFactory = (TokenizerFactory)loader.newInstance( cname );
tokFactory.init( args );
return tokFactory;
}
private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader){
return tokFactory.create( reader );
}
public SlowSynonymMap getSynonymMap() {
return synMap;
}
public SlowSynonymFilter create(TokenStream input) {
return new SlowSynonymFilter(input,synMap);
}
}

View File

@ -0,0 +1,162 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;
import java.util.*;
/** Mapping rules for use with {@link SlowSynonymFilter}
* @deprecated (3.4) use {@link SynonymFilterFactory} instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
*/
@Deprecated
class SlowSynonymMap {
/** @lucene.internal */
public CharArrayMap<SlowSynonymMap> submap; // recursive: Map<String, SynonymMap>
/** @lucene.internal */
public Token[] synonyms;
int flags;
static final int INCLUDE_ORIG=0x01;
static final int IGNORE_CASE=0x02;
public SlowSynonymMap() {}
public SlowSynonymMap(boolean ignoreCase) {
if (ignoreCase) flags |= IGNORE_CASE;
}
public boolean includeOrig() { return (flags & INCLUDE_ORIG) != 0; }
public boolean ignoreCase() { return (flags & IGNORE_CASE) != 0; }
/**
* @param singleMatch List<String>, the sequence of strings to match
* @param replacement List<Token> the list of tokens to use on a match
* @param includeOrig sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
* @param mergeExisting merge the replacement tokens with any other mappings that exist
*/
public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
SlowSynonymMap currMap = this;
for (String str : singleMatch) {
if (currMap.submap==null) {
// for now hardcode at 4.0, as its what the old code did.
// would be nice to fix, but shouldn't store a version in each submap!!!
currMap.submap = new CharArrayMap<SlowSynonymMap>(Version.LUCENE_40, 1, ignoreCase());
}
SlowSynonymMap map = currMap.submap.get(str);
if (map==null) {
map = new SlowSynonymMap();
map.flags |= flags & IGNORE_CASE;
currMap.submap.put(str, map);
}
currMap = map;
}
if (currMap.synonyms != null && !mergeExisting) {
throw new RuntimeException("SynonymFilter: there is already a mapping for " + singleMatch);
}
List<Token> superset = currMap.synonyms==null ? replacement :
mergeTokens(Arrays.asList(currMap.synonyms), replacement);
currMap.synonyms = superset.toArray(new Token[superset.size()]);
if (includeOrig) currMap.flags |= INCLUDE_ORIG;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("<");
if (synonyms!=null) {
sb.append("[");
for (int i=0; i<synonyms.length; i++) {
if (i!=0) sb.append(',');
sb.append(synonyms[i]);
}
if ((flags & INCLUDE_ORIG)!=0) {
sb.append(",ORIG");
}
sb.append("],");
}
sb.append(submap);
sb.append(">");
return sb.toString();
}
/** Produces a List<Token> from a List<String> */
public static List<Token> makeTokens(List<String> strings) {
List<Token> ret = new ArrayList<Token>(strings.size());
for (String str : strings) {
//Token newTok = new Token(str,0,0,"SYNONYM");
Token newTok = new Token(str, 0,0,"SYNONYM");
ret.add(newTok);
}
return ret;
}
/**
* Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
* the tokens end up at the same position.
*
* Example: [a b] merged with [c d] produces [a/b c/d] ('/' denotes tokens in the same position)
* Example: [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2] (a,n means a has posInc=n)
*
*/
public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
ArrayList<Token> result = new ArrayList<Token>();
if (lst1 ==null || lst2 ==null) {
if (lst2 != null) result.addAll(lst2);
if (lst1 != null) result.addAll(lst1);
return result;
}
int pos=0;
Iterator<Token> iter1=lst1.iterator();
Iterator<Token> iter2=lst2.iterator();
Token tok1 = iter1.hasNext() ? iter1.next() : null;
Token tok2 = iter2.hasNext() ? iter2.next() : null;
int pos1 = tok1!=null ? tok1.getPositionIncrement() : 0;
int pos2 = tok2!=null ? tok2.getPositionIncrement() : 0;
while(tok1!=null || tok2!=null) {
while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
tok.copyBuffer(tok1.buffer(), 0, tok1.length());
tok.setPositionIncrement(pos1-pos);
result.add(tok);
pos=pos1;
tok1 = iter1.hasNext() ? iter1.next() : null;
pos1 += tok1!=null ? tok1.getPositionIncrement() : 0;
}
while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
tok.copyBuffer(tok2.buffer(), 0, tok2.length());
tok.setPositionIncrement(pos2-pos);
result.add(tok);
pos=pos2;
tok2 = iter2.hasNext() ? iter2.next() : null;
pos2 += tok2!=null ? tok2.getPositionIncrement() : 0;
}
}
return result;
}
}

View File

@ -1,189 +1,54 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Factory for {@link SynonymFilter}.
* <pre class="prettyprint" >
* &lt;fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="false"
* expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
* format="solr" ignoreCase="false" expand="true"
* tokenizerFactory="solr.WhitespaceTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
*/
public class SynonymFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private BaseTokenFilterFactory delegator;
@Override
public void init(Map<String,String> args) {
super.init(args);
assureMatchVersion();
if (luceneMatchVersion.onOrAfter(Version.LUCENE_34)) {
delegator = new FSTSynonymFilterFactory();
} else {
// check if you use the new optional arg "format". this makes no sense for the old one,
// as it's wired to solr's synonyms format only.
if (args.containsKey("format") && !args.get("format").equals("solr")) {
throw new IllegalArgumentException("You must specify luceneMatchVersion >= 3.4 to use alternate synonyms formats");
}
delegator = new SlowSynonymFilterFactory();
}
delegator.init(args);
}
@Override
public TokenStream create(TokenStream input) {
assert delegator != null : "init() was not called!";
return delegator.create(input);
}
@Override
public void inform(ResourceLoader loader) {
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required argument 'synonyms'.");
boolean ignoreCase = getBoolean("ignoreCase", false);
boolean expand = getBoolean("expand", true);
String tf = args.get("tokenizerFactory");
TokenizerFactory tokFactory = null;
if( tf != null ){
tokFactory = loadTokenizerFactory( loader, tf, args );
}
Iterable<String> wlist=loadRules( synonyms, loader );
synMap = new SynonymMap(ignoreCase);
parseRules(wlist, synMap, "=>", ",", expand,tokFactory);
}
/**
* @return a list of all rules
*/
protected Iterable<String> loadRules( String synonyms, ResourceLoader loader ) {
List<String> wlist=null;
try {
File synonymFile = new File(synonyms);
if (synonymFile.exists()) {
wlist = loader.getLines(synonyms);
} else {
List<String> files = StrUtils.splitFileNames(synonyms);
wlist = new ArrayList<String>();
for (String file : files) {
List<String> lines = loader.getLines(file.trim());
wlist.addAll(lines);
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return wlist;
}
private SynonymMap synMap;
static void parseRules(Iterable<String> rules, SynonymMap map, String mappingSep,
String synSep, boolean expansion, TokenizerFactory tokFactory) {
int count=0;
for (String rule : rules) {
// To use regexes, we need an expression that specifies an odd number of chars.
// This can't really be done with string.split(), and since we need to
// do unescaping at some point anyway, we wouldn't be saving any effort
// by using regexes.
List<String> mapping = StrUtils.splitSmart(rule, mappingSep, false);
List<List<String>> source;
List<List<String>> target;
if (mapping.size() > 2) {
throw new RuntimeException("Invalid Synonym Rule:" + rule);
} else if (mapping.size()==2) {
source = getSynList(mapping.get(0), synSep, tokFactory);
target = getSynList(mapping.get(1), synSep, tokFactory);
} else {
source = getSynList(mapping.get(0), synSep, tokFactory);
if (expansion) {
// expand to all arguments
target = source;
} else {
// reduce to first argument
target = new ArrayList<List<String>>(1);
target.add(source.get(0));
}
}
boolean includeOrig=false;
for (List<String> fromToks : source) {
count++;
for (List<String> toToks : target) {
map.add(fromToks,
SynonymMap.makeTokens(toToks),
includeOrig,
true
);
}
}
}
}
// a , b c , d e f => [[a],[b,c],[d,e,f]]
private static List<List<String>> getSynList(String str, String separator, TokenizerFactory tokFactory) {
List<String> strList = StrUtils.splitSmart(str, separator, false);
// now split on whitespace to get a list of token strings
List<List<String>> synList = new ArrayList<List<String>>();
for (String toks : strList) {
List<String> tokList = tokFactory == null ?
StrUtils.splitWS(toks, true) : splitByTokenizer(toks, tokFactory);
synList.add(tokList);
}
return synList;
}
private static List<String> splitByTokenizer(String source, TokenizerFactory tokFactory){
StringReader reader = new StringReader( source );
TokenStream ts = loadTokenizer(tokFactory, reader);
List<String> tokList = new ArrayList<String>();
try {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()){
if( termAtt.length() > 0 )
tokList.add( termAtt.toString() );
}
} catch (IOException e) {
throw new RuntimeException(e);
}
finally{
reader.close();
}
return tokList;
}
private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args){
TokenizerFactory tokFactory = (TokenizerFactory)loader.newInstance( cname );
tokFactory.init( args );
return tokFactory;
}
private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader){
return tokFactory.create( reader );
}
public SynonymMap getSynonymMap() {
return synMap;
}
public SynonymFilter create(TokenStream input) {
return new SynonymFilter(input,synMap);
assert delegator != null : "init() was not called!";
((ResourceLoaderAware) delegator).inform(loader);
}
}
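
A minimal sketch of the delegation above, grounded in init(): with luceneMatchVersion >= 3.4 the factory hands off to the FST-backed implementation and accepts the optional "format" arg, while older versions fall back to the slow implementation and reject any format other than "solr". The resource name and the try/catch scaffolding are illustrative assumptions.

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.SynonymFilterFactory;

public class SynonymFactoryDelegationSketch {
  public static void main(String[] ignored) {
    Map<String,String> args = new HashMap<String,String>();
    args.put("luceneMatchVersion", Version.LUCENE_34.toString());
    args.put("synonyms", "synonyms.txt");   // assumed resource name
    args.put("format", "wordnet");          // alternate formats only work on the >= 3.4 path

    new SynonymFilterFactory().init(args);  // delegates to FSTSynonymFilterFactory

    args.put("luceneMatchVersion", Version.LUCENE_33.toString());
    try {
      new SynonymFilterFactory().init(args); // pre-3.4 path is wired to the solr format only
    } catch (IllegalArgumentException expected) {
      System.out.println("rejected: " + expected.getMessage());
    }
  }
}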

View File

@ -17,30 +17,69 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.junit.Test;
import org.apache.solr.common.ResourceLoader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @since solr 1.4
*/
public class TestMultiWordSynonyms extends BaseTokenTestCase {
@Test
public void testMultiWordSynonyms() throws IOException {
/**
* @deprecated Remove this test in 5.0
*/
@Deprecated
public void testMultiWordSynonymsOld() throws IOException {
List<String> rules = new ArrayList<String>();
rules.add("a b c,d");
SynonymMap synMap = new SynonymMap(true);
SynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
SlowSynonymMap synMap = new SlowSynonymMap(true);
SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);
SynonymFilter ts = new SynonymFilter(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("a e")), synMap);
SlowSynonymFilter ts = new SlowSynonymFilter(new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("a e")), synMap);
// This fails because ["e","e"] is the value of the token stream
assertTokenStreamContents(ts, new String[] { "a", "e" });
}
public void testMultiWordSynonyms() throws IOException {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.putAll(DEFAULT_VERSION_PARAM);
args.put("synonyms", "synonyms.txt");
factory.init(args);
factory.inform(new StringMockSolrResourceLoader("a b c,d"));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("a e"), MockTokenizer.WHITESPACE, false));
// This fails because ["e","e"] is the value of the token stream
assertTokenStreamContents(ts, new String[] { "a", "e" });
}
private class StringMockSolrResourceLoader implements ResourceLoader {
String text;
StringMockSolrResourceLoader(String text) {
this.text = text;
}
public List<String> getLines(String resource) throws IOException {
return null;
}
public Object newInstance(String cname, String... subpackages) {
return null;
}
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes("UTF-8"));
}
}
}
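
For contrast with the non-matching "a e" input above, a hedged sketch (same package assumed, since parseRules and the submap field are package-private) of what the "a b c,d" rule becomes inside a SlowSynonymMap: a chain of nested submaps a -> b -> c whose leaf carries the synonym tokens, so an input that never reaches "c" is left untouched.

package org.apache.solr.analysis;

import java.util.ArrayList;
import java.util.List;

public class MultiWordRuleSketch {
  public static void main(String[] ignored) {
    List<String> rules = new ArrayList<String>();
    rules.add("a b c,d");                       // the rule used by the deprecated test above
    SlowSynonymMap synMap = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);

    // With expand=true the top level holds entries for "a" (start of the phrase) and "d".
    System.out.println(synMap.submap.size());   // 2
    // "a e" never reaches the "c" leaf of the a -> b -> c chain, so nothing is rewritten.
  }
}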

View File

@ -15,7 +15,7 @@
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
package org.apache.solr.analysis;
import java.io.IOException;
import java.io.StringReader;
@ -29,51 +29,52 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.*;
/**
* @deprecated Remove this test in Lucene 5.0
*/
public class TestSynonymFilter extends BaseTokenStreamTestCase {
@Deprecated
public class TestSlowSynonymFilter extends BaseTokenStreamTestCase {
static List<String> strings(String str) {
String[] arr = str.split(" ");
return Arrays.asList(arr);
}
static void assertTokenizesTo(SynonymMap dict, String input,
static void assertTokenizesTo(SlowSynonymMap dict, String input,
String expected[]) throws IOException {
Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected);
}
static void assertTokenizesTo(SynonymMap dict, String input,
static void assertTokenizesTo(SlowSynonymMap dict, String input,
String expected[], int posIncs[]) throws IOException {
Tokenizer tokenizer = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected, posIncs);
}
static void assertTokenizesTo(SynonymMap dict, List<Token> input,
static void assertTokenizesTo(SlowSynonymMap dict, List<Token> input,
String expected[], int posIncs[])
throws IOException {
TokenStream tokenizer = new IterTokenStream(input);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected, posIncs);
}
static void assertTokenizesTo(SynonymMap dict, List<Token> input,
static void assertTokenizesTo(SlowSynonymMap dict, List<Token> input,
String expected[], int startOffsets[], int endOffsets[], int posIncs[])
throws IOException {
TokenStream tokenizer = new IterTokenStream(input);
SynonymFilter stream = new SynonymFilter(tokenizer, dict);
SlowSynonymFilter stream = new SlowSynonymFilter(tokenizer, dict);
assertTokenStreamContents(stream, expected, startOffsets, endOffsets,
posIncs);
}
public void testMatching() throws IOException {
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = false;
boolean merge = true;
@ -110,7 +111,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
}
public void testIncludeOrig() throws IOException {
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = true;
boolean merge = true;
@ -167,7 +168,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
public void testMapMerge() throws IOException {
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = false;
boolean merge = true;
@ -206,7 +207,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
public void testOverlap() throws IOException {
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = false;
boolean merge = true;
@ -229,7 +230,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
}
public void testPositionIncrements() throws IOException {
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = false;
boolean merge = true;
@ -264,7 +265,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
public void testPositionIncrementsWithOrig() throws IOException {
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = true;
boolean merge = true;
@ -304,7 +305,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
// x=>y
// analysing "a x" causes "y" to have a bad offset (end less than start)
// SOLR-167
SynonymMap map = new SynonymMap();
SlowSynonymMap map = new SlowSynonymMap();
boolean orig = false;
boolean merge = true;

View File

@ -0,0 +1,62 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.util.Version;
import org.apache.solr.core.SolrResourceLoader;
public class TestSynonymFilterFactory extends BaseTokenTestCase {
/** test that we can parse and use the solr syn file */
public void testSynonyms() throws Exception {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.putAll(DEFAULT_VERSION_PARAM);
args.put("synonyms", "synonyms.txt");
factory.init(args);
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
assertTrue(ts instanceof SynonymFilter);
assertTokenStreamContents(ts,
new String[] { "GB", "gib", "gigabyte", "gigabytes" },
new int[] { 1, 0, 0, 0 });
}
/** test that we can parse and use the solr syn file, with the old impl
* @deprecated Remove this test in Lucene 5.0 */
@Deprecated
public void testSynonymsOld() throws Exception {
SynonymFilterFactory factory = new SynonymFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("luceneMatchVersion", Version.LUCENE_33.toString());
args.put("synonyms", "synonyms.txt");
factory.init(args);
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false));
assertTrue(ts instanceof SlowSynonymFilter);
assertTokenStreamContents(ts,
new String[] { "GB", "gib", "gigabyte", "gigabytes" },
new int[] { 1, 0, 0, 0 });
}
}
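
The synonyms.txt contents are not part of this diff; the GB expansion expected above implies a solr-format rule along the lines of the (assumed) one below, shown here being parsed with the old SlowSynonymFilterFactory.parseRules for illustration (same package assumed for access to the package-private helpers).

package org.apache.solr.analysis;

import java.util.ArrayList;
import java.util.List;

public class GigabyteRuleSketch {
  public static void main(String[] ignored) {
    List<String> rules = new ArrayList<String>();
    rules.add("GB,gib,gigabyte,gigabytes");     // assumed rule; the real synonyms.txt is not in this commit
    SlowSynonymMap synMap = new SlowSynonymMap(true);
    SlowSynonymFilterFactory.parseRules(rules, synMap, "=>", ",", true, null);

    // expand=true: each of the four terms maps to all four variants, which is why the
    // factory test above sees "GB", "gib", "gigabyte", "gigabytes" stacked at one position.
    System.out.println(synMap.submap.size());   // 4
  }
}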

View File

@ -25,32 +25,35 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.ResourceLoader;
/**
* @deprecated Remove this test in Lucene 5.0
*/
@Deprecated
public class TestSynonymMap extends LuceneTestCase {
public void testInvalidMappingRules() throws Exception {
SynonymMap synMap = new SynonymMap( true );
SlowSynonymMap synMap = new SlowSynonymMap( true );
List<String> rules = new ArrayList<String>( 1 );
rules.add( "a=>b=>c" );
try{
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
fail( "RuntimeException must be thrown." );
}
catch( RuntimeException expected ){}
}
public void testReadMappingRules() throws Exception {
SynonymMap synMap;
SlowSynonymMap synMap;
// (a)->[b]
List<String> rules = new ArrayList<String>();
rules.add( "a=>b" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "b" );
@ -58,8 +61,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (b)->[c]
rules.clear();
rules.add( "a,b=>c" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "c" );
assertTokIncludes( synMap, "b", "c" );
@ -67,8 +70,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (a)->[b][c]
rules.clear();
rules.add( "a=>b,c" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "b" );
assertTokIncludes( synMap, "a", "c" );
@ -78,8 +81,8 @@ public class TestSynonymMap extends LuceneTestCase {
rules.clear();
rules.add( "a=>a1" );
rules.add( "a b=>a2" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a1" );
assertEquals( 1, getSubSynonymMap( synMap, "a" ).submap.size() );
@ -92,8 +95,8 @@ public class TestSynonymMap extends LuceneTestCase {
rules.add( "a=>a1" );
rules.add( "a b=>a2" );
rules.add( "a c=>a3" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 1, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a1" );
assertEquals( 2, getSubSynonymMap( synMap, "a" ).submap.size() );
@ -109,8 +112,8 @@ public class TestSynonymMap extends LuceneTestCase {
rules.add( "a b=>a2" );
rules.add( "b=>b1" );
rules.add( "b c=>b2" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a1" );
assertEquals( 1, getSubSynonymMap( synMap, "a" ).submap.size() );
@ -121,14 +124,14 @@ public class TestSynonymMap extends LuceneTestCase {
}
public void testRead1waySynonymRules() throws Exception {
SynonymMap synMap;
SlowSynonymMap synMap;
// (a)->[a]
// (b)->[a]
List<String> rules = new ArrayList<String>();
rules.add( "a,b" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "b", "a" );
@ -138,8 +141,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (c)->[a]
rules.clear();
rules.add( "a,b,c" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 3, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "b", "a" );
@ -149,8 +152,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (b1)->(b2)->[a]
rules.clear();
rules.add( "a,b1 b2" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertEquals( 1, getSubSynonymMap( synMap, "b1" ).submap.size() );
@ -160,8 +163,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (b)->[a1][a2]
rules.clear();
rules.add( "a1 a2,b" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", false, null);
assertEquals( 2, synMap.submap.size() );
assertEquals( 1, getSubSynonymMap( synMap, "a1" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "a1" );
@ -171,14 +174,14 @@ public class TestSynonymMap extends LuceneTestCase {
}
public void testRead2waySynonymRules() throws Exception {
SynonymMap synMap;
SlowSynonymMap synMap;
// (a)->[a][b]
// (b)->[a][b]
List<String> rules = new ArrayList<String>();
rules.add( "a,b" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b" );
@ -190,8 +193,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (c)->[a][b][c]
rules.clear();
rules.add( "a,b,c" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 3, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b" );
@ -209,8 +212,8 @@ public class TestSynonymMap extends LuceneTestCase {
// [b1][b2]
rules.clear();
rules.add( "a,b1 b2" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b1" );
@ -226,8 +229,8 @@ public class TestSynonymMap extends LuceneTestCase {
// [b]
rules.clear();
rules.add( "a1 a2,b" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, null);
assertEquals( 2, synMap.submap.size() );
assertEquals( 1, getSubSynonymMap( synMap, "a1" ).submap.size() );
assertTokIncludes( getSubSynonymMap( synMap, "a1" ), "a2", "a1" );
@ -239,7 +242,7 @@ public class TestSynonymMap extends LuceneTestCase {
}
public void testBigramTokenizer() throws Exception {
SynonymMap synMap;
SlowSynonymMap synMap;
// prepare bi-gram tokenizer factory
BaseTokenizerFactory tf = new NGramTokenizerFactory();
@ -251,8 +254,8 @@ public class TestSynonymMap extends LuceneTestCase {
// (ab)->(bc)->(cd)->[ef][fg][gh]
List<String> rules = new ArrayList<String>();
rules.add( "abcd=>efgh" );
synMap = new SynonymMap( true );
SynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, tf);
synMap = new SlowSynonymMap( true );
SlowSynonymFilterFactory.parseRules( rules, synMap, "=>", ",", true, tf);
assertEquals( 1, synMap.submap.size() );
assertEquals( 1, getSubSynonymMap( synMap, "ab" ).submap.size() );
assertEquals( 1, getSubSynonymMap( getSubSynonymMap( synMap, "ab" ), "bc" ).submap.size() );
@ -265,7 +268,7 @@ public class TestSynonymMap extends LuceneTestCase {
public void testLoadRules() throws Exception {
Map<String, String> args = new HashMap<String, String>();
args.put( "synonyms", "something.txt" );
SynonymFilterFactory ff = new SynonymFilterFactory();
SlowSynonymFilterFactory ff = new SlowSynonymFilterFactory();
ff.init(args);
ff.inform( new ResourceLoader() {
@Override
@ -289,7 +292,7 @@ public class TestSynonymMap extends LuceneTestCase {
}
});
SynonymMap synMap = ff.getSynonymMap();
SlowSynonymMap synMap = ff.getSynonymMap();
assertEquals( 2, synMap.submap.size() );
assertTokIncludes( synMap, "a", "a" );
assertTokIncludes( synMap, "a", "b" );
@ -298,7 +301,7 @@ public class TestSynonymMap extends LuceneTestCase {
}
private void assertTokIncludes( SynonymMap map, String src, String exp ) throws Exception {
private void assertTokIncludes( SlowSynonymMap map, String src, String exp ) throws Exception {
Token[] tokens = map.submap.get( src ).synonyms;
boolean inc = false;
for( Token token : tokens ){
@ -308,7 +311,7 @@ public class TestSynonymMap extends LuceneTestCase {
assertTrue( inc );
}
private SynonymMap getSubSynonymMap( SynonymMap map, String src ){
private SlowSynonymMap getSubSynonymMap( SlowSynonymMap map, String src ){
return map.submap.get( src );
}
}
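
Finally, a hedged sketch of the FST-backed API this change introduces (LUCENE-3233), for comparison with the slow map exercised above. The SynonymMap.Builder/CharsRef signatures are assumed to match the form they take in the released 3.4 line; the rule and input text are illustrative only.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Version;

public class FstSynonymSketch {
  public static void main(String[] ignored) throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);        // dedup duplicate rules (assumed signature)
    builder.add(new CharsRef("GB"), new CharsRef("gigabyte"), true);  // keep the original token too
    SynonymMap map = builder.build();                                 // compiles the rules into an FST

    TokenStream ts = new SynonymFilter(
        new WhitespaceTokenizer(Version.LUCENE_34, new StringReader("10 GB ram")), map, true);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString());  // "GB" and "gigabyte" come back stacked at one position
    }
    ts.end();
    ts.close();
  }
}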