diff --git a/.hgignore b/.hgignore
index 3cb9afd9fae..1e17ec787de 100644
--- a/.hgignore
+++ b/.hgignore
@@ -1,2 +1,4 @@
 syntax: glob
 */build/*
+*.class
+
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
index 95ed9554c67..61297284a2a 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
@@ -31,15 +31,16 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.automaton.fst.Builder;
+import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.automaton.fst.FST;
+import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
+import org.apache.lucene.util.automaton.fst.PairOutputs;
 
 import java.io.IOException;
 import java.util.Comparator;
 import java.util.Map;
-import java.util.Set;
 import java.util.HashMap;
-import java.util.TreeMap;
-import java.util.SortedMap;
-import java.util.Iterator;
 
 class SimpleTextFieldsReader extends FieldsProducer {
 
@@ -116,73 +117,39 @@ class SimpleTextFieldsReader extends FieldsProducer {
   private class SimpleTextTermsEnum extends TermsEnum {
     private final IndexInput in;
     private final boolean omitTF;
-    private BytesRef current;
     private int docFreq;
     private long docsStart;
     private boolean ended;
-    private final TreeMap<BytesRef,TermData> allTerms;
-    private Iterator<Map.Entry<BytesRef,TermData>> iter;
+    private final BytesRefFSTEnum<PairOutputs.Pair<Long,Long>> fstEnum;
 
-    public SimpleTextTermsEnum(TreeMap<BytesRef,TermData> allTerms, boolean omitTF) throws IOException {
+    public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,Long>> fst, boolean omitTF) throws IOException {
       this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
-      this.allTerms = allTerms;
       this.omitTF = omitTF;
-      iter = allTerms.entrySet().iterator();
+      fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,Long>>(fst);
     }
 
     public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
-      
-      final SortedMap<BytesRef,TermData> tailMap = allTerms.tailMap(text);
 
-      if (tailMap.isEmpty()) {
-        current = null;
+      fstEnum.reset();
+      //System.out.println("seek to text=" + text.utf8ToString());
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.advance(text);
+      if (result == null) {
+        //System.out.println("  end");
         return SeekStatus.END;
       } else {
-        current = tailMap.firstKey();
-        final TermData td = tailMap.get(current);
-        docsStart = td.docsStart;
-        docFreq = td.docFreq;
-        iter = tailMap.entrySet().iterator();
-        assert iter.hasNext();
-        iter.next();
-        if (current.equals(text)) {
+        //System.out.println("  got text=" + term.utf8ToString());
+        PairOutputs.Pair<Long,Long> pair = result.output;
+        docsStart = pair.output1;
+        docFreq = pair.output2.intValue();
+
+        if (result.input.equals(text)) {
+          //System.out.println("  match docsStart=" + docsStart);
           return SeekStatus.FOUND;
         } else {
+          //System.out.println("  not match docsStart=" + docsStart);
           return SeekStatus.NOT_FOUND;
         }
       }
-
-      /*
-      if (current != null) {
-        final int cmp = current.compareTo(text);
-        if (cmp == 0) {
-          return SeekStatus.FOUND;
-        } else if (cmp > 0) {
-          ended = false;
-          in.seek(fieldStart);
-        }
-      } else {
-        ended = false;
-        in.seek(fieldStart);
-      }
-
-      // Naive!!  This just scans... would be better to do
-      // up-front scan to build in-RAM index
-      BytesRef b;
-      while((b = next()) != null) {
-        final int cmp = b.compareTo(text);
-        if (cmp == 0) {
-          ended = false;
-          return SeekStatus.FOUND;
-        } else if (cmp > 0) {
-          ended = false;
-          return SeekStatus.NOT_FOUND;
-        }
-      }
-      current = null;
-      ended = true;
-      return SeekStatus.END;
-      */
     }
 
     @Override
@@ -192,56 +159,20 @@ class SimpleTextFieldsReader extends FieldsProducer {
     @Override
     public BytesRef next() throws IOException {
       assert !ended;
-
-      if (iter.hasNext()) {
-        Map.Entry<BytesRef,TermData> ent = iter.next();
-        current = ent.getKey();
-        TermData td = ent.getValue();
-        docFreq = td.docFreq;
-        docsStart = td.docsStart;
-        return current;
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.next();
+      if (result != null) {
+        final PairOutputs.Pair<Long,Long> pair = result.output;
+        docsStart = pair.output1;
+        docFreq = pair.output2.intValue();
+        return result.input;
       } else {
-        current = null;
         return null;
       }
-
-      /*
-      readLine(in, scratch);
-      if (scratch.equals(END) || scratch.startsWith(FIELD)) {
-        ended = true;
-        current = null;
-        return null;
-      } else {
-        assert scratch.startsWith(TERM): "got " + scratch.utf8ToString();
-        docsStart = in.getFilePointer();
-        final int len = scratch.length - TERM.length;
-        if (len > scratch2.length) {
-          scratch2.grow(len);
-        }
-        System.arraycopy(scratch.bytes, TERM.length, scratch2.bytes, 0, len);
-        scratch2.length = len;
-        current = scratch2;
-        docFreq = 0;
-        long lineStart = 0;
-        while(true) {
-          lineStart = in.getFilePointer();
-          readLine(in, scratch);
-          if (scratch.equals(END) || scratch.startsWith(FIELD) || scratch.startsWith(TERM)) {
-            break;
-          }
-          if (scratch.startsWith(DOC)) {
-            docFreq++;
-          }
-        }
-        in.seek(lineStart);
-        return current;
-      }
-      */
     }
 
     @Override
     public BytesRef term() {
-      return current;
+      return fstEnum.current().input;
     }
 
     @Override
@@ -512,10 +443,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     private final String field;
     private final long termsStart;
     private final boolean omitTF;
-
-    // NOTE: horribly, horribly RAM consuming, but then
-    // SimpleText should never be used in production
-    private final TreeMap<BytesRef,TermData> allTerms = new TreeMap<BytesRef,TermData>();
+    private FST<PairOutputs.Pair<Long,Long>> fst;
 
     private final BytesRef scratch = new BytesRef(10);
 
@@ -527,6 +455,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
     }
 
     private void loadTerms() throws IOException {
+      PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
+      Builder<PairOutputs.Pair<Long,Long>> b = new Builder<PairOutputs.Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, 0, 0, true, new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs));
       IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
       in.seek(termsStart);
       final BytesRef lastTerm = new BytesRef(10);
@@ -536,16 +466,14 @@ class SimpleTextFieldsReader extends FieldsProducer {
         readLine(in, scratch);
         if (scratch.equals(END) || scratch.startsWith(FIELD)) {
           if (lastDocsStart != -1) {
-            allTerms.put(new BytesRef(lastTerm),
-                         new TermData(lastDocsStart, docFreq));
+            b.add(lastTerm, new PairOutputs.Pair<Long,Long>(lastDocsStart, Long.valueOf(docFreq)));
           }
           break;
         } else if (scratch.startsWith(DOC)) {
           docFreq++;
         } else if (scratch.startsWith(TERM)) {
           if (lastDocsStart != -1) {
-            allTerms.put(new BytesRef(lastTerm),
-                         new TermData(lastDocsStart, docFreq));
+            b.add(lastTerm, new PairOutputs.Pair<Long,Long>(lastDocsStart, Long.valueOf(docFreq)));
           }
           lastDocsStart = in.getFilePointer();
           final int len = scratch.length - TERM.length;
@@ -557,11 +485,23 @@ class SimpleTextFieldsReader extends FieldsProducer {
           docFreq = 0;
         }
       }
+      fst = b.finish();
+      /*
+      PrintStream ps = new PrintStream("out.dot");
+      fst.toDot(ps);
+      ps.close();
+      System.out.println("SAVED out.dot");
+      */
+      //System.out.println("FST " + fst.sizeInBytes());
     }
 
     @Override
     public TermsEnum iterator() throws IOException {
-      return new SimpleTextTermsEnum(allTerms, omitTF);
+      if (fst != null) {
+        return new SimpleTextTermsEnum(fst, omitTF);
+      } else {
+        return TermsEnum.EMPTY;
+      }
     }
 
     @Override
diff --git a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
index d9dd51e956f..fbf62507bae 100644
--- a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -19,6 +19,7 @@ package org.apache.lucene.util;
 
 import java.util.Collection;
 import java.util.Comparator;
+import java.lang.reflect.Array;
 
 /**
  * Methods for manipulating arrays.
@@ -392,7 +393,7 @@ public final class ArrayUtil {
   }
 
   /**
-   * Returns hash of chars in range start (inclusive) to
+   * Returns hash of bytes in range start (inclusive) to
    * end (inclusive)
    */
   public static int hashCode(byte[] array, int start, int end) {
@@ -429,6 +430,31 @@ public final class ArrayUtil {
     return false;
   }
 
+  /** Returns {@code array} unchanged if it already holds {@code minSize} entries, else an oversized copy of it. */
+  public static <T> T[] grow(T[] array, int minSize) {
+    if (array.length >= minSize) {
+      return array;
+    }
+    @SuppressWarnings("unchecked") final T[] grown = (T[]) Array.newInstance(array.getClass().getComponentType(), oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJ_REF));
+    System.arraycopy(array, 0, grown, 0, array.length);
+    return grown;
+  }
+
+  public static <T> T[] grow(T[] array) {
+    return grow(array, 1 + array.length); // always reallocates: asks for one slot more than the current length
+  }
+
+  /** Returns {@code array} unchanged if getShrinkSize keeps its length, else a copy truncated to the new size. */
+  public static <T> T[] shrink(T[] array, int targetSize) {
+    final int shrunkLength = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJ_REF);
+    if (shrunkLength == array.length) {
+      return array;
+    }
+    @SuppressWarnings("unchecked") final T[] shrunk = (T[]) Array.newInstance(array.getClass().getComponentType(), shrunkLength);
+    System.arraycopy(array, 0, shrunk, 0, shrunkLength);
+    return shrunk;
+  }
+
   // Since Arrays.equals doesn't implement offsets for equals
   /**
    * See if two array slices are the same.
diff --git a/lucene/src/java/org/apache/lucene/util/IntsRef.java b/lucene/src/java/org/apache/lucene/util/IntsRef.java
index 33a486b09c0..1f284b5ea51 100644
--- a/lucene/src/java/org/apache/lucene/util/IntsRef.java
+++ b/lucene/src/java/org/apache/lucene/util/IntsRef.java
@@ -21,7 +21,7 @@ package org.apache.lucene.util;
  *  existing int[].
  *
  *  @lucene.internal */
-public final class IntsRef {
+public final class IntsRef implements Comparable<IntsRef> {
 
   public int[] ints;
   public int offset;
@@ -81,6 +81,31 @@ public final class IntsRef {
     }
   }
 
+  /** Compares position by position using signed int order; on a tie the shorter slice sorts first. */
+  public int compareTo(IntsRef other) {
+    if (this == other) {
+      return 0;
+    }
+
+    final int[] mine = this.ints;
+    final int[] theirs = other.ints;
+    final int shared = Math.min(this.length, other.length);
+
+    for (int i = 0; i < shared; i++) {
+      final int a = mine[this.offset + i];
+      final int b = theirs[other.offset + i];
+      if (a > b) {
+        return 1;
+      }
+      if (a < b) {
+        return -1;
+      }
+    }
+
+    // No difference in the shared positions: equal, or one is a prefix of the other.
+    return this.length - other.length;
+  }
+
   public void copy(IntsRef other) {
     if (ints == null) {
       ints = new int[other.length];
@@ -97,4 +122,18 @@ public final class IntsRef {
       ints = ArrayUtil.grow(ints, newLength);
     }
   }
+
+  /** Renders the slice as hex values separated by single spaces and wrapped in square brackets. */
+  @Override
+  public String toString() {
+    final StringBuilder text = new StringBuilder();
+    text.append('[');
+    for (int pos = 0; pos < length; pos++) {
+      if (pos != 0) {
+        text.append(' ');
+      }
+      text.append(Integer.toHexString(ints[offset + pos]));
+    }
+    return text.append(']').toString();
+  }
 }
diff --git a/lucene/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java b/lucene/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java
index 5346f9fcc3a..af1a48f18b1 100644
--- a/lucene/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java
+++ b/lucene/src/java/org/apache/lucene/util/RecyclingByteBlockAllocator.java
@@ -93,13 +93,7 @@ public final class RecyclingByteBlockAllocator extends ByteBlockPool.Allocator {
   @Override
   public synchronized void recycleByteBlocks(byte[][] blocks, int start, int end) {
     final int numBlocks = Math.min(maxBufferedBlocks - freeBlocks, end - start);
-    final int size = freeBlocks + numBlocks;
-    if (size >= freeByteBlocks.length) {
-      final byte[][] newBlocks = new byte[ArrayUtil.oversize(size,
-          RamUsageEstimator.NUM_BYTES_OBJ_REF)][];
-      System.arraycopy(freeByteBlocks, 0, newBlocks, 0, freeBlocks);
-      freeByteBlocks = newBlocks;
-    }
+    freeByteBlocks = ArrayUtil.grow(freeByteBlocks, freeBlocks + numBlocks);
     final int stop = start + numBlocks;
     for (int i = start; i < stop; i++) {
       freeByteBlocks[freeBlocks++] = blocks[i];
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java
index b4a893039a2..990dbf58298 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java
@@ -40,7 +40,6 @@ import java.util.List;
 import java.util.Set;
 
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * Finite-state automaton with regular expression operations.
@@ -281,9 +280,7 @@ public class Automaton implements Serializable, Cloneable {
             worklist.add(t.to);
             t.to.number = upto;
             if (upto == numberedStates.length) {
-              final State[] newArray = new State[ArrayUtil.oversize(1+upto, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
-              System.arraycopy(numberedStates, 0, newArray, 0, upto);
-              numberedStates = newArray;
+              numberedStates = ArrayUtil.grow(numberedStates);
             }
             numberedStates[upto] = t.to;
             upto++;
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java b/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java
index 5061ec315d4..965ef6025f6 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/BasicOperations.java
@@ -30,7 +30,6 @@
 package org.apache.lucene.util.automaton;
 
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
 
 import java.util.ArrayList;
 import java.util.BitSet;
@@ -459,9 +458,7 @@ final public class BasicOperations {
 
     public void add(Transition t) {
       if (transitions.length == count) {
-        Transition[] newArray = new Transition[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
-        System.arraycopy(transitions, 0, newArray, 0, count);
-        transitions = newArray;
+        transitions = ArrayUtil.grow(transitions);
       }
       transitions[count++] = t;
     }
@@ -503,9 +500,7 @@ final public class BasicOperations {
     private PointTransitions next(int point) {
       // 1st time we are seeing this point
       if (count == points.length) {
-        final PointTransitions[] newArray = new PointTransitions[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
-        System.arraycopy(points, 0, newArray, 0, count);
-        points = newArray;
+        points = ArrayUtil.grow(points);
       }
       PointTransitions points0 = points[count];
       if (points0 == null) {
@@ -650,9 +645,7 @@ final public class BasicOperations {
             final SortedIntSet.FrozenIntSet p = statesSet.freeze(q);
             worklist.add(p);
             if (newStateUpto == newStatesArray.length) {
-              final State[] newArray = new State[ArrayUtil.oversize(1+newStateUpto, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
-              System.arraycopy(newStatesArray, 0, newArray, 0, newStateUpto);
-              newStatesArray = newArray;
+              newStatesArray = ArrayUtil.grow(newStatesArray);
             }
             newStatesArray[newStateUpto] = q;
             q.number = newStateUpto;
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/State.java b/lucene/src/java/org/apache/lucene/util/automaton/State.java
index b4040c9b583..148d946d4d0 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/State.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/State.java
@@ -29,7 +29,6 @@
 
 package org.apache.lucene.util.automaton;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
 
 import java.io.Serializable;
 import java.util.Collection;
@@ -111,9 +110,7 @@ public class State implements Serializable, Comparable<State> {
    */
   public void addTransition(Transition t) {
     if (numTransitions == transitionsArray.length) {
-      final Transition[] newArray = new Transition[ArrayUtil.oversize(1+numTransitions, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
-      System.arraycopy(transitionsArray, 0, newArray, 0, numTransitions);
-      transitionsArray = newArray;
+      transitionsArray = ArrayUtil.grow(transitionsArray);
     }
     transitionsArray[numTransitions++] = t;
   }
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java b/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java
index 9a2bee79917..ee252f25873 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/UTF32ToUTF8.java
@@ -17,7 +17,6 @@ package org.apache.lucene.util.automaton;
  * limitations under the License.
  */
 
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ArrayUtil;
 
 import java.util.List;
@@ -299,9 +298,7 @@ final class UTF32ToUTF8 {
   private State newUTF8State() {
     State s = new State();
     if (utf8StateCount == utf8States.length) {
-      final State[] newArray = new State[ArrayUtil.oversize(1+utf8StateCount, RamUsageEstimator.NUM_BYTES_OBJ_REF)];
-      System.arraycopy(utf8States, 0, newArray, 0, utf8StateCount);
-      utf8States = newArray;
+      utf8States = ArrayUtil.grow(utf8States);
     }
     utf8States[utf8StateCount] = s;
     s.number = utf8StateCount;
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
new file mode 100644
index 00000000000..2445e40f06d
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/Builder.java
@@ -0,0 +1,506 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+
+import java.io.IOException;
+
+/**
+ * Builds a compact FST (maps an IntsRef term to an arbitrary
+ * output) from pre-sorted terms with outputs (the FST
+ * becomes an FSA if you use NoOutputs).  The FST is written
+ * on-the-fly into a compact serialized format byte array, which can
+ * be saved to / loaded from a Directory or used directly
+ * for traversal.  The FST is always finite (no cycles).
+ *
+ * <p>NOTE: The algorithm is described at
+ * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
+ *
+ * <p>If your outputs are ByteSequenceOutputs then the final FST
+ * will be minimal, but if you use PositiveIntOutputs then
+ * it's only "near minimal".  For example, aa/0, aab/1, bbb/2
+ * will produce 6 states when a 5 state FST is also
+ * possible.</p>
+ *
+ * The parameterized type T is the output type.  See the
+ * subclasses of {@link Outputs}.
+ *
+ * @lucene.experimental
+ */
+
+public class Builder<T> {
+  private final NodeHash<T> dedupHash;
+  private final FST<T> fst;
+  private final T NO_OUTPUT;
+
+  // simplistic pruning: we prune node (and all following
+  // nodes) if less than this number of terms go through it:
+  private final int minSuffixCount1;
+
+  // better pruning: we prune node (and all following
+  // nodes) if the prior node has less than this number of
+  // terms go through it:
+  private final int minSuffixCount2;
+
+  private final IntsRef lastInput = new IntsRef();
+
+  // NOTE: cutting this over to ArrayList instead loses ~6%
+  // in build performance on 9.8M Wikipedia terms; so we
+  // left this as an array:
+  // current "frontier"
+  private UnCompiledNode<T>[] frontier;
+
+  /** Creates a builder whose FST uses the given input type and outputs.
+   *  minSuffixCount1/minSuffixCount2 are the pruning thresholds described on the
+   *  fields of the same name (0 disables that pruning); doMinSuffix routes
+   *  compiled nodes through a NodeHash instead of adding them to the FST directly. */
+  public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doMinSuffix, Outputs<T> outputs) {
+    this.minSuffixCount1 = minSuffixCount1;
+    this.minSuffixCount2 = minSuffixCount2;
+    fst = new FST<T>(inputType, outputs);
+    dedupHash = doMinSuffix ? new NodeHash<T>(fst) : null;
+    NO_OUTPUT = outputs.getNoOutput();
+
+    @SuppressWarnings("unchecked") final UnCompiledNode<T>[] initial = (UnCompiledNode<T>[]) new UnCompiledNode[10];
+    frontier = initial;
+    for (int depth = 0; depth < frontier.length; depth++) {
+      frontier[depth] = new UnCompiledNode<T>(this);
+    }
+  }
+
+  public int getTotStateCount() {
+    return fst.nodeCount; // node counter kept by the FST under construction
+  }
+
+  public int getTermCount() {
+    return frontier[0].inputCount; // the root's counter is bumped exactly once by every add()
+  }
+
+  public int getMappedStateCount() {
+    return dedupHash == null ? 0 : fst.nodeCount; // NOTE(review): same counter as getTotStateCount() when the hash is on -- confirm intended
+  }
+
+  /** Adds {@code n} to the FST (directly or through the dedup hash), clears it, and returns a CompiledNode holding its address. */
+  private CompiledNode compileNode(UnCompiledNode<T> n) throws IOException {
+    // Nodes without arcs, and every node when there is no dedup hash,
+    // are added to the FST directly; all others go through the hash.
+    final boolean viaHash = dedupHash != null && n.numArcs != 0;
+    final int address;
+    if (viaHash) {
+      address = dedupHash.add(n);
+    } else {
+      address = fst.addNode(n);
+    }
+    assert address != -2;
+
+    // the uncompiled node is no longer needed in its current state
+    n.clear();
+
+    final CompiledNode compiled = new CompiledNode();
+    compiled.address = address;
+    return compiled;
+  }
+
+  private void compilePrevTail(int prefixLenPlus1) throws IOException {
+    assert prefixLenPlus1 >= 1;
+    // Walk the frontier from the end of the last input back to prefixLenPlus1; each node is pruned, compiled, or left undecided.
+    for(int idx=lastInput.length; idx >= prefixLenPlus1; idx--) {
+      boolean doPrune = false;
+      boolean doCompile = false;
+
+      final UnCompiledNode<T> node = frontier[idx];
+      final UnCompiledNode<T> parent = frontier[idx-1];
+
+      if (node.inputCount < minSuffixCount1) {
+        doPrune = true;
+        doCompile = true;
+      } else if (idx > prefixLenPlus1) {
+        // prune if parent's inputCount is less than minSuffixCount2
+        if (parent.inputCount < minSuffixCount2 || minSuffixCount2 == 1 && parent.inputCount == 1) {
+          // my parent, about to be compiled, doesn't make the cut, so
+          // I'm definitely pruned
+
+          // if minSuffixCount2 is 1, we keep only up
+          // until the 'distinguished edge', ie we keep only the
+          // 'divergent' part of the FST. if my parent, about to be
+          // compiled, has inputCount 1 then we are already past the
+          // distinguished edge.  NOTE: this only works if
+          // the FST outputs are not "compressible" (simple
+          // ords ARE compressible).
+          doPrune = true;
+        } else {
+          // my parent, about to be compiled, does make the cut, so
+          // I'm definitely not pruned
+          doPrune = false;
+        }
+        doCompile = true;
+      } else {
+        // if pruning is disabled (count is 0) we can always
+        // compile current node
+        doCompile = minSuffixCount2 == 0;
+      }
+
+      // independently of the decision above, a node that itself misses the minSuffixCount2 cut loses all of its outgoing arcs:
+
+      if (node.inputCount < minSuffixCount2 || minSuffixCount2 == 1 && node.inputCount == 1) {
+        // drop all arcs
+        for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
+          @SuppressWarnings("unchecked") final UnCompiledNode<T> target = (UnCompiledNode<T>) node.arcs[arcIdx].target;
+          target.clear();
+        }
+        node.numArcs = 0;
+      }
+
+      if (doPrune) {
+        // this node doesn't make it -- deref it
+        node.clear();
+        parent.deleteLast(lastInput.ints[lastInput.offset+idx-1], node);
+      } else {
+        // with minSuffixCount2 enabled, targets may have been left undecided (uncompiled) earlier; compile them first
+        if (minSuffixCount2 != 0) {
+          compileAllTargets(node);
+        }
+        final T nextFinalOutput = node.output;
+        final boolean isFinal = node.isFinal;
+
+        if (doCompile) {
+          // this node makes it and we now compile it (its
+          // previously undecided targets were compiled just
+          // above) and point the parent's last arc at it:
+          parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
+                             compileNode(node),
+                             nextFinalOutput,
+                             isFinal);
+        } else {
+          // replaceLast just to install
+          // nextFinalOutput/isFinal onto the arc
+          parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
+                             node,
+                             nextFinalOutput,
+                             isFinal);
+          // this node will stay in play for now, since we are
+          // undecided on whether to prune it.  later, it
+          // will be either compiled or pruned, so we must
+          // allocate a new node:
+          frontier[idx] = new UnCompiledNode<T>(this);
+        }
+      }
+    }
+  }
+
+  private final IntsRef scratchIntsRef = new IntsRef(10);
+
+  public void add(BytesRef input, T output) throws IOException { // Sugar: adds the bytes of a BytesRef slice; FST must be FST.INPUT_TYPE.BYTE1
+    assert fst.getInputType() == FST.INPUT_TYPE.BYTE1;
+    scratchIntsRef.grow(input.length);
+    for(int i=0;i<input.length;i++) {
+      scratchIntsRef.ints[i] = input.bytes[i+input.offset] & 0xFF; // mask so each byte becomes an unsigned 0..255 label
+    }
+    scratchIntsRef.length = input.length;
+    add(scratchIntsRef, output);
+  }
+
+  /** Sugar: adds the UTF32 code points decoded from the char[] slice
+   *  [offset, offset+length).  FST must be FST.INPUT_TYPE.BYTE4! */
+  public void add(char[] s, int offset, int length, T output) throws IOException {
+    assert fst.getInputType() == FST.INPUT_TYPE.BYTE4;
+    int charIdx = offset;
+    int intIdx = 0;
+    final int charLimit = offset + length;
+    while(charIdx < charLimit) {
+      scratchIntsRef.grow(intIdx+1);
+      // bound the decode by charLimit so a high surrogate ending the slice never pairs with a char beyond it
+      final int utf32 = Character.codePointAt(s, charIdx, charLimit);
+      scratchIntsRef.ints[intIdx++] = utf32;
+      charIdx += Character.charCount(utf32);
+    }
+    scratchIntsRef.length = intIdx;
+    add(scratchIntsRef, output);
+  }
+
+  /** Sugar: decodes the whole CharSequence into UTF32 code points
+   *  and adds them as one input.  FST must be FST.INPUT_TYPE.BYTE4! */
+  public void add(CharSequence s, T output) throws IOException {
+    assert fst.getInputType() == FST.INPUT_TYPE.BYTE4;
+    final int numChars = s.length();
+    int numCodePoints = 0;
+    for (int pos = 0; pos < numChars; numCodePoints++) {
+      scratchIntsRef.grow(numCodePoints+1);
+      final int codePoint = Character.codePointAt(s, pos);
+      scratchIntsRef.ints[numCodePoints] = codePoint;
+      // supplementary code points occupy two chars
+      pos += Character.charCount(codePoint);
+    }
+
+    scratchIntsRef.length = numCodePoints;
+    add(scratchIntsRef, output);
+  }
+
+  public void add(IntsRef input, T output) throws IOException {
+    // Adds one input/output pair; inputs must arrive in strictly increasing order (checked by the assert below).
+    assert lastInput.length == 0 || input.compareTo(lastInput) > 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
+    assert validOutput(output);
+
+    // NOTE: the empty input takes the early return below and is never recorded in lastInput
+    if (input.length == 0) {
+      // empty input: only allowed as first input.  we have
+      // to special case this because the packed FST
+      // format cannot represent the empty input since
+      // 'finalness' is stored on the incoming arc, not on
+      // the node
+      frontier[0].inputCount++;
+      fst.setEmptyOutput(output);
+      return;
+    }
+
+    // find the length of the prefix shared with the previous input
+    int pos1 = 0;
+    int pos2 = input.offset;
+    final int pos1Stop = Math.min(lastInput.length, input.length);
+    while(true) {
+      // count this input on every frontier node along the shared prefix (root included)
+      frontier[pos1].inputCount++;
+      if (pos1 >= pos1Stop || lastInput.ints[pos1] != input.ints[pos2]) {
+        break;
+      }
+      pos1++;
+      pos2++;
+    }
+    final int prefixLenPlus1 = pos1+1;
+      
+    if (frontier.length < input.length+1) {
+      final UnCompiledNode<T>[] next = ArrayUtil.grow(frontier, input.length+1);
+      for(int idx=frontier.length;idx<next.length;idx++) {
+        next[idx] = new UnCompiledNode<T>(this);
+      }
+      frontier = next;
+    }
+
+    // minimize/compile states from previous input's
+    // orphan'd suffix
+    compilePrevTail(prefixLenPlus1);
+
+    // init tail states for current input
+    for(int idx=prefixLenPlus1;idx<=input.length;idx++) {
+      frontier[idx-1].addArc(input.ints[input.offset + idx - 1],
+                             frontier[idx]);
+      // the node just linked in is on this input's path
+      frontier[idx].inputCount++;
+    }
+
+    final UnCompiledNode<T> lastNode = frontier[input.length];
+    lastNode.isFinal = true;
+    lastNode.output = NO_OUTPUT;
+
+    // push conflicting outputs forward, only as far as
+    // needed
+    for(int idx=1;idx<prefixLenPlus1;idx++) {
+      final UnCompiledNode<T> node = frontier[idx];
+      final UnCompiledNode<T> parentNode = frontier[idx-1];
+
+      final T lastOutput = parentNode.getLastOutput(input.ints[input.offset + idx - 1]);
+      assert validOutput(lastOutput);
+
+      final T commonOutputPrefix;
+      final T wordSuffix;
+
+      if (lastOutput != NO_OUTPUT) {
+        commonOutputPrefix = fst.outputs.common(output, lastOutput);
+        assert validOutput(commonOutputPrefix);
+        wordSuffix = fst.outputs.subtract(lastOutput, commonOutputPrefix);
+        assert validOutput(wordSuffix);
+        parentNode.setLastOutput(input.ints[input.offset + idx - 1], commonOutputPrefix);
+        node.prependOutput(wordSuffix);
+      } else {
+        commonOutputPrefix = wordSuffix = NO_OUTPUT;
+      }
+
+      output = fst.outputs.subtract(output, commonOutputPrefix);
+      assert validOutput(output);
+    }
+
+    // push remaining output:
+    frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
+
+    // save last input
+    lastInput.copy(input);
+
+    // NOTE: input was copied into lastInput above, so the caller may reuse its IntsRef (the sugar add() methods do)
+  }
+
+  private boolean validOutput(T output) {
+    return !(output != NO_OUTPUT && output.equals(NO_OUTPUT));  // an "empty" output must be the NO_OUTPUT instance itself
+  }
+
+  /** Compiles whatever is still pending and returns the
+   *  final FST.  NOTE: returns null if nothing is accepted. */
+  public FST<T> finish() throws IOException {
+
+    // minimize nodes in the last word's suffix
+    compilePrevTail(1);
+    //System.out.println("finish: inputCount=" + frontier[0].inputCount);
+    final boolean rootPruned = frontier[0].inputCount < minSuffixCount1 || frontier[0].inputCount < minSuffixCount2 || frontier[0].numArcs == 0;
+    if (!rootPruned) {
+      if (minSuffixCount2 != 0) {
+        compileAllTargets(frontier[0]);
+      }
+      //System.out.println("NOW: " + frontier[0].numArcs);
+      fst.finish(compileNode(frontier[0]).address);
+      return fst;
+    }
+    // root was pruned or has no arcs: at most the empty
+    // string can still be accepted
+    if (fst.getEmptyOutput() == null) {
+      return null;
+    }
+    if (minSuffixCount1 > 0 || minSuffixCount2 > 0) {
+      // empty string got pruned
+      return null;
+    }
+    fst.finish(compileNode(frontier[0]).address);
+    return fst;
+  }
+
+  private void compileAllTargets(UnCompiledNode<T> node) throws IOException {
+    for(int upto=0;upto<node.numArcs;upto++) {
+      final Arc<T> pendingArc = node.arcs[upto];
+      if (pendingArc.target.isCompiled()) {
+        continue;  // nothing to do: target is already compiled
+      }
+      @SuppressWarnings("unchecked") final UnCompiledNode<T> pending = (UnCompiledNode<T>) pendingArc.target;
+      pendingArc.target = compileNode(pending);
+    }
+  }
+
+  static class Arc<T> {
+    public int label;                             // really an "unsigned" byte
+    public Node target;                           // node this arc leads to; may or may not be compiled yet
+    public boolean isFinal;                       // false after addArc; set via UnCompiledNode.replaceLast
+    public T output;                              // output carried by this arc; NO_OUTPUT after addArc
+    public T nextFinalOutput;                     // NO_OUTPUT after addArc; set via UnCompiledNode.replaceLast
+  }
+
+  // NOTE: not many instances of Node or CompiledNode are in
+  // memory while the FST is being built; it's only the
+  // current "frontier":
+
+  static interface Node {
+    boolean isCompiled();  // true for CompiledNode, false for UnCompiledNode
+  }
+
+  static final class CompiledNode implements Node {
+    int address;  // address of the node; FST.addNode reads this from arc targets, finish() passes it to fst.finish
+    public boolean isCompiled() {
+      return true;
+    }
+  }
+
+  static final class UnCompiledNode<T> implements Node {
+    final Builder<T> owner;  // builder supplying NO_OUTPUT, validOutput and fst.outputs
+    int numArcs;             // number of arcs in use; arcs[] may be longer
+    Arc<T>[] arcs;           // Arc instances are pre-allocated and re-used after clear()
+    T output;                // node output; prependOutput only touches it when isFinal
+    boolean isFinal;
+    int inputCount;          // incremented by Builder.add
+
+    @SuppressWarnings("unchecked")
+    public UnCompiledNode(Builder<T> owner) {
+      this.owner = owner;
+      arcs = (Arc<T>[]) new Arc[1];
+      arcs[0] = new Arc<T>();
+      output = owner.NO_OUTPUT;
+    }
+
+    public boolean isCompiled() {
+      return false;
+    }
+
+    public void clear() {  // resets the node for re-use; the arcs array itself is kept
+      numArcs = 0;
+      isFinal = false;
+      output = owner.NO_OUTPUT;
+      inputCount = 0;
+    }
+
+    public T getLastOutput(int labelToMatch) {  // output of the last arc; labelToMatch is only checked by assert
+      assert numArcs > 0;
+      assert arcs[numArcs-1].label == labelToMatch;
+      return arcs[numArcs-1].output;
+    }
+
+    public void addArc(int label, Node target) {  // appends an arc; labels must be added in strictly increasing order (asserted)
+      assert label >= 0;
+      assert numArcs == 0 || label > arcs[numArcs-1].label: "arc[-1].label=" + arcs[numArcs-1].label + " new label=" + label + " numArcs=" + numArcs;
+      if (numArcs == arcs.length) {
+        final Arc<T>[] newArcs = ArrayUtil.grow(arcs);
+        for(int arcIdx=numArcs;arcIdx<newArcs.length;arcIdx++) {
+          newArcs[arcIdx] = new Arc<T>();
+        }
+        arcs = newArcs;
+      }
+      final Arc<T> arc = arcs[numArcs++];
+      arc.label = label;
+      arc.target = target;
+      arc.output = arc.nextFinalOutput = owner.NO_OUTPUT;
+      arc.isFinal = false;
+    }
+
+    public void replaceLast(int labelToMatch, Node target, T nextFinalOutput, boolean isFinal) {  // re-targets the last arc; arc.output is left untouched
+      assert numArcs > 0;
+      final Arc<T> arc = arcs[numArcs-1];
+      assert arc.label == labelToMatch: "arc.label=" + arc.label + " vs " + labelToMatch;
+      arc.target = target;
+      //assert target.address != -2;
+      arc.nextFinalOutput = nextFinalOutput;
+      arc.isFinal = isFinal;
+    }
+
+    public void deleteLast(int label, Node target) {  // drops the last arc; label and target are only checked by assert
+      assert numArcs > 0;
+      assert label == arcs[numArcs-1].label;
+      assert target == arcs[numArcs-1].target;
+      numArcs--;
+    }
+
+    public void setLastOutput(int labelToMatch, T newOutput) {  // overwrites the output of the last arc
+      assert owner.validOutput(newOutput);
+      assert numArcs > 0;
+      final Arc<T> arc = arcs[numArcs-1];
+      assert arc.label == labelToMatch;
+      arc.output = newOutput;
+    }
+
+    // pushes an output prefix forward onto all arcs and, if this node is final, onto the node's own output
+    public void prependOutput(T outputPrefix) {
+      assert owner.validOutput(outputPrefix);
+
+      for(int arcIdx=0;arcIdx<numArcs;arcIdx++) {
+        arcs[arcIdx].output = owner.fst.outputs.add(outputPrefix, arcs[arcIdx].output);
+        assert owner.validOutput(arcs[arcIdx].output);
+      }
+
+      if (isFinal) {
+        output = owner.fst.outputs.add(outputPrefix, output);
+        assert owner.validOutput(output);
+      }
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/ByteSequenceOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/ByteSequenceOutputs.java
new file mode 100644
index 00000000000..f99c80f93c1
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/ByteSequenceOutputs.java
@@ -0,0 +1,137 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Output is a sequence of bytes, for each input term.
+ * @lucene.experimental
+ */
+
+public final class ByteSequenceOutputs extends Outputs<BytesRef> {
+
+  private final static BytesRef NO_OUTPUT = new BytesRef();  // compared by identity throughout this class
+  private final static ByteSequenceOutputs singleton = new ByteSequenceOutputs();
+
+  private ByteSequenceOutputs() {}
+  /** Returns the one shared instance; this class has no per-instance state. */
+  public static ByteSequenceOutputs getSingleton() {
+    return singleton;
+  }
+
+  @Override
+  public BytesRef common(BytesRef output1, BytesRef output2) {  // longest shared byte prefix of the two outputs
+    assert output1 != null;
+    assert output2 != null;
+
+    int pos1 = output1.offset;
+    int pos2 = output2.offset;
+    int stopAt1 = pos1 + Math.min(output1.length, output2.length);
+    while(pos1 < stopAt1) {
+      if (output1.bytes[pos1] != output2.bytes[pos2]) {
+        break;
+      }
+      pos1++;
+      pos2++;
+    }
+
+    if (pos1 == output1.offset) {
+      // no common prefix
+      return NO_OUTPUT;
+    } else if (pos1 == output1.offset + output1.length) {
+      // output1 is a prefix of output2
+      return output1;
+    } else if (pos2 == output2.offset + output2.length) {
+      // output2 is a prefix of output1
+      return output2;
+    } else {
+      return new BytesRef(output1.bytes, output1.offset, pos1-output1.offset);
+    }
+  }
+
+  @Override
+  public BytesRef subtract(BytesRef output, BytesRef inc) {  // drops the first inc.length bytes of output; only lengths are checked
+    assert output != null;
+    assert inc != null;
+    if (inc == NO_OUTPUT) {
+      // no prefix removed
+      return output;
+    } else if (inc.length == output.length) {
+      // entire output removed
+      return NO_OUTPUT;
+    } else {
+      assert inc.length < output.length: "inc.length=" + inc.length + " vs output.length=" + output.length;
+      assert inc.length > 0;
+      return new BytesRef(output.bytes, output.offset + inc.length, output.length-inc.length);
+    }
+  }
+
+  @Override
+  public BytesRef add(BytesRef prefix, BytesRef output) {  // concatenation prefix+output; returns an argument as-is if the other is NO_OUTPUT
+    assert prefix != null;
+    assert output != null;
+    if (prefix == NO_OUTPUT) {
+      return output;
+    } else if (output == NO_OUTPUT) {
+      return prefix;
+    } else {
+      assert prefix.length > 0;
+      assert output.length > 0;
+      BytesRef result = new BytesRef(prefix.length + output.length);
+      System.arraycopy(prefix.bytes, prefix.offset, result.bytes, 0, prefix.length);
+      System.arraycopy(output.bytes, output.offset, result.bytes, prefix.length, output.length);
+      result.length = prefix.length + output.length;
+      return result;
+    }
+  }
+
+  @Override
+  public void write(BytesRef prefix, DataOutput out) throws IOException {  // vInt length, then the raw bytes
+    assert prefix != null;
+    out.writeVInt(prefix.length);
+    out.writeBytes(prefix.bytes, prefix.offset, prefix.length);
+  }
+
+  @Override
+  public BytesRef read(DataInput in) throws IOException {  // inverse of write; length 0 maps back to the NO_OUTPUT instance
+    final int len = in.readVInt();
+    if (len == 0) {
+      return NO_OUTPUT;
+    } else {
+      final BytesRef output = new BytesRef(len);
+      in.readBytes(output.bytes, 0, len);
+      output.length = len;
+      return output;
+    }
+  }
+
+  @Override
+  public BytesRef getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(BytesRef output) {  // NOTE(review): decodes as UTF-8; arbitrary byte outputs may not render meaningfully
+    return output.utf8ToString();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/BytesRefFSTEnum.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/BytesRefFSTEnum.java
new file mode 100644
index 00000000000..150a0e7dcf6
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/BytesRefFSTEnum.java
@@ -0,0 +1,304 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/** Can next() and advance() through the terms in an FST; the returned InputOutput (and its input BytesRef) is re-used across calls.
+  * @lucene.experimental
+*/
+
+public class BytesRefFSTEnum<T> {
+  private final FST<T> fst;
+
+  private BytesRef current = new BytesRef(10);  // bytes of the term we are positioned on; indexed from 0 throughout this class
+  @SuppressWarnings("unchecked") private FST.Arc<T>[] arcs = (FST.Arc<T>[]) new FST.Arc[10];
+  // outputs are cumulative: output[i] is the sum of the arc outputs for positions 0..i
+  @SuppressWarnings("unchecked") private T[] output = (T[]) new Object[10];
+
+  private boolean lastFinal;  // current term ends on a final arc whose target still has arcs to descend into
+  private boolean didEmpty;   // the empty-string term has already been returned
+  private final T NO_OUTPUT;
+  private final InputOutput<T> result = new InputOutput<T>();  // single instance handed back by next()/advance()/current()
+
+  public static class InputOutput<T> {
+    public BytesRef input;
+    public T output;
+  }
+  
+  public BytesRefFSTEnum(FST<T> fst) {
+    this.fst = fst;
+    result.input = current;
+    NO_OUTPUT = fst.outputs.getNoOutput();
+  }
+
+  public void reset() {  // returns the enum to its initial, unpositioned state
+    lastFinal = false;
+    didEmpty = false;
+    current.length = 0;
+    result.output = NO_OUTPUT;
+  }
+
+  /** Positions on target or else on the next term after it; returns null if there is
+   *  none.  NOTE: target must be >= where we are already positioned */
+  public InputOutput<T> advance(BytesRef target) throws IOException {
+
+    assert target.compareTo(current) >= 0;
+
+    //System.out.println("    advance len=" + target.length + " curlen=" + current.length);
+
+    // special case empty string
+    if (current.length == 0) {
+      if (target.length == 0) {
+        final T output = fst.getEmptyOutput();      
+        if (output != null) {
+          if (!didEmpty) {
+            current.length = 0;
+            lastFinal = true;
+            result.output = output;
+            didEmpty = true;
+          }
+          return result;
+        } else {
+          return next();
+        }
+      }
+      
+      if (fst.noNodes()) {
+        return null;
+      }
+    }
+
+    // TODO: possibly caller could/should provide common
+    // prefix length?  ie this work may be redundant if
+    // caller is in fact intersecting against its own
+    // automaton
+
+    // what prefix does target share w/ current
+    int idx = 0;
+    while (idx < current.length && idx < target.length) {
+      if (current.bytes[idx] != target.bytes[target.offset + idx]) {
+        break;
+      }
+      idx++;
+    }
+
+    //System.out.println("  shared " + idx);
+
+    FST.Arc<T> arc;
+    if (current.length == 0) {
+      // new enum (no seek/next yet)
+      arc = fst.readFirstArc(fst.getStartNode(), getArc(0));
+      //System.out.println("  new enum");
+    } else if (idx < current.length) {
+      // roll back to shared point
+      lastFinal = false;
+      current.length = idx;
+      arc = arcs[idx];
+      if (arc.isLast()) {
+        if (idx == 0) {
+          return null;
+        } else {
+          return next();
+        }
+      }
+      arc = fst.readNextArc(arc);
+    } else if (idx == target.length) {
+      // degenerate case -- seek to term we are already on
+      assert target.equals(current);
+      return result;
+    } else {
+      // current is a full prefix of target
+      if (lastFinal) {
+        arc = fst.readFirstArc(arcs[current.length-1].target, getArc(current.length));
+      } else {
+        return next();
+      }
+    }
+
+    lastFinal = false;
+
+    assert arc == arcs[current.length];
+    int targetLabel = target.bytes[target.offset+current.length] & 0xFF;
+
+    while(true) {
+      //System.out.println("    cycle len=" + current.length + " target=" + ((char) targetLabel) + " vs " + ((char) arc.label));
+      if (arc.label == targetLabel) {
+        grow();
+        current.bytes[current.length] = (byte) arc.label;
+        appendOutput(arc.output);
+        current.length++;
+        grow();
+        if (current.length == target.length) {
+          result.output = output[current.length-1];
+          if (arc.isFinal()) {
+            // target is exact match
+            if (fst.hasArcs(arc.target)) {
+              // target is also a proper prefix of other terms
+              lastFinal = true;
+              appendFinalOutput(arc.nextFinalOutput);
+            }
+          } else {
+            // target is not a match but is a prefix of
+            // other terms
+            current.length--;
+            push();
+          }
+          return result;
+        } else if (!fst.hasArcs(arc.target)) {
+          // we only match a prefix of the target
+          return next();
+        } else {
+          targetLabel = target.bytes[target.offset+current.length] & 0xFF;
+          arc = fst.readFirstArc(arc.target, getArc(current.length));
+        }
+      } else if (arc.label > targetLabel) {
+        // we are now past the target
+        push();
+        return result;
+      } else if (arc.isLast()) {
+        if (current.length == 0) {
+          return null;
+        }
+        return next();
+      } else {
+        arc = fst.readNextArc(getArc(current.length));
+      }
+    }
+  }
+
+  public InputOutput<T> current() {  // the shared result object, as last filled in by next()/advance()
+    return result;
+  }
+
+  public InputOutput<T> next() throws IOException {  // steps to the following term; returns null once the enum is done
+    //System.out.println("  enum.next");
+
+    if (current.length == 0) {
+      final T output = fst.getEmptyOutput();
+      if (output != null) {
+        if (!didEmpty) {
+          current.length = 0;
+          lastFinal = true;
+          result.output = output;
+          didEmpty = true;
+          return result;
+        } else {
+          lastFinal = false;
+        }
+      }
+      if (fst.noNodes()) {
+        return null;
+      }
+      fst.readFirstArc(fst.getStartNode(), getArc(0));
+      push();
+    } else if (lastFinal) {
+      lastFinal = false;
+      assert current.length > 0;
+      // resume pushing
+      fst.readFirstArc(arcs[current.length-1].target, getArc(current.length));
+      push();
+    } else {
+      //System.out.println("    pop/push");
+      pop();
+      if (current.length == 0) {
+        // enum done
+        return null;
+      } else {
+        current.length--;
+        fst.readNextArc(arcs[current.length]);
+        push();
+      }
+    }
+
+    return result;
+  }
+
+  private void grow() {  // makes current, arcs and output large enough to hold index current.length
+    final int l = current.length + 1;
+    current.grow(l);
+    arcs = ArrayUtil.grow(arcs, l);
+    output = ArrayUtil.grow(output, l);
+  }
+
+  private void appendOutput(T addedOutput) {  // stores the cumulative output for position current.length
+    T newOutput;
+    if (current.length == 0) {
+      newOutput = addedOutput;
+    } else if (addedOutput == NO_OUTPUT) {
+      output[current.length] = output[current.length-1];
+      return;
+    } else {
+      newOutput = fst.outputs.add(output[current.length-1], addedOutput);
+    }
+    output[current.length] = newOutput;
+  }
+
+  private void appendFinalOutput(T addedOutput) {  // result.output = cumulative output so far plus the arc's final output
+    if (current.length == 0) {
+      result.output = addedOutput;
+    } else {
+      result.output = fst.outputs.add(output[current.length-1], addedOutput);
+    }
+  }
+
+  private void push() throws IOException {  // appends arcs[current.length] and keeps following first arcs until a term ends
+
+    FST.Arc<T> arc = arcs[current.length];
+    assert arc != null;
+
+    while(true) {
+      grow();
+      
+      current.bytes[current.length] = (byte) arc.label;
+      appendOutput(arc.output);
+      //System.out.println("    push: append label=" + ((char) arc.label) + " output=" + fst.outputs.outputToString(arc.output));
+      current.length++;
+      grow();
+
+      if (!fst.hasArcs(arc.target)) {
+        break;
+      }
+
+      if (arc.isFinal()) {
+        appendFinalOutput(arc.nextFinalOutput);
+        lastFinal = true;
+        return;
+      }
+
+      arc = fst.readFirstArc(arc.target, getArc(current.length));
+    }
+    result.output = output[current.length-1];
+  }
+
+  private void pop() {  // drops trailing positions whose arc is the last arc of its node
+    while (current.length > 0 && arcs[current.length-1].isLast()) {
+      current.length--;
+    }
+  }
+
+  private FST.Arc<T> getArc(int idx) {  // lazily allocates the re-usable Arc for position idx
+    if (arcs[idx] == null) {
+      arcs[idx] = new FST.Arc<T>();
+    }
+    return arcs[idx];
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
new file mode 100644
index 00000000000..8de2e33e747
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
@@ -0,0 +1,922 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IntsRef;
+
+/** Represents an FST using a compact byte[] format.
+ *  <p> The format is similar to what's used by Morfologik
+ *  (http://sourceforge.net/projects/morfologik).
+ * @lucene.experimental
+ */
+public class FST<T> {
+  public static enum INPUT_TYPE {BYTE1, BYTE2, BYTE4};
+  private final INPUT_TYPE inputType;
+
+  private final static int BIT_FINAL_ARC = 1 << 0;
+  private final static int BIT_LAST_ARC = 1 << 1;
+  private final static int BIT_TARGET_NEXT = 1 << 2;
+  private final static int BIT_STOP_NODE = 1 << 3;
+  private final static int BIT_ARC_HAS_OUTPUT = 1 << 4;
+  private final static int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;
+
+  // Arcs are stored as fixed-size (per entry) array, so
+  // that we can find an arc using binary search.  We do
+  // this when number of arcs is >= NUM_ARCS_FIXED_ARRAY:
+  private final static int BIT_ARCS_AS_FIXED_ARRAY = 1 << 6;
+
+  // If the node has >= this number of arcs, the arcs are
+  // stored as a fixed array.  Fixed array consumes more RAM
+  // but enables binary search on the arcs (instead of
+  // linear scan) on lookup by arc label:
+  private final static int NUM_ARCS_FIXED_ARRAY = 10;
+  private int[] bytesPerArc = new int[0];
+
+  // Increment version to change it
+  private final static String FILE_FORMAT_NAME = "FST";
+  private final static int VERSION_START = 0;
+  private final static int VERSION_CURRENT = VERSION_START;
+
+  // Never serialized; just used to represent the virtual
+  // final node w/ no arcs:
+  private final static int FINAL_END_NODE = -1;
+
+  // Never serialized; just used to represent the virtual
+  // non-final node w/ no arcs:
+  private final static int NON_FINAL_END_NODE = 0;
+
+  // if non-null, this FST accepts the empty string and
+  // produces this output
+  private T emptyOutput;
+  private byte[] emptyOutputBytes;
+
+  private byte[] bytes;
+  int byteUpto = 0;
+
+  private int startNode = -1;
+
+  public final Outputs<T> outputs;
+
+  private int lastFrozenNode;
+
+  private final T NO_OUTPUT;
+
+  public int nodeCount;
+  public int arcCount;
+  public int arcWithOutputCount;
+
+  public final static class Arc<T> {
+    int label;  // an "unsigned" byte for BYTE1 input; BYTE2/BYTE4 labels can be larger (see writeLabel)
+    int target;  // address of the node this arc leads to
+    byte flags;  // BIT_* flags of this arc
+    T output;
+    T nextFinalOutput;
+    int nextArc;  // readFirstArc stores the reader position here before calling readNextArc
+
+    // Only set by readFirstArc for a fixed-array node; bytesPerArc is 0 otherwise:
+    int posArcsStart;
+    int bytesPerArc;
+    int arcIdx;
+    int numArcs;
+
+    // Must call this before re-using an Arc instance on a
+    // new node
+    public void reset() {
+      bytesPerArc = 0;
+    }
+
+    public boolean flag(int flag) {
+      return FST.flag(flags, flag);
+    }
+
+    public boolean isLast() {  // true if BIT_LAST_ARC is set
+      return flag(BIT_LAST_ARC);
+    }
+
+    public boolean isFinal() {  // true if BIT_FINAL_ARC is set
+      return flag(BIT_FINAL_ARC);
+    }
+  };
+
+  static boolean flag(int flags, int bit) {
+    return 0 != (bit & flags);  // true when flags has any of the mask's bits set
+  }
+
+  private final BytesWriter writer;
+
+  // make a new, empty FST for building
+  public FST(INPUT_TYPE inputType, Outputs<T> outputs) {
+    // empty string is not accepted until setEmptyOutput is called
+    emptyOutput = null;
+    this.outputs = outputs;
+    this.inputType = inputType;
+    NO_OUTPUT = outputs.getNoOutput();
+
+    bytes = new byte[128];
+    writer = new BytesWriter();
+  }
+
+  // load a previously saved FST (see save)
+  public FST(IndexInput in, Outputs<T> outputs) throws IOException {
+    this.outputs = outputs;
+    writer = null;
+    CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_START, VERSION_CURRENT);
+    if (in.readByte() == 1) {
+      // accepts empty string
+      int numBytes = in.readVInt();
+      // keep the serialized form too: save() writes emptyOutputBytes back out
+      emptyOutputBytes = bytes = new byte[numBytes];
+      in.readBytes(bytes, 0, numBytes);
+      emptyOutput = outputs.read(new BytesReader(numBytes-1));
+    } else {
+      emptyOutput = null;
+    }
+    final byte t;  // input type as written by save: 0=BYTE1, 1=BYTE2, 2=BYTE4
+    t = in.readByte(); switch(t) {
+      case 0:
+        inputType = INPUT_TYPE.BYTE1;
+        break;
+      case 1:
+        inputType = INPUT_TYPE.BYTE2;
+        break;
+      case 2:
+        inputType = INPUT_TYPE.BYTE4;
+        break;
+      default:
+        throw new IllegalStateException("invalid input type " + t);
+    }
+    startNode = in.readVInt();
+    nodeCount = in.readVInt();
+    arcCount = in.readVInt();
+    arcWithOutputCount = in.readVInt();
+
+    bytes = new byte[in.readVInt()];  // replaces the temporary array used for the empty output above
+    in.readBytes(bytes, 0, bytes.length);
+    NO_OUTPUT = outputs.getNoOutput();
+  }
+
+  public INPUT_TYPE getInputType() {  // label width; decides how writeLabel/readLabel encode arc labels
+    return inputType;
+  }
+
+  /** Returns the length of the backing byte[]; finish() trims that array to the bytes actually written. */
+  public int sizeInBytes() {
+    return bytes.length;
+  }
+
+  void finish(int startNode) {
+    if (this.startNode != -1) {
+      throw new IllegalStateException("already finished");
+    }
+    final int usedLength = writer.posWrite;  // bytes actually written so far
+    final byte[] trimmed = new byte[usedLength];
+    System.arraycopy(bytes, 0, trimmed, 0, usedLength);
+    bytes = trimmed; this.startNode = startNode;
+  }
+
+  public void setEmptyOutput(T v) throws IOException {  // may be called once; also pre-serializes v into emptyOutputBytes for save()
+    if (emptyOutput != null) {
+      throw new IllegalStateException("empty output is already set");
+    }
+    emptyOutput = v;
+
+    // TODO: this is messy -- replace with sillyBytesWriter; maybe make
+    // bytes private
+    final int posSave = writer.posWrite;
+    outputs.write(emptyOutput, writer);
+    emptyOutputBytes = new byte[writer.posWrite-posSave];
+
+    // reverse in place (the loading constructor starts reading at the last byte)
+    final int stopAt = (writer.posWrite - posSave)/2;
+    int upto = 0;
+    while(upto < stopAt) {
+      final byte b = bytes[posSave + upto];
+      bytes[posSave+upto] = bytes[writer.posWrite-upto-1];
+      bytes[writer.posWrite-upto-1] = b;
+      upto++;
+    }
+    System.arraycopy(bytes, posSave, emptyOutputBytes, 0, writer.posWrite-posSave);
+    writer.posWrite = posSave;  // rewind: the main buffer was only used as scratch space
+  }
+
+  public void save(IndexOutput out) throws IOException {
+    if (startNode == -1) {
+      throw new IllegalStateException("call finish first");
+    }
+    CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
+
+    // 1 byte: does this FST accept the empty string?  If
+    // so, its already-serialized output follows.
+    final boolean acceptsEmpty = emptyOutput != null;
+    out.writeByte((byte) (acceptsEmpty ? 1 : 0));
+    if (acceptsEmpty) {
+      out.writeVInt(emptyOutputBytes.length);
+      out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
+    }
+
+    // 1 byte: input type (0=BYTE1, 1=BYTE2, else 2)
+    final byte t = (byte) (inputType == INPUT_TYPE.BYTE1 ? 0 : (inputType == INPUT_TYPE.BYTE2 ? 1 : 2));
+    out.writeByte(t);
+
+    // start node, counters, then the node bytes themselves
+    out.writeVInt(startNode);
+    out.writeVInt(nodeCount);
+    out.writeVInt(arcCount);
+    out.writeVInt(arcWithOutputCount);
+    final int numBytes = bytes.length;
+    out.writeVInt(numBytes);
+    out.writeBytes(bytes, 0, numBytes);
+  }
+
+  private void writeLabel(int v) throws IOException {
+    assert v >= 0: "v=" + v;
+    if (inputType == INPUT_TYPE.BYTE1) {
+      // single byte per label
+      assert v <= 255: "v=" + v;
+      writer.writeByte((byte) v);
+      return;
+    }
+    // BYTE2 and BYTE4 labels are both written as vInts;
+    // only the asserted upper bound differs
+    assert inputType != INPUT_TYPE.BYTE2 || v <= 65535: "v=" + v;
+    writer.writeVInt(v);
+  }
+
+  private int readLabel(DataInput in) throws IOException {
+    // mirrors writeLabel: BYTE1 labels take exactly one
+    // byte and are read back unsigned, while BYTE2 and
+    // BYTE4 labels are both stored as vInts
+    final boolean singleByte = inputType == INPUT_TYPE.BYTE1;
+    if (singleByte) {
+      return in.readByte()&0xFF;
+    }
+    final int label = in.readVInt();
+    return label;
+  }
+
+  // returns true if the node at this address has any outgoing
+  // arcs, ie it is neither of the two virtual end nodes
+  public boolean hasArcs(int address) {
+    return !(address == FINAL_END_NODE || address == NON_FINAL_END_NODE);
+  }
+
+  public int getStartNode() {
+    if (startNode != -1) {
+      return startNode;
+    }
+    throw new IllegalStateException("call finish first");  // startNode is still at its initial -1
+  }
+
+  /** Returns null if this FST does not accept the empty
+   *  string, else, the output for the empty string */
+  public T getEmptyOutput() {
+    return emptyOutput;
+  }
+
+  // serializes new node by appending its (reversed) bytes to the end of the
+  // current byte[]; returns its address, or a virtual END_NODE if it has no arcs
+  int addNode(Builder.UnCompiledNode<T> node) throws IOException {
+    //System.out.println("FST.addNode pos=" + posWrite + " numArcs=" + node.numArcs);
+    if (node.numArcs == 0) {
+      if (node.isFinal) {
+        return FINAL_END_NODE;
+      } else {
+        return NON_FINAL_END_NODE;
+      }
+    }
+
+    int startAddress = writer.posWrite;
+    //System.out.println("  startAddr=" + startAddress);
+
+    final boolean doFixedArray = node.numArcs >= NUM_ARCS_FIXED_ARRAY;
+    final int fixedArrayStart;
+    if (doFixedArray) {
+      if (bytesPerArc.length < node.numArcs) {
+        bytesPerArc = new int[ArrayUtil.oversize(node.numArcs, 1)];
+      }
+      // write a "false" first arc:
+      writer.writeByte((byte) BIT_ARCS_AS_FIXED_ARRAY);
+      writer.writeVInt(node.numArcs);
+      // placeholder -- we'll come back and write the number
+      // of bytes per arc here:
+      writer.writeByte((byte) 0);
+      fixedArrayStart = writer.posWrite;
+      //System.out.println("  do fixed arcs array arcsStart=" + fixedArrayStart);
+    } else {
+      fixedArrayStart = 0;
+    }
+
+    nodeCount++;
+    arcCount += node.numArcs;
+    
+    final int lastArc = node.numArcs-1;
+
+    int lastArcStart = writer.posWrite;
+    int maxBytesPerArc = 0;
+    for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
+      final Builder.Arc<T> arc = node.arcs[arcIdx];
+      final Builder.CompiledNode target = (Builder.CompiledNode) arc.target;  // all targets must already be compiled
+      int flags = 0;
+
+      if (arcIdx == lastArc) {
+        flags += BIT_LAST_ARC;
+      }
+
+      if (lastFrozenNode == target.address && !doFixedArray) {  // target is the node written just before this one
+        flags += BIT_TARGET_NEXT;
+      }
+
+      if (arc.isFinal) {
+        flags += BIT_FINAL_ARC;
+        if (arc.nextFinalOutput != NO_OUTPUT) {
+          flags += BIT_ARC_HAS_FINAL_OUTPUT;
+        }
+      } else {
+        assert arc.nextFinalOutput == NO_OUTPUT;
+      }
+
+      boolean targetHasArcs = hasArcs(target.address);
+
+      if (!targetHasArcs) {
+        flags += BIT_STOP_NODE;
+      }
+
+      if (arc.output != NO_OUTPUT) {
+        flags += BIT_ARC_HAS_OUTPUT;
+      }
+
+      writer.writeByte((byte) flags);
+      writeLabel(arc.label);
+
+      //System.out.println("  write arc: label=" + arc.label + " flags=" + flags);
+
+      if (arc.output != NO_OUTPUT) {
+        outputs.write(arc.output, writer);
+        arcWithOutputCount++;
+      }
+      if (arc.nextFinalOutput != NO_OUTPUT) {
+        outputs.write(arc.nextFinalOutput, writer);
+      }
+
+      if (targetHasArcs && (doFixedArray || lastFrozenNode != target.address)) {  // ie whenever BIT_TARGET_NEXT was not set
+        assert target.address > 0;
+        writer.writeInt(target.address);
+      }
+
+      // just write the arcs "like normal" on first pass,
+      // but record how many bytes each one took, and max
+      // byte size:
+      if (doFixedArray) {
+        bytesPerArc[arcIdx] = writer.posWrite - lastArcStart;
+        lastArcStart = writer.posWrite;
+        maxBytesPerArc = Math.max(maxBytesPerArc, bytesPerArc[arcIdx]);
+        //System.out.println("    bytes=" + bytesPerArc[arcIdx]);
+      }
+    }
+
+    if (doFixedArray) {
+      assert maxBytesPerArc > 0;
+      // 2nd pass just "expands" all arcs to take up a fixed
+      // byte size
+      final int sizeNeeded = fixedArrayStart + node.numArcs * maxBytesPerArc;
+      bytes = ArrayUtil.grow(bytes, sizeNeeded);
+      if (maxBytesPerArc > 255) {  // the per-arc size has to fit the single placeholder byte
+        throw new IllegalStateException("max arc size is too large (" + maxBytesPerArc + ")");
+      }
+      bytes[fixedArrayStart-1] = (byte) maxBytesPerArc;
+
+      // expand the arcs in place, backwards
+      int srcPos = writer.posWrite;
+      int destPos = fixedArrayStart + node.numArcs*maxBytesPerArc;
+      writer.posWrite = destPos;
+      for(int arcIdx=node.numArcs-1;arcIdx>=0;arcIdx--) {
+        //System.out.println("  repack arcIdx=" + arcIdx + " srcPos=" + srcPos + " destPos=" + destPos);
+        destPos -= maxBytesPerArc;
+        srcPos -= bytesPerArc[arcIdx];
+        if (srcPos != destPos) {
+          assert destPos > srcPos;
+          System.arraycopy(bytes, srcPos, bytes, destPos, bytesPerArc[arcIdx]);
+        }
+      }
+    }
+
+    // reverse bytes in-place; we do this so that the
+    // "BIT_TARGET_NEXT" opto can work, ie, it reads the
+    // node just before the current one
+    final int endAddress = writer.posWrite;
+    final int stopAt = (endAddress - startAddress)/2;
+    int upto = 0;
+    while (upto < stopAt) {
+      final byte b = bytes[startAddress+upto];
+      bytes[startAddress+upto] = bytes[endAddress-upto-1];
+      bytes[endAddress-upto-1] = b;
+      upto++;
+    }
+
+    lastFrozenNode = endAddress - 1;
+    /*
+    System.out.println("  return node addr=" + (endAddress-1));
+    for(int i=endAddress-1;i>=startAddress;i--) {
+      System.out.println("    bytes[" + i + "]=" + bytes[i]);
+    }
+    */
+
+    return endAddress-1;
+  }
+
+  public Arc<T> readFirstArc(int address, Arc<T> arc) throws IOException {
+    // Positions arc on the first arc of the node stored at address and returns it.
+    // The reader walks the byte[] backwards (BytesReader.readByte decrements pos).
+    final BytesReader in = new BytesReader(address);
+
+    arc.flags = in.readByte();
+
+    if (arc.flag(BIT_ARCS_AS_FIXED_ARRAY)) {
+      // fixed-array node: a header (this flag byte, the arc count as a vInt and
+      // the bytes per arc as one unsigned byte) precedes the first arc
+      arc.numArcs = in.readVInt();
+      arc.bytesPerArc = in.readByte() & 0xFF;
+      arc.arcIdx = -1; // readNextArc pre-increments, so the first arc loaded is index 0
+      arc.posArcsStart = in.pos;
+      // posArcsStart is where arc 0 begins; readNextArc derives every arc position from it
+    } else {
+      in.pos++; // undo the read: that byte is the first arc's own flags, which readNextArc reads again
+      arc.bytesPerArc = 0; // 0 tells readNextArc that the arcs are packed (variable length)
+    }
+    arc.nextArc = in.pos;
+    return readNextArc(arc);
+  }
+
+  public Arc<T> readNextArc(Arc<T> arc) throws IOException {
+    // Loads the next arc of the node that arc is iterating (fixed-array or packed layout) into arc and returns it
+    final BytesReader in;
+    if (arc.bytesPerArc != 0) {
+      // arcs are at fixed entries
+      arc.arcIdx++;
+      in = new BytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc); // entry i starts i*bytesPerArc below arc 0
+    } else {
+      // arcs are packed
+      in = new BytesReader(arc.nextArc);
+    }
+    arc.flags = in.readByte();
+    arc.label = readLabel(in);
+
+    if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
+      arc.output = outputs.read(in);
+    } else {
+      arc.output = outputs.getNoOutput();
+    }
+
+    if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
+      arc.nextFinalOutput = outputs.read(in);
+    } else {
+      arc.nextFinalOutput = outputs.getNoOutput();
+    }
+
+    if (arc.flag(BIT_STOP_NODE)) {
+      arc.target = FINAL_END_NODE; // no target address is read for this arc
+      arc.nextArc = in.pos;
+    } else if (arc.flag(BIT_TARGET_NEXT)) {
+      arc.nextArc = in.pos; // no address is read either: the target is whatever follows this node's arcs
+      if (!arc.flag(BIT_LAST_ARC)) {
+        if (arc.bytesPerArc == 0) {
+          // must scan over the remaining packed arcs to find where this node ends
+          seekToNextNode(in);
+        } else {
+          in.pos = arc.posArcsStart - arc.bytesPerArc * arc.numArcs; // jump past all numArcs fixed-size entries
+        }
+      }
+      arc.target = in.pos;
+    } else {
+      arc.target = in.readInt(); // explicit target address
+      arc.nextArc = in.pos;
+    }
+
+    return arc;
+  }
+
+  public Arc<T> findArc(int address, int labelToMatch, Arc<T> arc) throws IOException {
+    // TODO: maybe make an explicit thread state that holds
+    // reusable stuff eg BytesReader:
+    final BytesReader in = new BytesReader(address);
+
+    if ((in.readByte() & BIT_ARCS_AS_FIXED_ARRAY) != 0) {
+      // Arcs are full array; do binary search:
+      //System.out.println("findArc: array label=" + labelToMatch);
+      arc.numArcs = in.readVInt();
+      arc.bytesPerArc = in.readByte() & 0xFF;
+      arc.posArcsStart = in.pos;
+      int low = 0;
+      int high = arc.numArcs-1;
+      while (low <= high) {
+        int mid = (low + high) >>> 1;
+        in.pos = arc.posArcsStart - arc.bytesPerArc*mid - 1;
+        int midLabel = readLabel(in);
+        final int cmp = midLabel - labelToMatch;
+        if (cmp < 0)
+          low = mid + 1;
+        else if (cmp > 0)
+          high = mid - 1;
+        else {
+          arc.arcIdx = mid-1;
+          return readNextArc(arc);
+        }
+      }
+
+      return null;
+    }
+    //System.out.println("findArc: scan");
+
+    readFirstArc(address, arc);
+
+    while(true) {
+      if (arc.label == labelToMatch) {
+        return arc;
+      } else if (arc.isLast()) {
+        return null;
+      } else {
+        readNextArc(arc);
+      }
+    }
+  }
+
+  /** Walks the FST along the labels of input and returns the
+   *  accumulated output, or null if the input is not
+   *  accepted. FST must be INPUT_TYPE.BYTE4. */
+  public T get(IntsRef input) throws IOException {
+    assert inputType == INPUT_TYPE.BYTE4;
+
+    if (input.length == 0) {
+      return getEmptyOutput();
+    }
+
+    // TODO: would be nice not to alloc this on every lookup
+    final FST.Arc<T> scratch = new FST.Arc<T>();
+    final int limit = input.offset + input.length;
+    int current = getStartNode();
+    T total = NO_OUTPUT;
+
+    for(int pos=input.offset;pos<limit;pos++) {
+      // either the FST ended before the input did, or the
+      // current node has no arc with this label
+      if (!hasArcs(current) || findArc(current, input.ints[pos], scratch) == null) {
+        return null;
+      }
+
+      current = scratch.target;
+      if (scratch.output != NO_OUTPUT) {
+        total = outputs.add(total, scratch.output);
+      }
+    }
+
+    // the whole input was consumed; it is accepted only if
+    // the last arc taken is final
+    if (!scratch.isFinal()) {
+      return null;
+    }
+
+    if (scratch.nextFinalOutput == NO_OUTPUT) {
+      return total;
+    }
+
+    return outputs.add(total, scratch.nextFinalOutput);
+  }
+
+  /** Logically casts input[offset..offset+length) to UTF32 ints, then
+   *  looks up the output, or null if the input is not accepted.  No char
+   *  outside that range is read.  FST must be INPUT_TYPE.BYTE4.  */
+  public T get(char[] input, int offset, int length) throws IOException {
+    assert inputType == INPUT_TYPE.BYTE4;
+
+    if (length == 0) {
+      return getEmptyOutput();
+    }
+
+    // TODO: would be nice not to alloc this on every lookup
+    final FST.Arc<T> arc = new FST.Arc<T>();
+    int node = getStartNode();
+    int charIdx = offset;
+    final int charLimit = offset + length;
+    T output = NO_OUTPUT;
+    while(charIdx < charLimit) {
+      if (!hasArcs(node)) {
+        // hit end of FST before input end
+        return null;
+      }
+      // pass charLimit so a high surrogate at the end of the range never pairs with a char beyond it
+      final int utf32 = Character.codePointAt(input, charIdx, charLimit);
+      charIdx += Character.charCount(utf32);
+
+      if (findArc(node, utf32, arc) != null) {
+        node = arc.target;
+        if (arc.output != NO_OUTPUT) {
+          output = outputs.add(output, arc.output);
+        }
+      } else {
+        return null;
+      }
+    }
+
+    if (!arc.isFinal()) {
+      // hit input's end before end node
+      return null;
+    }
+
+    if (arc.nextFinalOutput != NO_OUTPUT) {
+      output = outputs.add(output, arc.nextFinalOutput);
+    }
+
+    return output;
+  }
+
+
+  /** Decodes input into UTF32 code points, walks the FST along
+   *  them and returns the accumulated output, or null if the
+   *  input is not accepted.  FST must be INPUT_TYPE.BYTE4.  */
+  public T get(CharSequence input) throws IOException {
+    assert inputType == INPUT_TYPE.BYTE4;
+
+    final int charLimit = input.length();
+    if (charLimit == 0) {
+      return getEmptyOutput();
+    }
+
+    // TODO: would be nice not to alloc this on every lookup
+    final FST.Arc<T> scratch = new FST.Arc<T>();
+    int current = getStartNode();
+    T total = NO_OUTPUT;
+    int charPos = 0;
+    while(charPos < charLimit) {
+      if (!hasArcs(current)) {
+        // the FST ended before the input did
+        return null;
+      }
+
+      // decode one code point and step past the char(s) it occupies
+      final int codePoint = Character.codePointAt(input, charPos);
+      charPos += Character.charCount(codePoint);
+
+      if (findArc(current, codePoint, scratch) == null) {
+        // the current node has no arc with this label
+        return null;
+      }
+
+      current = scratch.target;
+      if (scratch.output != NO_OUTPUT) {
+        total = outputs.add(total, scratch.output);
+      }
+    }
+
+    // the whole input was consumed; accepted only if the last arc taken is final
+    if (!scratch.isFinal()) {
+      return null;
+    }
+
+    if (scratch.nextFinalOutput == NO_OUTPUT) {
+      return total;
+    }
+    return outputs.add(total, scratch.nextFinalOutput);
+  }
+
+  /** Looks up the output for this input, or null if the
+   *  input is not accepted.  FST must be INPUT_TYPE.BYTE1. */
+  public T get(BytesRef input) throws IOException {
+    assert inputType == INPUT_TYPE.BYTE1;
+
+    if (input.length == 0) {
+      return getEmptyOutput(); // the empty input is answered from its own stored output, without walking any arc
+    }
+
+    // TODO: would be nice not to alloc this on every lookup
+    final FST.Arc<T> arc = new FST.Arc<T>();
+    int node = getStartNode();
+    T output = NO_OUTPUT;
+    for(int i=0;i<input.length;i++) {
+      if (!hasArcs(node)) {
+        // hit end of FST before input end
+        return null;
+      }
+      // NOTE(review): the byte is sign-extended to int here; if labels are stored unsigned (0..255) this needs "& 0xFF" -- confirm against readLabel
+      if (findArc(node, input.bytes[i+input.offset], arc) != null) {
+        node = arc.target;
+        if (arc.output != NO_OUTPUT) {
+          output = outputs.add(output, arc.output); // accumulate the outputs along the path
+        }
+      } else {
+        return null; // no arc with this label leaves the current node
+      }
+    }
+
+    if (!arc.isFinal()) {
+      // hit input's end before end node
+      return null;
+    }
+
+    if (arc.nextFinalOutput != NO_OUTPUT) {
+      output = outputs.add(output, arc.nextFinalOutput); // extra output attached to ending on this arc
+    }
+
+    return output;
+  }
+
+  /** Returns true if this FST has no nodes */
+  public boolean noNodes() {
+    // address 0 is never given to a real node: BytesWriter starts writing at position 1
+    return startNode == 0;
+  }
+
+  private void seekToNextNode(BytesReader in) throws IOException {
+    // Consumes arcs from in, one after another, up to and
+    // including the first one flagged BIT_LAST_ARC.
+    int arcFlags;
+    do {
+      arcFlags = in.readByte();
+      readLabel(in);
+
+      if (flag(arcFlags, BIT_ARC_HAS_OUTPUT)) {
+        outputs.read(in);
+      }
+
+      if (flag(arcFlags, BIT_ARC_HAS_FINAL_OUTPUT)) {
+        outputs.read(in);
+      }
+
+      // a target address follows only when the arc neither
+      // stops nor points at the next node
+      final boolean implicitTarget = flag(arcFlags, BIT_STOP_NODE) || flag(arcFlags, BIT_TARGET_NEXT);
+      if (!implicitTarget) {
+        in.readInt();
+      }
+    } while (!flag(arcFlags, BIT_LAST_ARC));
+  }
+
+  // Prints this FST to out as a Graphviz "digraph".  NOTE: this consumes a lot of RAM!
+  // final arcs have a flat end (arrowhead=tee, not an arrow)
+  // arcs flagged BIT_TARGET_NEXT (the NEXT opto) are in blue
+  /*
+    eg:
+      PrintStream ps = new PrintStream("out.dot");
+      fst.toDot(ps);
+      ps.close();
+      System.out.println("SAVED out.dot");
+
+    then dot -Tpng out.dot > /x/tmp/out.png
+  */
+  public void toDot(PrintStream out) throws IOException {
+
+    final List<Integer> queue = new ArrayList<Integer>(); // used as a stack: nodes are taken from the end
+    queue.add(startNode);
+
+    final Set<Integer> seen = new HashSet<Integer>(); // nodes already declared in the output
+    seen.add(startNode);
+    
+    out.println("digraph FST {");
+    out.println("  rankdir = LR;");
+    // nodes are drawn as circles with an empty label, identified only by their address
+    out.println("  " + startNode + " [label=\"\" shape=circle];");
+    out.println("  initial [shape=point color=white label=\"\"];");
+    if (emptyOutput != null) {
+      out.println("  initial -> " + startNode + " [arrowhead=tee label=\"(" + outputs.outputToString(emptyOutput) + ")\"];");
+    } else {
+      out.println("  initial -> " + startNode);
+    }
+
+    final Arc<T> arc = new Arc<T>(); // single scratch arc reused for every read
+
+    while(queue.size() != 0) {
+      Integer node = queue.get(queue.size()-1);
+      queue.remove(queue.size()-1);
+
+      if (node == FINAL_END_NODE || node == NON_FINAL_END_NODE) {
+        continue; // end nodes are not read for arcs
+      }
+
+      // scan all arcs
+      readFirstArc(node, arc);
+      while(true) {
+
+        if (!seen.contains(arc.target)) {
+          // first time this target shows up: declare it and schedule it for a visit
+          out.println("  " + arc.target + " [label=\"\" shape=circle];");
+          seen.add(arc.target);
+          queue.add(arc.target);
+        }
+        String outs; // text appended to the arc label: "/output" and then " (finalOutput)"
+        if (arc.output != NO_OUTPUT) {
+          outs = "/" + outputs.outputToString(arc.output);
+        } else {
+          outs = "";
+        }
+        if (arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
+          outs += " (" + outputs.outputToString(arc.nextFinalOutput) + ")";
+        }
+        out.print("  " + node + " -> " + arc.target + " [label=\"" + arc.label + outs + "\"");
+        if (arc.isFinal()) {
+          out.print(" arrowhead=tee");
+        }
+        if (arc.flag(BIT_TARGET_NEXT)) {
+          out.print(" color=blue");
+        }
+        out.println("];");
+        
+        if (arc.isLast()) {
+          break;
+        } else {
+          readNextArc(arc);
+        }
+      }
+    }
+    out.println("}");
+  }
+  
+  public int getNodeCount() {
+    // 1+ in order to also count the implicit final node (address -1), which is not included in nodeCount
+    return 1+nodeCount;
+  }
+  
+  public int getArcCount() {
+    return arcCount; // presumably the number of arcs written so far; the counter is updated outside this method -- confirm
+  }
+
+  public int getArcWithOutputCount() {
+    return arcWithOutputCount; // incremented each time an arc whose output is not NO_OUTPUT is written
+  }
+
+  // Non-static: writes to FST's byte[]
+  private class BytesWriter extends DataOutput {
+    int posWrite; // index in bytes where the next byte is stored
+
+    public BytesWriter() {
+      // pad: ensure no node gets address 0 which is reserved to mean
+      // the stop state w/ no arcs
+      posWrite = 1;
+    }
+
+    @Override
+    public void writeByte(byte b) {
+      if (bytes.length == posWrite) {
+        bytes = ArrayUtil.grow(bytes); // array is full: swap in the array returned by grow
+      }
+      assert posWrite < bytes.length: "posWrite=" + posWrite + " bytes.length=" + bytes.length;
+      bytes[posWrite++] = b;
+    }
+
+    @Override
+    public void writeBytes(byte[] b, int offset, int length) {
+      final int size = posWrite + length; // minimum array length needed to hold the new bytes
+      bytes = ArrayUtil.grow(bytes, size);
+      System.arraycopy(b, offset, bytes, posWrite, length);
+      posWrite += length;
+    }
+  }
+
+  // Non-static: reads byte[] from FST, moving backwards (towards lower indices) as it reads
+  private class BytesReader extends DataInput {
+    int pos; // index in bytes of the next byte to return
+
+    public BytesReader(int pos) {
+      this.pos = pos;
+    }
+
+    @Override
+    public byte readByte() {
+      return bytes[pos--]; // post-decrement: each read steps one byte towards the start of the array
+    }
+
+    @Override
+    public void readBytes(byte[] b, int offset, int len) {
+      for(int i=0;i<len;i++) {
+        b[offset+i] = bytes[pos--]; // b is filled forwards while bytes is consumed backwards
+      }
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/IntSequenceOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/IntSequenceOutputs.java
new file mode 100644
index 00000000000..807bd836c59
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/IntSequenceOutputs.java
@@ -0,0 +1,141 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.IntsRef;
+
+/**
+ * Output is a sequence of ints, for each input term.  Obtain the shared instance via {@link #getSingleton}.
+ * @lucene.experimental
+ */
+
+public final class IntSequenceOutputs extends Outputs<IntsRef> {
+
+  private final static IntsRef NO_OUTPUT = new IntsRef(); // "no output" sentinel; always compared by identity
+  private final static IntSequenceOutputs singleton = new IntSequenceOutputs();
+
+  private IntSequenceOutputs() {}
+
+  public static IntSequenceOutputs getSingleton() {
+    return singleton; // the class holds no per-instance state, so one shared instance serves every caller
+  }
+
+  @Override
+  public IntsRef common(IntsRef output1, IntsRef output2) {
+    assert output1 != null;
+    assert output2 != null;
+    // Returns the longest common prefix of the two sequences, or NO_OUTPUT if they share none.
+    int pos1 = output1.offset;
+    int pos2 = output2.offset;
+    int stopAt1 = pos1 + Math.min(output1.length, output2.length);
+    while(pos1 < stopAt1) {
+      if (output1.ints[pos1] != output2.ints[pos2]) {
+        break;
+      }
+      pos1++;
+      pos2++;
+    }
+
+    if (pos1 == output1.offset) {
+      // no common prefix
+      return NO_OUTPUT;
+    } else if (pos1 == output1.offset + output1.length) {
+      // output1 is a prefix of output2
+      return output1;
+    } else if (pos2 == output2.offset + output2.length) {
+      // output2 is a prefix of output1
+      return output2;
+    } else {
+      return new IntsRef(output1.ints, output1.offset, pos1-output1.offset); // shares output1's array, no copy
+    }
+  }
+
+  @Override
+  public IntsRef subtract(IntsRef output, IntsRef inc) { // returns output with its leading inc.length ints removed
+    assert output != null;
+    assert inc != null;
+    if (inc == NO_OUTPUT) {
+      // no prefix removed
+      return output;
+    } else if (inc.length == output.length) {
+      // entire output removed
+      return NO_OUTPUT;
+    } else {
+      assert inc.length < output.length: "inc.length=" + inc.length + " vs output.length=" + output.length;
+      assert inc.length > 0;
+      return new IntsRef(output.ints, output.offset + inc.length, output.length-inc.length); // shares output's array
+    }
+  }
+
+  @Override
+  public IntsRef add(IntsRef prefix, IntsRef output) { // returns prefix followed by output
+    assert prefix != null;
+    assert output != null;
+    if (prefix == NO_OUTPUT) {
+      return output;
+    } else if (output == NO_OUTPUT) {
+      return prefix;
+    } else {
+      assert prefix.length > 0;
+      assert output.length > 0;
+      IntsRef result = new IntsRef(prefix.length + output.length); // fresh array: neither argument is modified
+      System.arraycopy(prefix.ints, prefix.offset, result.ints, 0, prefix.length);
+      System.arraycopy(output.ints, output.offset, result.ints, prefix.length, output.length);
+      result.length = prefix.length + output.length;
+      return result;
+    }
+  }
+
+  @Override
+  public void write(IntsRef prefix, DataOutput out) throws IOException { // format: vInt length, then each int as a vInt
+    assert prefix != null;
+    out.writeVInt(prefix.length);
+    for(int idx=0;idx<prefix.length;idx++) {
+      out.writeVInt(prefix.ints[prefix.offset+idx]);
+    }
+  }
+
+  @Override
+  public IntsRef read(DataInput in) throws IOException { // inverse of write
+    final int len = in.readVInt();
+    if (len == 0) {
+      return NO_OUTPUT; // keeps the identity checks against NO_OUTPUT valid for values that were read back
+    } else {
+      final IntsRef output = new IntsRef(len);
+      for(int idx=0;idx<len;idx++) {
+        output.ints[idx] = in.readVInt();
+      }
+      output.length = len;
+      return output;
+    }
+  }
+
+  @Override
+  public IntsRef getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(IntsRef output) {
+    return output.toString();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java
new file mode 100644
index 00000000000..743f17c7072
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java
@@ -0,0 +1,304 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IntsRef;
+
+import java.io.IOException;
+
+/** Enumerates the input/output pairs of an FST: can next() and advance() through the terms in an FST
+  * @lucene.experimental
+*/
+
+public class IntsRefFSTEnum<T> {
+  private final FST<T> fst;
+
+  private IntsRef current = new IntsRef(10); // the term the enum is positioned on; also exposed as result.input
+  @SuppressWarnings("unchecked") private FST.Arc<T>[] arcs = (FST.Arc<T>[]) new FST.Arc[10];
+  // outputs are cumulative: output[i] combines (via fst.outputs.add) the arc outputs for positions 0..i
+  @SuppressWarnings("unchecked") private T[] output = (T[]) new Object[10];
+
+  private boolean lastFinal; // true when the current term ends on a final arc whose target still has arcs to descend into
+  private boolean didEmpty; // true once the empty-string term has been returned
+  private final T NO_OUTPUT;
+  private final InputOutput<T> result = new InputOutput<T>(); // single instance reused for every returned term
+
+  public static class InputOutput<T> {
+    public IntsRef input;
+    public T output;
+  }
+  
+  public IntsRefFSTEnum(FST<T> fst) {
+    this.fst = fst;
+    result.input = current;
+    NO_OUTPUT = fst.outputs.getNoOutput();
+  }
+
+  public void reset() {
+    lastFinal = false;
+    didEmpty = false;
+    current.length = 0;
+    result.output = NO_OUTPUT;
+  }
+
+  /** Seeks to target or, if it is absent, to the next term after it (null if there is none).
+   *  NOTE: target must be >= where we are already positioned */
+  public InputOutput<T> advance(IntsRef target) throws IOException {
+
+    assert target.compareTo(current) >= 0;
+
+    // a zero-length current means nothing but (possibly) the empty term has been returned so far
+
+    // special case empty string
+    if (current.length == 0) {
+      if (target.length == 0) {
+        final T output = fst.getEmptyOutput();      // null when the FST has no output for the empty input
+        if (output != null) {
+          if (!didEmpty) {
+            current.length = 0;
+            lastFinal = true;
+            result.output = output;
+            didEmpty = true;
+          }
+          return result;
+        } else {
+          return next();
+        }
+      }
+      
+      if (fst.noNodes()) {
+        return null;
+      }
+    }
+
+    // TODO: possibly caller could/should provide common
+    // prefix length?  ie this work may be redundant if
+    // caller is in fact intersecting against its own
+    // automaton
+
+    // what prefix does target share w/ current
+    int idx = 0;
+    while (idx < current.length && idx < target.length) {
+      if (current.ints[idx] != target.ints[target.offset + idx]) {
+        break;
+      }
+      idx++;
+    }
+
+    // idx is now the length of the prefix shared by current and target
+
+    FST.Arc<T> arc;
+    if (current.length == 0) {
+      // new enum (no seek/next yet)
+      arc = fst.readFirstArc(fst.getStartNode(), getArc(0));
+      // scanning starts at the first arc of the start node
+    } else if (idx < current.length) {
+      // roll back to shared point
+      lastFinal = false;
+      current.length = idx;
+      arc = arcs[idx];
+      if (arc.isLast()) {
+        if (idx == 0) {
+          return null;
+        } else {
+          return next();
+        }
+      }
+      arc = fst.readNextArc(arc);
+    } else if (idx == target.length) {
+      // degenerate case -- seek to term we are already on
+      assert target.equals(current);
+      return result;
+    } else {
+      // current is a full prefix of target
+      if (lastFinal) {
+        arc = fst.readFirstArc(arcs[current.length-1].target, getArc(current.length));
+      } else {
+        return next();
+      }
+    }
+
+    lastFinal = false;
+
+    assert arc == arcs[current.length];
+    int targetLabel = target.ints[target.offset+current.length];
+
+    while(true) {
+      // compare the arc at the current depth against the target label at the same position
+      if (arc.label == targetLabel) {
+        grow();
+        current.ints[current.length] = arc.label;
+        appendOutput(arc.output);
+        current.length++;
+        grow();
+        if (current.length == target.length) {
+          result.output = output[current.length-1];
+          if (arc.isFinal()) {
+            // target is exact match
+            if (fst.hasArcs(arc.target)) {
+              // target is also a proper prefix of other terms
+              lastFinal = true;
+              appendFinalOutput(arc.nextFinalOutput);
+            }
+          } else {
+            // target is not a match but is a prefix of
+            // other terms
+            current.length--;
+            push();
+          }
+          return result;
+        } else if (!fst.hasArcs(arc.target)) {
+          // we only match a prefix of the target
+          return next();
+        } else {
+          targetLabel = target.ints[target.offset+current.length];
+          arc = fst.readFirstArc(arc.target, getArc(current.length));
+        }
+      } else if (arc.label > targetLabel) {
+        // we are now past the target
+        push();
+        return result;
+      } else if (arc.isLast()) {
+        if (current.length == 0) {
+          return null;
+        }
+        return next();
+      } else {
+        arc = fst.readNextArc(getArc(current.length));
+      }
+    }
+  }
+
+  public InputOutput<T> current() {
+    return result;
+  }
+
+  public InputOutput<T> next() throws IOException {
+    // Moves to the following term and returns the shared result object, or null when the enum is done
+
+    if (current.length == 0) {
+      final T output = fst.getEmptyOutput();
+      if (output != null) {
+        if (!didEmpty) {
+          current.length = 0;
+          lastFinal = true;
+          result.output = output;
+          didEmpty = true;
+          return result;
+        } else {
+          lastFinal = false;
+        }
+      }
+      if (fst.noNodes()) {
+        return null;
+      }
+      fst.readFirstArc(fst.getStartNode(), getArc(0));
+      push();
+    } else if (lastFinal) {
+      lastFinal = false;
+      assert current.length > 0;
+      // resume pushing
+      fst.readFirstArc(arcs[current.length-1].target, getArc(current.length));
+      push();
+    } else {
+      // pop exhausted levels, step to the next sibling arc, then push down from it
+      pop();
+      if (current.length == 0) {
+        // enum done
+        return null;
+      } else {
+        current.length--;
+        fst.readNextArc(arcs[current.length]);
+        push();
+      }
+    }
+
+    return result;
+  }
+
+  private void grow() {
+    final int l = current.length + 1; // make room for one more position in current, arcs and output
+    current.grow(l);
+    arcs = ArrayUtil.grow(arcs, l);
+    output = ArrayUtil.grow(output, l);
+  }
+
+  private void appendOutput(T addedOutput) {
+    T newOutput; // cumulative output to store at position current.length
+    if (current.length == 0) {
+      newOutput = addedOutput;
+    } else if (addedOutput == NO_OUTPUT) {
+      output[current.length] = output[current.length-1];
+      return;
+    } else {
+      newOutput = fst.outputs.add(output[current.length-1], addedOutput);
+    }
+    output[current.length] = newOutput;
+  }
+
+  private void appendFinalOutput(T addedOutput) {
+    if (current.length == 0) { // sets result.output only; the cumulative output[] array is left untouched
+      result.output = addedOutput;
+    } else {
+      result.output = fst.outputs.add(output[current.length-1], addedOutput);
+    }
+  }
+
+  private void push() throws IOException {
+
+    FST.Arc<T> arc = arcs[current.length];
+    assert arc != null;
+
+    while(true) {
+      grow();
+      
+      current.ints[current.length] = arc.label;
+      appendOutput(arc.output);
+      // the arc's label and cumulative output are recorded before the depth is incremented
+      current.length++;
+      grow();
+
+      if (!fst.hasArcs(arc.target)) {
+        break;
+      }
+
+      if (arc.isFinal()) {
+        appendFinalOutput(arc.nextFinalOutput);
+        lastFinal = true;
+        return;
+      }
+
+      arc = fst.readFirstArc(arc.target, getArc(current.length));
+    }
+    result.output = output[current.length-1];
+  }
+
+  private void pop() {
+    while (current.length > 0 && arcs[current.length-1].isLast()) { // drop trailing levels whose arc is the last of its node
+      current.length--;
+    }
+  }
+
+  private FST.Arc<T> getArc(int idx) {
+    if (arcs[idx] == null) { // arcs are allocated lazily, one per depth, and then reused
+      arcs[idx] = new FST.Arc<T>();
+    }
+    return arcs[idx];
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/NoOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/NoOutputs.java
new file mode 100644
index 00000000000..edb9167e84c
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/NoOutputs.java
@@ -0,0 +1,94 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+/**
+ * Use this if you just want to build an FSA: every operation accepts and returns only the NO_OUTPUT sentinel.
+ */
+
+public final class NoOutputs extends Outputs<Object> {
+
+  final Object NO_OUTPUT = new Object() {
+    // NodeHash calls hashCode for this output; we fix this
+    // so we get deterministic hashing.
+    @Override
+    public int hashCode() {
+      return 42;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      return other == this;
+    }
+  };
+
+  private static final NoOutputs singleton = new NoOutputs();
+
+  private NoOutputs() {
+  }
+
+  public static NoOutputs getSingleton() {
+    return singleton;
+  }
+
+  @Override
+  public Object common(Object output1, Object output2) {
+    assert output1 == NO_OUTPUT;
+    assert output2 == NO_OUTPUT;
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public Object subtract(Object output, Object inc) {
+    assert output == NO_OUTPUT;
+    assert inc == NO_OUTPUT;
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public Object add(Object prefix, Object output) {
+    assert prefix == NO_OUTPUT: "got " + prefix;
+    assert output == NO_OUTPUT;
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public void write(Object prefix, DataOutput out) {
+    // intentionally writes nothing: there is no output data to serialize
+  }
+
+  @Override
+  public Object read(DataInput in) {
+    // write stores nothing, so nothing is consumed from in;
+    // every read yields the NO_OUTPUT sentinel
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public Object getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(Object output) {
+    return ""; // there is never anything to print
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
new file mode 100644
index 00000000000..7c244467669
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
@@ -0,0 +1,174 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+// Used to dedup states (lookup already-frozen states)
+final class NodeHash<T> {
+
+  private int[] table;
+  private int count;
+  private int mask;
+  private final FST<T> fst;
+  private final FST.Arc<T> scratchArc = new FST.Arc<T>();
+  // Incremented on every extra probe step in add/addNew; unsynchronized static counter.
+  public static int conf;
+
+  public NodeHash(FST<T> fst) {
+    table = new int[16];
+    mask = 15;
+    this.fst = fst;
+  }
+  // Returns true if the unfrozen node has exactly the same arcs as the frozen node at address.
+  private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address) throws IOException {
+    fst.readFirstArc(address, scratchArc);
+    if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
+      return false;
+    }
+    for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
+      final Builder.Arc<T> arc = node.arcs[arcUpto];
+      if (arc.label != scratchArc.label ||
+          !arc.output.equals(scratchArc.output) ||
+          ((Builder.CompiledNode) arc.target).address != scratchArc.target ||
+          !arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
+          arc.isFinal != scratchArc.isFinal()) {
+        return false;
+      }
+
+      if (scratchArc.isLast()) {
+        // The frozen node has no further arcs, so the
+        // nodes match only if this was also the last arc
+        // of the unfrozen node.
+        return arcUpto == node.numArcs-1;
+      }
+      // advance to the next arc of the frozen node
+      fst.readNextArc(scratchArc);
+    }
+    // loop ended without reaching the frozen node's last arc
+    return false;
+  }
+
+  // hash code for an unfrozen node.  This must be identical
+  // to the frozen case (below)!!
+  private int hash(Builder.UnCompiledNode<T> node) {
+    final int PRIME = 31;
+    //System.out.println("hash unfrozen");
+    int h = 0;
+    // TODO: maybe if number of arcs is high we can safely subsample?
+    for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
+      final Builder.Arc<T> arc = node.arcs[arcIdx];
+      //System.out.println("  label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).address + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
+      h = PRIME * h + arc.label;
+      h = PRIME * h + ((Builder.CompiledNode) arc.target).address;
+      h = PRIME * h + arc.output.hashCode();
+      h = PRIME * h + arc.nextFinalOutput.hashCode();
+      if (arc.isFinal) {
+        h += 17;
+      }
+    }
+    //System.out.println("  ret " + (h&Integer.MAX_VALUE));
+    return h & Integer.MAX_VALUE;
+  }
+
+  // hash code for a frozen node
+  private int hash(int node) throws IOException {
+    final int PRIME = 31;
+    //System.out.println("hash frozen");
+    int h = 0;
+    fst.readFirstArc(node, scratchArc);
+    while(true) {
+      //System.out.println("  label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
+      h = PRIME * h + scratchArc.label;
+      h = PRIME * h + scratchArc.target;
+      h = PRIME * h + scratchArc.output.hashCode();
+      h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
+      if (scratchArc.isFinal()) {
+        h += 17;
+      }
+      if (scratchArc.isLast()) {
+        break;
+      }
+      fst.readNextArc(scratchArc);
+    }
+    //System.out.println("  ret " + (h&Integer.MAX_VALUE));
+    return h & Integer.MAX_VALUE;
+  }
+  // Returns the address of an equal, already-frozen node, or freezes node via fst.addNode and returns the new address.
+  public int add(Builder.UnCompiledNode<T> node) throws IOException {
+    // System.out.println("hash: add count=" + count + " vs " + table.length);
+    final int h = hash(node);
+    int h2 = h;
+    int c = 1;
+    while(true) {
+      final int pos = h2 & mask;
+      final int v = table[pos];
+      if (v == 0) {
+        // freeze & add
+        final int address = fst.addNode(node);
+        //System.out.println("  now freeze addr=" + address);
+        assert hash(address) == h : "frozenHash=" + hash(address) + " vs h=" + h;
+        count++;
+        table[pos] = address;
+        if (table.length < 2*count) {
+          rehash();
+        }
+        return address;
+      } else if (nodesEqual(node, v)) {
+        // same node is already here
+        return v;
+      }
+
+      // quadratic probe
+      h2 = h+(c + c*c)/2;
+      c++;
+      conf++;
+    }
+  }
+
+  // called only by rehash
+  private void addNew(int address) throws IOException {
+    final int h = hash(address);
+    int h2 = h;
+    int c = 1;
+    while(true) {
+      final int pos = h2 & mask;
+      if (table[pos] == 0) {
+        table[pos] = address;
+        break;
+      }
+
+      // quadratic probe
+      h2 = h + (c + c*c)/2;
+      c++;
+      conf++;
+    }
+  }
+  // Doubles the table and re-inserts every non-zero address from the old table.
+  private void rehash() throws IOException {
+    final int[] oldTable = table;
+    table = new int[2*table.length];
+    mask = table.length-1;
+    for(int idx=0;idx<oldTable.length;idx++) {
+      final int address = oldTable[idx];
+      if (address != 0) {
+        addNew(address);
+      }
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
new file mode 100644
index 00000000000..18f4dc29432
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/Outputs.java
@@ -0,0 +1,57 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+/**
+ * Represents the outputs for an FST, providing the basic
+ * algebra needed for the FST.
+ * @lucene.experimental
+ */
+
+public abstract class Outputs<T> {
+
+  // TODO: maybe change this API to allow for re-use of the
+  // output instances -- this is an insane amount of garbage
+  // (new object per byte/char/int) if eg used during
+  // analysis
+
+  /** Eg common("foo", "foobar") -> "foo" */
+  public abstract T common(T output1, T output2);
+
+  /** Eg subtract("foobar", "foo") -> "bar" */
+  public abstract T subtract(T output, T inc);
+
+  /** Eg add("foo", "bar") -> "foobar" */
+  public abstract T add(T prefix, T output);
+  /** Writes the given output to {@code out}. */
+  public abstract void write(T output, DataOutput out) throws IOException;
+  /** Reads an output from {@code in}. */
+  public abstract T read(DataInput in) throws IOException;
+
+  /** NOTE: this output is compared with == so you must
+   *  ensure that all methods return the single object if
+   *  it's really no output */
+  public abstract T getNoOutput();
+  /** Returns a string form of the given output. */
+  public abstract String outputToString(T output);
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
new file mode 100644
index 00000000000..64275bc55fd
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/PairOutputs.java
@@ -0,0 +1,117 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+/**
+ * Pairs up two outputs into one.
+ * @lucene.experimental
+ */
+
+
+public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
+
+  private final Pair<A,B> NO_OUTPUT;
+  private final Outputs<A> outputs1;
+  private final Outputs<B> outputs2;
+  /** Holds the two component outputs; both fields are final. */
+  public static class Pair<A,B> {
+    public final A output1;
+    public final B output2;
+
+    public Pair(A output1, B output2) {
+      this.output1 = output1;
+      this.output2 = output2;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (other == this) {
+        return true;
+      } else if (other instanceof Pair) {
+        final Pair<?,?> pair = (Pair<?,?>) other;
+        return output1.equals(pair.output1) && output2.equals(pair.output2);
+      }
+      return false;
+    }
+
+    @Override
+    public int hashCode() {
+      return output1.hashCode() + output2.hashCode();
+    }
+  }
+
+  public PairOutputs(Outputs<A> outputs1, Outputs<B> outputs2) {
+    this.outputs1 = outputs1;
+    this.outputs2 = outputs2;
+    NO_OUTPUT = new Pair<A,B>(outputs1.getNoOutput(), outputs2.getNoOutput());
+  }
+  /** Returns the shared NO_OUTPUT pair when both parts are (by ==) their no-output; otherwise a new Pair. */
+  public Pair<A,B> get(A output1, B output2) {
+    if (output1 == outputs1.getNoOutput() && output2 == outputs2.getNoOutput()) {
+      return NO_OUTPUT;
+    } else {
+      return new Pair<A,B>(output1, output2);
+    }
+  }
+  // Each operation below is applied component-wise and re-wrapped through get().
+  @Override
+  public Pair<A,B> common(Pair<A,B> pair1, Pair<A,B> pair2) {
+    return get(outputs1.common(pair1.output1, pair2.output1),
+               outputs2.common(pair1.output2, pair2.output2));
+  }
+
+  @Override
+  public Pair<A,B> subtract(Pair<A,B> output, Pair<A,B> inc) {
+    return get(outputs1.subtract(output.output1, inc.output1),
+               outputs2.subtract(output.output2, inc.output2));
+  }
+
+  @Override
+  public Pair<A,B> add(Pair<A,B> prefix, Pair<A,B> output) {
+    return get(outputs1.add(prefix.output1, output.output1),
+               outputs2.add(prefix.output2, output.output2));
+  }
+
+  @Override
+  public void write(Pair<A,B> output, DataOutput writer) throws IOException {
+    outputs1.write(output.output1, writer);
+    outputs2.write(output.output2, writer);
+  }
+
+  @Override
+  public Pair<A,B> read(DataInput in) throws IOException {
+    A output1 = outputs1.read(in);
+    B output2 = outputs2.read(in);
+    return get(output1, output2);
+  }
+
+  @Override
+  public Pair<A,B> getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(Pair<A,B> output) {
+    return "<pair:" + outputs1.outputToString(output.output1) + "," + outputs2.outputToString(output.output2) + ">";
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
new file mode 100644
index 00000000000..ba17fe99dee
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/PositiveIntOutputs.java
@@ -0,0 +1,138 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+
+// TODO: make a sharing and non-sharing variant; eg if you
+// output docFreq per term the FST will be smaller if you
+// don't share since they are not "well shared"
+
+/**
+ * Output is a long, for each input term.  NOTE: the
+ * resulting FST is not guaranteed to be minimal!  See
+ * {@link Builder}.
+ * @lucene.experimental
+ */
+
+public final class PositiveIntOutputs extends Outputs<Long> {
+  // A dedicated instance: the methods below detect "no output" with == against it.
+  private final static Long NO_OUTPUT = new Long(0);
+
+  private final boolean doShare;
+
+  private final static PositiveIntOutputs singletonShare = new PositiveIntOutputs(true);
+  private final static PositiveIntOutputs singletonNoShare = new PositiveIntOutputs(false);
+
+  private PositiveIntOutputs(boolean doShare) {
+    this.doShare = doShare;
+  }
+  /** Returns one of the two shared instances, selected by doShare. */
+  public static PositiveIntOutputs getSingleton(boolean doShare) {
+    return doShare ? singletonShare : singletonNoShare;
+  }
+  /** Boxes v, mapping 0 to the NO_OUTPUT instance. */
+  public Long get(long v) {
+    if (v == 0) {
+      return NO_OUTPUT;
+    } else {
+      return Long.valueOf(v);
+    }
+  }
+
+  @Override
+  public Long common(Long output1, Long output2) {
+    assert valid(output1);
+    assert valid(output2);
+    if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) {
+      return NO_OUTPUT;
+    } else if (doShare) {
+      assert output1 > 0;
+      assert output2 > 0;
+      return Math.min(output1, output2); // sharing: the smaller of the two values
+    } else if (output1.equals(output2)) { // not sharing: common only when the values are equal
+      return output1;
+    } else {
+      return NO_OUTPUT;
+    }
+  }
+
+  @Override
+  public Long subtract(Long output, Long inc) {
+    assert valid(output);
+    assert valid(inc);
+    assert output >= inc;
+
+    if (inc == NO_OUTPUT) {
+      return output;
+    } else if (output.equals(inc)) { // difference would be 0: return the sentinel, not a new boxed 0
+      return NO_OUTPUT;
+    } else {
+      return output - inc;
+    }
+  }
+
+  @Override
+  public Long add(Long prefix, Long output) {
+    assert valid(prefix);
+    assert valid(output);
+    if (prefix == NO_OUTPUT) {
+      return output;
+    } else if (output == NO_OUTPUT) {
+      return prefix;
+    } else {
+      return prefix + output;
+    }
+  }
+
+  @Override
+  public void write(Long output, DataOutput out) throws IOException {
+    assert valid(output);
+    out.writeVLong(output);
+  }
+
+  @Override
+  public Long read(DataInput in) throws IOException {
+    long v = in.readVLong();
+    if (v == 0) {
+      return NO_OUTPUT;
+    } else {
+      return v;
+    }
+  }
+  // Checks (via asserts) that o is either the NO_OUTPUT instance or strictly positive.
+  private boolean valid(Long o) {
+    assert o != null;
+    assert o instanceof Long;
+    assert o == NO_OUTPUT || o > 0;
+    return true; // always true, so callers can write "assert valid(x)"
+  }
+
+  @Override
+  public Long getNoOutput() {
+    return NO_OUTPUT;
+  }
+
+  @Override
+  public String outputToString(Long output) {
+    return output.toString();
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/TODO b/lucene/src/java/org/apache/lucene/util/automaton/fst/TODO
new file mode 100644
index 00000000000..98fc6797e04
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/TODO
@@ -0,0 +1,39 @@
+is threadlocal.get costly?  if so maybe make an FSTReader?  would hold this "relative" pos, and each thread'd use it for reading, instead of PosRef
+
+maybe change Outputs class to "reuse" stuff?  eg this new BytesRef in ByteSequenceOutputs..
+
+do i even "need" both non_final_end_state and final_end_state?
+
+hmm -- can I get weights working here?
+
+can FST be used to index all internal substrings, mapping to term?
+  - maybe put back ability to add multiple outputs per input...?
+
+make this work w/ char...?
+  - then FSTCharFilter/FSTTokenFilter
+  - syn filter?
+
+experiment: try reversing terms before compressing -- how much smaller?
+
+maybe separate out a 'writable/growing fst' from a read-only one?
+
+can we somehow [partially] tableize lookups like oal.util.automaton?
+
+make an FST terms index option for codecs...?
+
+make an FSTCharsMap?
+
+need a benchmark testing FST traversal -- just fix the static main to rewind & visit all terms
+
+thread state
+
+when writing FST to disk:
+- Sequentially writing (would save memory in codec during indexing). We are now using DataOutput, which could also go directly to disk
+- problem: size of BytesRef must be known before
+
+later
+  - maybe don't require FSTEnum.advance to be forward only?
+  - should i make a posIntOutputs separate from posLongOutputs?
+  - mv randomAcceptedWord / run / etc. from test into FST?
+  - hmm get multi-outputs working again?  do we ever need this?
+
diff --git a/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java b/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
new file mode 100644
index 00000000000..c444d488cdf
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
@@ -0,0 +1,1233 @@
+package org.apache.lucene.util.automaton.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util._TestUtil;
+
+public class TestFSTs extends LuceneTestCase {
+
+  private MockDirectoryWrapper dir;
+
+  public void setUp() throws IOException { // NOTE(review): no super.setUp() call — confirm the base class does not require one
+    dir = newDirectory(); // directory used by the tests in this class
+    dir.setPreventDoubleWrite(false);
+  }
+
+  public void tearDown() throws IOException { // NOTE(review): no super.tearDown() call — confirm the base class does not require one
+    dir.close(); // closes the directory assigned in setUp
+  }
+
+  private static BytesRef toBytesRef(IntsRef ir) {
+    final BytesRef result = new BytesRef(ir.length);
+    result.length = ir.length;
+    for(int upto=0;upto<ir.length;upto++) {
+      final int value = ir.ints[ir.offset+upto];
+      assert value >= 0 && value <= 255;
+      result.bytes[upto] = (byte) value;
+    }
+    return result;
+  }
+
+  private static IntsRef toIntsRef(String s, int inputMode) { // convenience overload
+    return toIntsRef(s, inputMode, new IntsRef(10)); // converts into a freshly allocated IntsRef
+  }
+
+  private static IntsRef toIntsRef(String s, int inputMode, IntsRef ir) {
+    // inputMode 0 selects utf8 (the string goes through a
+    // BytesRef, one int per byte); any other value selects
+    // utf32 (one int per code point).
+    final boolean utf8 = inputMode == 0;
+    return utf8
+      ? toIntsRef(new BytesRef(s), ir)
+      : toIntsRefUTF32(s, ir);
+  }
+
+  private static IntsRef toIntsRefUTF32(String s, IntsRef ir) {
+    // Decode one code point per iteration; the char index
+    // advances by however many chars that code point used.
+    final int numChars = s.length();
+    int upto = 0;
+    for(int charPos=0;charPos<numChars;upto++) {
+      if (upto == ir.ints.length) {
+        ir.grow(upto+1);
+      }
+      final int codePoint = s.codePointAt(charPos);
+      ir.ints[upto] = codePoint;
+      charPos += Character.charCount(codePoint);
+    }
+    ir.length = upto;
+    return ir;
+  }
+
+  private static IntsRef toIntsRef(BytesRef br, IntsRef ir) {
+    if (ir.ints.length < br.length) {
+      ir.grow(br.length);
+    }
+    ir.length = br.length;
+    for(int upto=0;upto<ir.length;upto++) {
+      ir.ints[upto] = 0xFF & br.bytes[br.offset+upto]; // widen each byte as unsigned
+    }
+    return ir;
+  }
+
+  public void testBasicFSA() throws IOException {
+    String[] strings = new String[] {"station", "commotion", "elation", "elastic", "plastic", "stop", "ftop", "ftation"};
+    IntsRef[] terms = new IntsRef[strings.length];
+    for(int inputMode=0;inputMode<2;inputMode++) {
+      if (VERBOSE) {
+        System.out.println("TEST: inputMode=" + inputModeToString(inputMode));
+      }
+
+      for(int idx=0;idx<strings.length;idx++) {
+        terms[idx] = toIntsRef(strings[idx], inputMode);
+      }
+
+      doTest(inputMode, terms);
+      // doTest sorted terms in place, so the checks below run on sorted input.
+      // Test pre-determined FST sizes to make sure we haven't lost minimality (at least on this trivial set of terms):
+
+      // FSA
+      {
+        final Outputs<Object> outputs = NoOutputs.getSingleton();
+        final Object NO_OUTPUT = outputs.getNoOutput();
+        final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
+        for(IntsRef term : terms) {
+          pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
+        }
+        FST<Object> fst = new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
+        assertNotNull(fst);
+        assertEquals(22, fst.getNodeCount());
+        assertEquals(27, fst.getArcCount());
+      }
+
+      // FST ord pos int
+      {
+        final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+        final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
+        for(int idx=0;idx<terms.length;idx++) {
+          pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx))); // idx 0 maps to the no-output value
+        }
+        final FST<Long> fst = new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
+        assertNotNull(fst);
+        assertEquals(22, fst.getNodeCount());
+        assertEquals(27, fst.getArcCount());
+      }
+
+      // FST byte sequence ord
+      {
+        final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
+        final BytesRef NO_OUTPUT = outputs.getNoOutput();
+        final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms.length);
+        for(int idx=0;idx<terms.length;idx++) {
+          final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
+          pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
+        }
+        final FST<BytesRef> fst = new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest(0, 0);
+        assertNotNull(fst);
+        assertEquals(24, fst.getNodeCount()); // NOTE(review): outputs above are partly random — confirm these counts hold for every seed
+        assertEquals(30, fst.getArcCount());
+      }
+    }
+  }
+
+  private static String simpleRandomString(Random r) { // short random string; all randomness comes from r
+    final int end = r.nextInt(10);
+    if (end == 0) {
+      // allow 0 length
+      return "";
+    }
+    final char[] buffer = new char[end];
+    for (int i = 0; i < end; i++) {
+      buffer[i] = (char) _TestUtil.nextInt(r, 97, 102); // 97='a', 102='f'; assumes inclusive bounds — TODO confirm
+    }
+    return new String(buffer, 0, end);
+  }
+
+  // given set of terms (sorted in place here), test the different outputs for them
+  private void doTest(int inputMode, IntsRef[] terms) throws IOException {
+    Arrays.sort(terms);
+
+    // NoOutputs (simple FSA)
+    {
+      final Outputs<Object> outputs = NoOutputs.getSingleton();
+      final Object NO_OUTPUT = outputs.getNoOutput();
+      final List<FSTTester.InputOutput<Object>> pairs = new ArrayList<FSTTester.InputOutput<Object>>(terms.length);
+      for(IntsRef term : terms) {
+        pairs.add(new FSTTester.InputOutput<Object>(term, NO_OUTPUT));
+      }
+      new FSTTester<Object>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+
+    // PositiveIntOutput (ord)
+    {
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+      final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
+      for(int idx=0;idx<terms.length;idx++) {
+        pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(idx)));
+      }
+      new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+
+    // PositiveIntOutput (random monotonically increasing positive number)
+    {
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
+      final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
+      long lastOutput = 0;
+      for(int idx=0;idx<terms.length;idx++) {
+        final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
+        lastOutput = value;
+        pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(value)));
+      }
+      new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+
+    // PositiveIntOutput (random non-negative number; masked before get() so that 0 maps to the no-output instance)
+    {
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
+      final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
+      for(int idx=0;idx<terms.length;idx++) {
+        pairs.add(new FSTTester.InputOutput<Long>(terms[idx], outputs.get(random.nextLong() & Long.MAX_VALUE)));
+      }
+      new FSTTester<Long>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+
+    // Pair<ord, (random monotonically increasing positive number)>
+    {
+      final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(random.nextBoolean());
+      final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(random.nextBoolean());
+      final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
+      final List<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>> pairs = new ArrayList<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>>(terms.length);
+      long lastOutput = 0;
+      for(int idx=0;idx<terms.length;idx++) {
+        final long value = lastOutput + _TestUtil.nextInt(random, 1, 1000);
+        lastOutput = value;
+        pairs.add(new FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>(terms[idx],
+                                                                         outputs.get(o1.get(idx),
+                                                                                     o2.get(value))));
+      }
+      new FSTTester<PairOutputs.Pair<Long,Long>>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+
+    // Sequence-of-bytes
+    {
+      final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
+      final BytesRef NO_OUTPUT = outputs.getNoOutput();
+      final List<FSTTester.InputOutput<BytesRef>> pairs = new ArrayList<FSTTester.InputOutput<BytesRef>>(terms.length);
+      for(int idx=0;idx<terms.length;idx++) {
+        final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx));
+        pairs.add(new FSTTester.InputOutput<BytesRef>(terms[idx], output));
+      }
+      new FSTTester<BytesRef>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+
+    // Sequence-of-ints
+    {
+      final IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
+      final List<FSTTester.InputOutput<IntsRef>> pairs = new ArrayList<FSTTester.InputOutput<IntsRef>>(terms.length);
+      for(int idx=0;idx<terms.length;idx++) {
+        final String s = Integer.toString(idx);
+        final IntsRef output = new IntsRef(s.length());
+        output.length = s.length();
+        for(int idx2=0;idx2<output.length;idx2++) {
+          output.ints[idx2] = s.charAt(idx2);
+        }
+        pairs.add(new FSTTester.InputOutput<IntsRef>(terms[idx], output));
+      }
+      new FSTTester<IntsRef>(random, dir, inputMode, pairs, outputs).doTest();
+    }
+  }
+
+  private static class FSTTester<T> {
+
+    final Random random; // randomness source handed in by the caller
+    final List<InputOutput<T>> pairs; // the input/output pairs under test
+    final int inputMode; // as passed by the test; toIntsRef treats 0 as utf8, otherwise utf32
+    final Outputs<T> outputs;
+    final Directory dir;
+    // Stores the arguments as-is; nothing is copied.
+    public FSTTester(Random random, Directory dir, int inputMode, List<InputOutput<T>> pairs, Outputs<T> outputs) {
+      this.random = random;
+      this.dir = dir;
+      this.inputMode = inputMode;
+      this.pairs = pairs;
+      this.outputs = outputs;
+    }
+
+    private static class InputOutput<T> implements Comparable<InputOutput<T>> {
+      public final IntsRef input;
+      public final T output;
+
+      public InputOutput(IntsRef input, T output) {
+        this.output = output;
+        this.input = input;
+      }
+      // Orders pairs by input only; the output is ignored.
+      public int compareTo(InputOutput<T> other) {
+        if (!(other instanceof InputOutput)) {
+          // given the parameter type, only reachable when other is null
+          throw new IllegalArgumentException();
+        }
+        return input.compareTo(other.input);
+      }
+    }
+
+    private String getRandomString() {
+      // Flip a coin between the two term generators.
+      if (random.nextBoolean()) {
+        // realistic unicode term
+        return _TestUtil.randomRealisticUnicodeString(random);
+      }
+      // we want to mix in limited-alphabet symbols so
+      // we get more sharing of the nodes given how few
+      // terms we are testing...
+      final String limited = simpleRandomString(random);
+      return limited;
+    }
+
+    public void doTest() throws IOException { // runs doTest(int,int) three times, in this order
+      // no pruning: both arguments are 0
+      doTest(0, 0);
+
+      // simple pruning: random first argument in [1, 1+pairs.size()] (assuming inclusive bounds — TODO confirm)
+      doTest(_TestUtil.nextInt(random, 1, 1+pairs.size()), 0);
+
+      // leafy pruning: random second argument from the same range
+      doTest(0, _TestUtil.nextInt(random, 1, 1+pairs.size()));
+    }
+
+    // NOTE: only copies the stuff this test needs (e.g. bytesPerArc and numArcs are not copied)!!
+    private FST.Arc<T> copyArc(FST.Arc<T> arc) {
+      final FST.Arc<T> copy = new FST.Arc<T>();
+      copy.label = arc.label;
+      copy.target = arc.target;
+      copy.output = arc.output;
+      copy.nextFinalOutput = arc.nextFinalOutput;
+      return copy; // the new snapshot, not the argument. NOTE(review): confirm no caller needs isFinal()/isLast() on it
+    }
+
+    // runs the term, returning the output, or null if term
+    // isn't accepted.  if stopNode is non-null it must be
+    // length 2 int array; stopNode[0] will be the last
+    // matching node (-1 if the term is accepted)
+    // and stopNode[1] will be the length of the
+    // term prefix that matches.  With a non-null stopNode a
+    // failed match inside the term returns the output gathered
+    // so far instead of null.
+    private T run(FST<T> fst, IntsRef term, int[] stopNode) throws IOException {
+      if (term.length == 0) {
+        // empty term: accepted only when the FST carries an empty output
+        final T emptyOutput = fst.getEmptyOutput();
+        if (stopNode != null) {
+          stopNode[1] = 0;
+          stopNode[0] = emptyOutput != null ? -1 : fst.getStartNode();
+        }
+        return emptyOutput;
+      }
+
+      final FST.Arc<T> arc = new FST.Arc<T>();
+      int node = fst.getStartNode();
+      int lastNode = -1;
+      T output = fst.outputs.getNoOutput();
+      for(int i=0;i<term.length;i++) {
+        if (!fst.hasArcs(node)) {
+          // hit end node before term's end
+          if (stopNode == null) {
+            return null;
+          }
+          stopNode[0] = lastNode;
+          stopNode[1] = i-1;
+          return output;
+        }
+
+        if (fst.findArc(node, term.ints[term.offset + i], arc) == null) {
+          // this node has no arc for the current label
+          if (stopNode == null) {
+            return null;
+          }
+          stopNode[0] = node;
+          stopNode[1] = i;
+          return output;
+        }
+
+        // matched: follow the arc and fold in its output, if any
+        node = arc.target;
+        if (arc.output != fst.outputs.getNoOutput()) {
+          output = fst.outputs.add(output, arc.output);
+        }
+        lastNode = node;
+      }
+
+      if (!arc.isFinal()) {
+        // hit term's end before end node
+        if (stopNode == null) {
+          return null;
+        }
+        stopNode[0] = node;
+        stopNode[1] = term.length;
+        return output;
+      }
+
+      // accepted: the last arc may carry an extra final output
+      if (arc.nextFinalOutput != fst.outputs.getNoOutput()) {
+        output = fst.outputs.add(output, arc.nextFinalOutput);
+      }
+
+      if (stopNode != null) {
+        stopNode[0] = -1;
+        stopNode[1] = term.length;
+      }
+      return output;
+    }
+
+    // Walks the FST from its start node, at each node choosing one entry
+    // at random from the list of arcs gathered via copyArc, and writes the
+    // labels of the path taken into in (in.length ends up as the path
+    // length).  Returns the output accumulated along that path.  For an
+    // FST with no nodes, in.length is set to 0 and the empty output is
+    // returned.
+    private T randomAcceptedWord(FST<T> fst, IntsRef in) throws IOException {
+      int node = fst.getStartNode();
+
+      if (fst.noNodes()) {
+        // degenerate FST: only accepts the empty string
+        assertTrue(fst.getEmptyOutput() != null);
+        in.length = 0;
+        return fst.getEmptyOutput();
+      }
+      final List<FST.Arc<T>> arcs = new ArrayList<FST.Arc<T>>();
+      in.length = 0;
+      in.offset = 0;
+      T output = fst.outputs.getNoOutput();
+      //System.out.println("get random");
+      while(true) {
+        // read all arcs:
+        //System.out.println("  n=" + node);
+        int arcAddress = node;
+        FST.Arc<T> arc = new FST.Arc<T>();
+        fst.readFirstArc(arcAddress, arc);
+        arcs.add(copyArc(arc));
+        // the same arc instance is re-filled by readNextArc, which is why
+        // each one is passed through copyArc before being stored
+        while(!arc.isLast()) {
+          fst.readNextArc(arc);
+          arcs.add(copyArc(arc));
+        }
+      
+        // pick one
+        arc = arcs.get(random.nextInt(arcs.size()));
+
+        // the list is reused for the next node
+        arcs.clear();
+
+        // append label
+        if (in.ints.length == in.length) {
+          in.grow(1+in.length);
+        }
+        in.ints[in.length++] = arc.label;
+
+        output = fst.outputs.add(output, arc.output);
+
+        // maybe stop
+        if (arc.isFinal()) {
+          if (fst.hasArcs(arc.target)) {
+            // final state but it also has outgoing edges
+            // (coin flip: stop here and fold in nextFinalOutput, or keep walking)
+            if (random.nextBoolean()) {
+              output = fst.outputs.add(output, arc.nextFinalOutput);
+              break;
+            }
+          } else {
+            // final arc whose target has no arcs: nothing left to follow
+            break;
+          }
+        }
+
+        node = arc.target;
+      }
+
+      return output;
+    }
+
+
+    // Builds an FST from pairs with the given prune thresholds, on a coin
+    // flip round-trips it through dir (file "fst.bin") to exercise
+    // save/load, then verifies it with verifyUnPruned (both thresholds 0)
+    // or verifyPruned.  Returns the FST that was verified; it is null when
+    // the builder produced none (e.g. fully pruned).
+    private FST<T> doTest(int prune1, int prune2) throws IOException {
+      if (VERBOSE) {
+        System.out.println("TEST: prune1=" + prune1 + " prune2=" + prune2);
+      }
+
+      final Builder<T> builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
+                                                prune1, prune2,
+                                                prune1==0 && prune2==0, outputs);
+
+      for(InputOutput<T> pair : pairs) {
+        builder.add(pair.input, pair.output);
+      }
+      FST<T> fst = builder.finish();
+
+      if (random.nextBoolean() && fst != null) {
+        // save, then reload, so the rest of the test runs on the loaded copy
+        IndexOutput out = dir.createOutput("fst.bin");
+        try {
+          fst.save(out);
+        } finally {
+          // close even if save fails so the output is not left open
+          out.close();
+        }
+        IndexInput in = dir.openInput("fst.bin");
+        try {
+          fst = new FST<T>(in, outputs);
+        } finally {
+          in.close();
+          dir.deleteFile("fst.bin");
+        }
+      }
+
+      if (VERBOSE && pairs.size() <= 20 && fst != null) {
+        // small FSTs are dumped in dot format for debugging
+        PrintStream ps = new PrintStream("out.dot");
+        try {
+          fst.toDot(ps);
+        } finally {
+          ps.close();
+        }
+        System.out.println("SAVED out.dot");
+      }
+
+      if (VERBOSE) {
+        if (fst == null) {
+          System.out.println("  fst has 0 nodes (fully pruned)");
+        } else {
+          System.out.println("  fst has " + fst.getNodeCount() + " nodes and " + fst.getArcCount() + " arcs");
+        }
+      }
+
+      if (prune1 == 0 && prune2 == 0) {
+        verifyUnPruned(inputMode, fst);
+      } else {
+        verifyPruned(inputMode, fst, prune1, prune2);
+      }
+
+      return fst;
+    }
+
+    // FST is complete
+    //
+    // Verifies an FST built without pruning.  With no pairs the FST must
+    // be null.  Otherwise: every pair must be accepted by run() with its
+    // output and IntsRefFSTEnum.next() must return the pairs in list
+    // order; random walks through the FST must only yield known terms
+    // with their outputs; a single advance() must land on the term at the
+    // binarySearch insertion point (or return null past the end); and
+    // interleaved next()/advance() calls must stay in step with pairs.
+    // NOTE(review): the binarySearch calls rely on pairs being sorted by
+    // input -- presumably the caller guarantees that; confirm.
+    private void verifyUnPruned(int inputMode, FST<T> fst) throws IOException {
+
+      if (pairs.size() == 0) {
+        assertNull(fst);
+        return;
+      }
+
+      if (VERBOSE) {
+        System.out.println("TEST: now verify " + pairs.size() + " terms");
+        for(InputOutput<T> pair : pairs) {
+          assertNotNull(pair);
+          assertNotNull(pair.input);
+          assertNotNull(pair.output);
+          System.out.println("  " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
+        }
+      }
+
+      assertNotNull(fst);
+
+      // make sure all words are accepted
+      {
+        IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+        for(InputOutput<T> pair : pairs) {
+          IntsRef term = pair.input;
+          Object output = run(fst, term, null);
+
+          assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output);
+          assertEquals(output, pair.output);
+
+          // verify enum's next
+          IntsRefFSTEnum.InputOutput<T> t = fstEnum.next();
+
+          assertEquals(term, t.input);
+          assertEquals(pair.output, t.output);
+        }
+        // the enum must be exhausted once every pair has been seen
+        assertNull(fstEnum.next());
+      }
+
+      // input -> expected output lookup used by the checks below
+      final Map<IntsRef,T> termsMap = new HashMap<IntsRef,T>();
+      for(InputOutput<T> pair : pairs) {
+        termsMap.put(pair.input, pair.output);
+      }
+
+      // find random matching word and make sure it's valid
+      final IntsRef scratch = new IntsRef(10);
+      for(int iter=0;iter<500*RANDOM_MULTIPLIER;iter++) {
+        T output = randomAcceptedWord(fst, scratch);
+        assertTrue("accepted word " + inputToString(inputMode, scratch) + " is not valid", termsMap.containsKey(scratch));
+        assertEquals(termsMap.get(scratch), output);
+      }
+    
+      // test single IntsRefFSTEnum.advance:
+      //System.out.println("TEST: verify advance");
+      for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+        final IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+        if (random.nextBoolean()) {
+          // seek to term that doesn't exist:
+          // (keep drawing random terms until one is missing from pairs)
+          while(true) {
+            final IntsRef term = toIntsRef(getRandomString(), inputMode);
+            int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
+            if (pos < 0) {
+              // convert the negative result into the insertion point
+              pos = -(pos+1);
+              // ok doesn't exist
+              //System.out.println("  seek " + inputToString(inputMode, term));
+              final IntsRefFSTEnum.InputOutput<T> seekResult = fstEnum.advance(term);
+              if (pos < pairs.size()) {
+                //System.out.println("    got " + inputToString(inputMode,seekResult.input) + " output=" + fst.outputs.outputToString(seekResult.output));
+                assertEquals(pairs.get(pos).input, seekResult.input);
+                assertEquals(pairs.get(pos).output, seekResult.output);
+              } else {
+                // seeked beyond end
+                //System.out.println("seek=" + seekTerm);
+                assertNull("expected null but got " + (seekResult==null ? "null" : inputToString(inputMode, seekResult.input)), seekResult);
+              }
+
+              break;
+            }
+          }
+        } else {
+          // seek to term that does exist:
+          InputOutput pair = pairs.get(random.nextInt(pairs.size()));
+          //System.out.println("  seek " + inputToString(inputMode, pair.input));
+          final IntsRefFSTEnum.InputOutput<T> seekResult = fstEnum.advance(pair.input);
+          assertEquals(pair.input, seekResult.input);
+          assertEquals(pair.output, seekResult.output);
+        }
+      }
+
+      if (VERBOSE) {
+        System.out.println("TEST: mixed next/advance");
+      }
+
+      // test mixed next/advance
+      for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
+        if (VERBOSE) {
+          System.out.println("TEST: iter " + iter);
+        }
+        final IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+        // upto is the index into pairs the enum is expected to be on;
+        // -1 means not yet positioned
+        int upto = -1;
+        while(true) {
+          boolean isDone = false;
+          if (upto == pairs.size()-1 || random.nextBoolean()) {
+            // next
+            upto++;
+            if (VERBOSE) {
+              System.out.println("  do next");
+            }
+            isDone = fstEnum.next() == null;
+          } else if (upto != -1 && upto < 0.75 * pairs.size() && random.nextBoolean()) {
+            // advance to a term that is not in pairs and sorts after the
+            // current one; give up after 10 random draws
+            int attempt = 0;
+            for(;attempt<10;attempt++) {
+              IntsRef term = toIntsRef(getRandomString(), inputMode);
+              if (!termsMap.containsKey(term) && term.compareTo(pairs.get(upto).input) > 0) {
+                if (VERBOSE) {
+                  System.out.println("  do non-exist advance(" + inputToString(inputMode, term) + "]");
+                }
+                int pos = Collections.binarySearch(pairs, new InputOutput<T>(term, null));
+                assert pos < 0;
+                upto = -(pos+1);
+                isDone = fstEnum.advance(term) == null;
+                break;
+              }
+            }
+            if (attempt == 10) {
+              // no usable term found; pick another action without checking
+              continue;
+            }
+            
+          } else {
+            // advance to an existing term at or after the current position
+            final int inc = random.nextInt(pairs.size() - upto - 1);
+            upto += inc;
+            if (upto == -1) {
+              upto = 0;
+            }
+
+            if (VERBOSE) {
+              System.out.println("  do advance(" + inputToString(inputMode, pairs.get(upto).input) + "]");
+            }
+            isDone = fstEnum.advance(pairs.get(upto).input) == null;
+          }
+          if (VERBOSE) {
+            if (!isDone) {
+              System.out.println("    got " + inputToString(inputMode, fstEnum.current().input));
+            } else {
+              System.out.println("    got null");
+            }
+          }
+
+          if (upto == pairs.size()) {
+            // moved past the last pair: the enum must report the end
+            assertTrue(isDone);
+            break;
+          } else {
+            assertFalse(isDone);
+            assertEquals(pairs.get(upto).input, fstEnum.current().input);
+            assertEquals(pairs.get(upto).output, fstEnum.current().output);
+          }
+        }
+      }
+    }
+
+    // Per-prefix bookkeeping that verifyPruned tallies while brute-force
+    // computing what a pruned FST should contain.
+    private static class CountMinOutput<T> {
+      // number of terms that have this prefix
+      int count;
+      // set once some term ends exactly at this prefix
+      boolean isFinal;
+      // starts out true; cleared when a longer prefix survives pruning
+      boolean isLeaf = true;
+      // outputs.common(...) folded over the outputs of all terms with this prefix
+      T output;
+      // value of output captured when a term ended at this prefix
+      T finalOutput;
+    }
+
+    // FST is pruned
+    //
+    // Verifies an FST built with prune1 or prune2 set.  The expected
+    // content is recomputed by brute force: every prefix of every input
+    // is tallied (count and common output), the tally is pruned using the
+    // same thresholds, and then the FST is checked in both directions --
+    // everything it enumerates must be a surviving prefix, and every
+    // surviving non-empty prefix must be matched in full by run().  If at
+    // most one prefix survives, the FST is expected to be null.
+    private void verifyPruned(int inputMode, FST<T> fst, int prune1, int prune2) throws IOException {
+
+      if (VERBOSE) {
+        System.out.println("TEST: now verify pruned " + pairs.size() + " terms; outputs=" + outputs);
+        for(InputOutput<T> pair : pairs) {
+          System.out.println("  " + inputToString(inputMode, pair.input) + ": " + outputs.outputToString(pair.output));
+        }
+      }
+
+      // To validate the FST, we brute-force compute all prefixes
+      // in the terms, matched to their "common" outputs, prune that
+      // set according to the prune thresholds, then assert the FST
+      // matches that same set.
+
+      // NOTE: Crazy RAM intensive!!
+
+      //System.out.println("TEST: tally prefixes");
+
+      // build all prefixes
+      final Map<IntsRef,CountMinOutput<T>> prefixes = new HashMap<IntsRef,CountMinOutput<T>>();
+      final IntsRef scratch = new IntsRef(10);
+      for(InputOutput<T> pair: pairs) {
+        scratch.copy(pair.input);
+        // idx runs from 0 (the empty prefix) through the full term
+        for(int idx=0;idx<=pair.input.length;idx++) {
+          scratch.length = idx;
+          CountMinOutput<T> cmo = prefixes.get(scratch);
+          if (cmo == null) {
+            cmo = new CountMinOutput<T>();
+            cmo.count = 1;
+            cmo.output = pair.output;
+            // scratch is reused, so the map key is a fresh IntsRef built from it
+            prefixes.put(new IntsRef(scratch), cmo);
+          } else {
+            cmo.count++;
+            cmo.output = outputs.common(cmo.output, pair.output);
+          }
+          if (idx == pair.input.length) {
+            // this prefix is a whole term
+            cmo.isFinal = true;
+            cmo.finalOutput = cmo.output;
+          }
+        }
+      }
+
+      //System.out.println("TEST: now prune");
+
+      // prune 'em
+      final Iterator<Map.Entry<IntsRef,CountMinOutput<T>>> it = prefixes.entrySet().iterator();
+      while(it.hasNext()) {
+        Map.Entry<IntsRef,CountMinOutput<T>> ent = it.next();
+        final IntsRef prefix = ent.getKey();
+        final CountMinOutput<T> cmo = ent.getValue();
+        //System.out.println("  term=" + inputToString(inputMode, prefix) + " count=" + cmo.count + " isLeaf=" + cmo.isLeaf);
+        final boolean keep;
+        if (prune1 > 0) {
+          // prune1: keep a prefix only if enough terms share it
+          keep = cmo.count >= prune1;
+        } else {
+          assert prune2 > 0;
+          // prune2: the decision can also depend on the parent prefix's count
+          if (prune2 > 1 && cmo.count >= prune2) {
+            keep = true;
+          } else if (prefix.length > 0) {
+            // consult our parent
+            scratch.length = prefix.length-1;
+            System.arraycopy(prefix.ints, prefix.offset, scratch.ints, 0, scratch.length);
+            final CountMinOutput<T> cmo2 = prefixes.get(scratch);
+            //System.out.println("    parent count = " + (cmo2 == null ? -1 : cmo2.count));
+            keep = cmo2 != null && ((prune2 > 1 && cmo2.count >= prune2) || (prune2 == 1 && (cmo2.count >= 2 || prefix.length <= 1)));
+          } else if (cmo.count >= prune2) {
+            keep = true;
+          } else {
+            keep = false;
+          }
+        }
+
+        if (!keep) {
+          // removal goes through the iterator because the map is being iterated
+          it.remove();
+          //System.out.println("    remove");
+        } else {
+          // clear isLeaf for all ancestors
+          //System.out.println("    keep");
+          scratch.copy(prefix);
+          scratch.length--;
+          while(scratch.length >= 0) {
+            final CountMinOutput<T> cmo2 = prefixes.get(scratch);
+            if (cmo2 != null) {
+              //System.out.println("    clear isLeaf " + inputToString(inputMode, scratch));
+              cmo2.isLeaf = false;
+            }
+            scratch.length--;
+          }
+        }
+      }
+
+      //System.out.println("TEST: after prune");
+      /*
+        for(Map.Entry<BytesRef,CountMinOutput> ent : prefixes.entrySet()) {
+        System.out.println("  " + inputToString(inputMode, ent.getKey()) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal);
+        if (ent.getValue().isFinal) {
+        System.out.println("    finalOutput=" + outputs.outputToString(ent.getValue().finalOutput));
+        }
+        }
+      */
+
+      // at most one surviving prefix: no FST is expected at all
+      if (prefixes.size() <= 1) {
+        assertNull(fst);
+        return;
+      }
+
+      assertNotNull(fst);
+
+      // make sure FST only enums valid prefixes
+      IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
+      IntsRefFSTEnum.InputOutput current;
+      while((current = fstEnum.next()) != null) {
+        //System.out.println("  fst enum term=" + inputToString(inputMode, current.input) + " output=" + outputs.outputToString(current.output));
+        final CountMinOutput cmo = prefixes.get(current.input);
+        assertNotNull(cmo);
+        assertTrue(cmo.isLeaf || cmo.isFinal);
+        // NOTE(review): this loop compares against finalOutput only for
+        // final non-leaf prefixes, while the loop below uses finalOutput
+        // for every final prefix -- confirm the difference is intended
+        if (cmo.isFinal && !cmo.isLeaf) {
+          assertEquals(cmo.finalOutput, current.output);
+        } else {
+          assertEquals(cmo.output, current.output);
+        }
+      }
+
+      // make sure all non-pruned prefixes are present in the FST
+      final int[] stopNode = new int[2];
+      for(Map.Entry<IntsRef,CountMinOutput<T>> ent : prefixes.entrySet()) {
+        if (ent.getKey().length > 0) {
+          final CountMinOutput<T> cmo = ent.getValue();
+          final T output = run(fst, ent.getKey(), stopNode);
+          //System.out.println("  term=" + inputToString(inputMode, ent.getKey()) + " output=" + outputs.outputToString(cmo.output));
+          // if (cmo.isFinal && !cmo.isLeaf) {
+          if (cmo.isFinal) {
+            assertEquals(cmo.finalOutput, output);
+          } else {
+            assertEquals(cmo.output, output);
+          }
+          // the whole prefix must have been matched by the FST
+          assertEquals(ent.getKey().length, stopNode[1]);
+        }
+      }
+    }
+  }
+
+  // Default-sized random run: up to 1000 words per set, with the
+  // iteration count scaled by RANDOM_MULTIPLIER.
+  public void testRandomWords() throws IOException {
+    final int maxNumWords = 1000;
+    final int numIter = 5 * RANDOM_MULTIPLIER;
+    testRandomWords(maxNumWords, numIter);
+    // smaller alternative kept for debugging: testRandomWords(10, 100);
+  }
+
+  // Names an input mode for messages: 0 is "utf8", every other value
+  // is "utf32".
+  private String inputModeToString(int mode) {
+    return mode == 0 ? "utf8" : "utf32";
+  }
+
+  // Runs numIter iterations.  Each iteration, for input mode 0 and then
+  // input mode 1, draws a word count in [0, maxNumWords], collects that
+  // many distinct random terms and passes them to doTest.
+  private void testRandomWords(int maxNumWords, int numIter) throws IOException {
+    for(int iter=0;iter<numIter;iter++) {
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter " + iter);
+      }
+      for(int inputMode=0;inputMode<2;inputMode++) {
+        final int numWords = random.nextInt(maxNumWords+1);
+        // the set drops duplicate terms, so keep drawing until it holds
+        // numWords distinct ones
+        final Set<IntsRef> termsSet = new HashSet<IntsRef>();
+        while(termsSet.size() < numWords) {
+          final String term = getRandomString();
+          termsSet.add(toIntsRef(term, inputMode));
+        }
+        doTest(inputMode, termsSet.toArray(new IntsRef[termsSet.size()]));
+      }
+    }
+  }
+
+  // Draws one random term.  A coin flip picks between a realistic
+  // unicode string and a limited-alphabet string; the latter is mixed
+  // in so that we get more sharing of the nodes given how few terms
+  // we are testing.
+  private String getRandomString() {
+    if (!random.nextBoolean()) {
+      return simpleRandomString(random);
+    }
+    return _TestUtil.randomRealisticUnicodeString(random);
+  }
+
+  // Nightly-only variant of the random words test with much larger
+  // sets (up to 50000 words).
+  @Nightly
+  public void testBigSet() throws IOException {
+    final int maxNumWords = 50000;
+    testRandomWords(maxNumWords, RANDOM_MULTIPLIER);
+  }
+
+  // Renders a term for messages.  In mode 0 the ints are converted with
+  // toBytesRef and decoded as utf8; in any other mode they are passed to
+  // UnicodeUtil.newString as utf32 code points.
+  private static String inputToString(int inputMode, IntsRef term) {
+    if (inputMode != 0) {
+      // utf32
+      return UnicodeUtil.newString(term.ints, term.offset, term.length);
+    }
+    // utf8
+    return toBytesRef(term).utf8ToString();
+  }
+
+  // Build FST for all unique terms in the test line docs
+  // file, up until a time limit, then confirm that
+  // BytesRefFSTEnum and the index's TermsEnum agree on
+  // random seeks followed by a few next() calls.
+  public void testRealTerms() throws Exception {
+
+    // NOTE(review): the default field codec is switched away from
+    // SimpleText here and never switched back -- confirm that is intended
+    if (CodecProvider.getDefault().getDefaultFieldCodec().equals("SimpleText")) {
+      // no
+      CodecProvider.getDefault().setDefaultFieldCodec("Standard");
+    }
+
+    final LineFileDocs docs = new LineFileDocs(false);
+    final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : 1;
+    final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
+    final File tempDir = _TestUtil.getTempDir("fstlines");
+    final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
+    final IndexWriter writer = new IndexWriter(dir, conf);
+    final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC * 1000;
+    Document doc;
+    int docCount = 0;
+    // index docs until the source runs dry or the time budget is used up
+    while((doc = docs.nextDoc()) != null && System.currentTimeMillis() < stopTime) {
+      writer.addDocument(doc);
+      docCount++;
+    }
+    IndexReader r = IndexReader.open(writer);
+    writer.close();
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
+    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
+
+    boolean storeOrd = random.nextBoolean();
+    if (VERBOSE) {
+      if (storeOrd) {
+        System.out.println("FST stores ord");
+      } else {
+        System.out.println("FST stores docFreq");
+      }
+    }
+    Terms terms = MultiFields.getTerms(r, "body");
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator();
+      BytesRef term;
+      int ord = 0;
+      while((term = termsEnum.next()) != null) {
+        if (ord == 0) {
+          // probe once: fall back to docFreq if this enum has no ords
+          try {
+            termsEnum.ord();
+          } catch (UnsupportedOperationException uoe) {
+            storeOrd = false;
+          }
+        }
+        final int output;
+        if (storeOrd) {
+          output = ord;
+        } else {
+          output = termsEnum.docFreq();
+        }
+        builder.add(term, outputs.get(output));
+        ord++;
+      }
+      final FST<Long> fst = builder.finish();
+      if (VERBOSE) {
+        if (fst == null) {
+          // nothing was added, so there is no FST to report stats for
+          System.out.println("FST: " + docCount + " docs; " + ord + " terms; no FST was built");
+        } else {
+          System.out.println("FST: " + docCount + " docs; " + ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs;" + " " + fst.sizeInBytes() + " bytes");
+        }
+      }
+
+      if (ord > 0) {
+        // Now confirm BytesRefFSTEnum and TermsEnum act the
+        // same:
+        final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
+        for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {
+          fstEnum.reset();
+          final BytesRef randomTerm = new BytesRef(getRandomString());
+
+          final TermsEnum.SeekStatus seekResult = termsEnum.seek(randomTerm);
+          final BytesRefFSTEnum.InputOutput<Long> fstSeekResult = fstEnum.advance(randomTerm);
+
+          if (VERBOSE) {
+            System.out.println("TEST: seek " + randomTerm.utf8ToString());
+          }
+
+          if (seekResult == TermsEnum.SeekStatus.END) {
+            assertNull(fstSeekResult);
+          } else {
+            assertSame(termsEnum, fstEnum, storeOrd);
+            // step both enums forward a few terms and keep comparing
+            for(int nextIter=0;nextIter<10;nextIter++) {
+              if (VERBOSE) {
+                System.out.println("TEST: next");
+              }
+              if (termsEnum.next() != null) {
+                if (VERBOSE) {
+                  System.out.println("  term=" + termsEnum.term().utf8ToString());
+                }
+                assertNotNull(fstEnum.next());
+                assertSame(termsEnum, fstEnum, storeOrd);
+              } else {
+                // the TermsEnum is exhausted, so the FST enum must be too
+                BytesRefFSTEnum.InputOutput<Long> nextResult = fstEnum.next();
+                if (nextResult != null) {
+                  System.out.println("expected null but got: input=" + nextResult.input.utf8ToString() + " output=" + outputs.outputToString(nextResult.output));
+                  fail();
+                }
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    r.close();
+    dir.close();
+  }
+
+  // Asserts that the FST enum sits on the same term as the TermsEnum
+  // (or that both have no current term) and that the FST output equals
+  // the TermsEnum's ord or docFreq, depending on storeOrd.
+  private void assertSame(TermsEnum termsEnum, BytesRefFSTEnum fstEnum, boolean storeOrd) throws Exception {
+    if (termsEnum.term() == null) {
+      assertNull(fstEnum.current());
+      return;
+    }
+
+    assertEquals(termsEnum.term(), fstEnum.current().input);
+    if (!storeOrd) {
+      // fst stored the docFreq
+      assertEquals(termsEnum.docFreq(), (int) (((Long) fstEnum.current().output).longValue()));
+    } else {
+      // fst stored the ord
+      assertEquals(termsEnum.ord(), ((Long) fstEnum.current().output).longValue());
+    }
+  }
+
+  private static abstract class VisitTerms<T> {
+    private final String dirOut;
+    private final String wordsFileIn;
+    private int inputMode;
+    private final Outputs<T> outputs;
+    private final Builder<T> builder;
+
+    public VisitTerms(String dirOut, String wordsFileIn, int inputMode, int prune, Outputs<T> outputs) {
+      this.dirOut = dirOut;
+      this.wordsFileIn = wordsFileIn;
+      this.inputMode = inputMode;
+      this.outputs = outputs;
+      
+      builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, outputs);
+    }
+
+    protected abstract T getOutput(IntsRef input, int ord) throws IOException;
+
+    public void run(int limit) throws IOException {
+      BufferedReader is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
+      try {
+        final IntsRef intsRef = new IntsRef(10);
+        long tStart = System.currentTimeMillis();
+        int ord = 0;
+        while(true) {
+          String w = is.readLine();
+          if (w == null) {
+            break;
+          }
+          toIntsRef(w, inputMode, intsRef);
+          builder.add(intsRef,
+                      getOutput(intsRef, ord));
+
+          ord++;
+          if (ord % 500000 == 0) {
+            System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
+          }
+          if (ord >= limit) {
+            break;
+          }
+        }
+
+        assert builder.getTermCount() == ord;
+        final FST<T> fst = builder.finish();
+        if (fst == null) {
+          System.out.println("FST was fully pruned!");
+          System.exit(0);
+        }
+
+        System.out.println(ord + " terms; " + fst.getNodeCount() + " nodes; " + fst.getArcCount() + " arcs; " + fst.getArcWithOutputCount() + " arcs w/ output; tot size " + fst.sizeInBytes());
+        if (fst.getNodeCount() < 100) {
+          PrintStream ps = new PrintStream("out.dot");
+          fst.toDot(ps);
+          ps.close();
+          System.out.println("Wrote FST to out.dot");
+        }
+
+        Directory dir = FSDirectory.open(new File(dirOut));
+        IndexOutput out = dir.createOutput("fst.bin");
+        fst.save(out);
+        out.close();
+
+        System.out.println("Saved FST to fst.bin.");
+
+        System.out.println("\nNow verify...");
+
+        is.close();
+        is = new BufferedReader(new InputStreamReader(new FileInputStream(wordsFileIn), "UTF-8"), 65536);
+
+        ord = 0;
+        tStart = System.currentTimeMillis();
+        while(true) {
+          String w = is.readLine();
+          if (w == null) {
+            break;
+          }
+          toIntsRef(w, inputMode, intsRef);
+          T expected = getOutput(intsRef, ord);
+          T actual = fst.get(intsRef);
+          if (actual == null) {
+            throw new RuntimeException("unexpected null output on input=" + w);
+          }
+          if (!actual.equals(expected)) {
+            throw new RuntimeException("wrong output (got " + outputs.outputToString(actual) + " but expected " + outputs.outputToString(expected) + ") on input=" + w);
+          }
+
+          ord++;
+          if (ord % 500000 == 0) {
+            System.out.println(((System.currentTimeMillis()-tStart)/1000.0) + "s: " + ord + "...");
+          }
+          if (ord >= limit) {
+            break;
+          }
+        }
+
+        double totSec = ((System.currentTimeMillis() - tStart)/1000.0);
+        System.out.println("Verify took " + totSec + " sec + (" + (int) ((totSec*1000000000/ord)) + " nsec per lookup)");
+
+      } finally {
+        is.close();
+      }
+    }
+  }
+
+  // java -cp build/classes/test:build/classes/java:lib/junit-4.7.jar org.apache.lucene.util.automaton.fst.TestFSTs /x/tmp/allTerms3.txt out
+  public static void main(String[] args) throws IOException {
+    final String wordsFileIn = args[0];
+    final String dirOut = args[1];
+    int idx = 2;
+    int prune = 0;
+    int limit = Integer.MAX_VALUE;
+    int inputMode = 0;                             // utf8
+    boolean storeOrds = false;
+    boolean storeDocFreqs = false;
+    while(idx < args.length) {
+      if (args[idx].equals("-prune")) {
+        prune = Integer.valueOf(args[1+idx]);
+        idx++;
+      }
+      if (args[idx].equals("-limit")) {
+        limit = Integer.valueOf(args[1+idx]);
+        idx++;
+      }
+      if (args[idx].equals("-utf8")) {
+        inputMode = 0;
+      }
+      if (args[idx].equals("-utf32")) {
+        inputMode = 1;
+      }
+      if (args[idx].equals("-docFreq")) {
+        storeDocFreqs = true;
+      }
+      if (args[idx].equals("-ords")) {
+        storeOrds = true;
+      }
+      idx++;
+    }
+
+    // ord benefits from share, docFreqs don't:
+
+    if (storeOrds && storeDocFreqs) {
+      // Store both ord & docFreq:
+      final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
+      final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
+      final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
+      new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs) {
+        Random rand;
+        @Override
+        public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
+          if (ord == 0) {
+            rand = new Random(17);
+          }
+          return new PairOutputs.Pair<Long,Long>(o1.get(ord),
+                                                 o2.get(_TestUtil.nextInt(rand, 1, 5000)));
+        }
+      }.run(limit);
+    } else if (storeOrds) {
+      // Store only ords
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+      new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
+        @Override
+        public Long getOutput(IntsRef input, int ord) {
+          return outputs.get(ord);
+        }
+      }.run(limit);
+    } else if (storeDocFreqs) {
+      // Store only docFreq
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
+      new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs) {
+        Random rand;
+        @Override
+        public Long getOutput(IntsRef input, int ord) {
+          if (ord == 0) {
+            rand = new Random(17);
+          }
+          return outputs.get(_TestUtil.nextInt(rand, 1, 5000));
+        }
+      }.run(limit);
+    } else {
+      // Store nothing
+      final NoOutputs outputs = NoOutputs.getSingleton();
+      final Object NO_OUTPUT = outputs.getNoOutput();
+      new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs) {
+        @Override
+        public Object getOutput(IntsRef input, int ord) {
+          return NO_OUTPUT;
+        }
+      }.run(limit);
+    }
+  }
+}