LUCENE-3807: Cleanup Suggest / Lookup API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1296268 13f79535-47bb-0310-9956-ffa450edef68
2012-03-02 15:59:55 +00:00 · 2012-03-02 15:59:55 +00:00 · f303bcd465
parent 2c94c522fd
commit f303bcd465
27 changed files with 692 additions and 627 deletions
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@ -116,6 +116,13 @@ Changes in backwards compatibility policy
   and MultiPassIndexSplitter were made private as they now work
   per segment.  (Uwe Schindler)
   
+ * LUCENE-3807: Cleaned up Suggest / Lookup API. Term weights (freqs) are now
+   64bit signed integers instead of 32bit floats. Sorting of terms is now a 
+   disk based merge sort instead of an in-memory sort. The Lookup API now 
+   accepts and returns CharSequence instead of String; convert results to a
+   String before using them in a data structure that relies on hashCode / equals.
+   (Simon Willnauer)
+  
 Changes in Runtime Behavior

 * LUCENE-3698: FastVectorHighlighter no longer adds a multi value separator
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
@ -25,6 +25,7 @@ import org.apache.lucene.util.BytesRef;

 /**
 * This wrapper buffers incoming elements.
+ * @lucene.experimental
 */
 public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {
  // TODO keep this for now
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
@ -18,81 +18,113 @@ package org.apache.lucene.search.suggest;
 */

 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Comparator;

 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.SorterTemplate;

-final class BytesRefList {
-
+/**
+ * A simple append only random-access {@link BytesRef} array that stores full
+ * copies of the appended bytes in a {@link ByteBlockPool}.
+ * 
+ * 
+ * <b>Note: This class is not Thread-Safe!</b>
+ * 
+ * @lucene.internal
+ * @lucene.experimental
+ */
+public final class BytesRefList {
+  // TODO rename to BytesRefArray
  private final ByteBlockPool pool;
  private int[] offsets = new int[1];
-  private int currentElement = 0;
+  private int lastElement = 0;
  private int currentOffset = 0;
+  private final Counter bytesUsed = Counter.newCounter(false);
  
+  /**
+   * Creates a new {@link BytesRefList}
+   */
  public BytesRefList() {
-    this(new ByteBlockPool(new ByteBlockPool.DirectAllocator()));
-  }
-
-  public BytesRefList(ByteBlockPool pool) {
-    this.pool = pool;
+    this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(
+        bytesUsed));
    pool.nextBuffer();
+    bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+        + RamUsageEstimator.NUM_BYTES_INT);
  }
 
+  /**
+   * Clears this {@link BytesRefList}
+   */
+  public void clear() {
+    lastElement = 0;
+    currentOffset = 0;
+    Arrays.fill(offsets, 0);
+    pool.reset();
+  }
+  
+  /**
+   * Appends a copy of the given {@link BytesRef} to this {@link BytesRefList}.
+   * @param bytes the bytes to append
+   * @return the number of elements after the append (appended ordinal + 1)
+   */
  public int append(BytesRef bytes) {
-    if (currentElement >= offsets.length) {
+    if (lastElement >= offsets.length) {
+      int oldLen = offsets.length;
      offsets = ArrayUtil.grow(offsets, offsets.length + 1);
+      bytesUsed.addAndGet((offsets.length - oldLen)
+          * RamUsageEstimator.NUM_BYTES_INT);
    }
    pool.copy(bytes);
-    offsets[currentElement++] = currentOffset;
+    offsets[lastElement++] = currentOffset;
    currentOffset += bytes.length;
-    return currentElement;
+    return lastElement;
  }
  
+  /**
+   * Returns the current size of this {@link BytesRefList}
+   * @return the current size of this {@link BytesRefList}
+   */
  public int size() {
-    return currentElement;
+    return lastElement;
  }
  
-  public BytesRef get(BytesRef bytes, int pos) {
-    if (currentElement > pos) {
-      bytes.offset = offsets[pos];
-      bytes.length = pos == currentElement - 1 ? currentOffset - bytes.offset
-          : offsets[pos + 1] - bytes.offset;
-      pool.copyFrom(bytes);
-      return bytes;
+  /**
+   * Returns the <i>n'th</i> element of this {@link BytesRefList}
+   * @param spare a spare {@link BytesRef} instance
+   * @param ord the ordinal of the element to retrieve
+   * @return the <i>n'th</i> element of this {@link BytesRefList}
+   */
+  public BytesRef get(BytesRef spare, int ord) {
+    if (lastElement > ord) {
+      spare.offset = offsets[ord];
+      spare.length = ord == lastElement - 1 ? currentOffset - spare.offset
+          : offsets[ord + 1] - spare.offset;
+      pool.copyFrom(spare);
+      return spare;
    }
-    throw new IndexOutOfBoundsException("index " + pos
-        + " must be less than the size: " + currentElement);
+    throw new IndexOutOfBoundsException("index " + ord
+        + " must be less than the size: " + lastElement);
    
  }
  
-  public BytesRefIterator iterator() {
-    final int numElements = currentElement;
-    
-    return new BytesRefIterator() {
-      private final BytesRef spare = new BytesRef();
-      private int pos = 0;
-
-      @Override
-      public BytesRef next() throws IOException {
-        if (pos < numElements) {
-          get(spare, pos++);
-          return spare;
-        }
-        return null;
-      }
-
-      @Override
-      public Comparator<BytesRef> getComparator() {
-        return null;
-      }
-    };
+  /**
+   * Returns the number of bytes used internally to hold the appended bytes in
+   * memory
+   * 
+   * @return the number of bytes used internally to hold the appended bytes in
+   *         memory
+   */
+  public long bytesUsed() {
+    return bytesUsed.get();
  }
  
-  public int[] sort(final Comparator<BytesRef> comp) {
+  private int[] sort(final Comparator<BytesRef> comp) {
    final int[] orderdEntries = new int[size()];
    for (int i = 0; i < orderdEntries.length; i++) {
      orderdEntries[i] = i;
@ -123,9 +155,52 @@ final class BytesRefList {
        return comp.compare(pivot, get(scratch2, ord));
      }
      
-      private final BytesRef pivot = new BytesRef(),
-        scratch1 = new BytesRef(), scratch2 = new BytesRef();
+      private final BytesRef pivot = new BytesRef(), scratch1 = new BytesRef(),
+          scratch2 = new BytesRef();
    }.quickSort(0, size() - 1);
    return orderdEntries;
  }
+  
+  /**
+   * sugar for {@link #iterator(Comparator)} with a <code>null</code> comparator
+   */
+  public BytesRefIterator iterator() {
+    return iterator(null);
+  }
+  
+  /**
+   * <p>
+   * Returns a {@link BytesRefIterator} with point-in-time semantics. The
+   * iterator provides access to all {@link BytesRef} instances appended so far.
+   * </p>
+   * <p>
+   * If a non-<code>null</code> {@link Comparator} is provided, the iterator will
+   * iterate the byte values in the order specified by the comparator. Otherwise
+   * the values are returned in the order in which they were appended.
+   * </p>
+   * <p>
+   * This is a non-destructive operation.
+   * </p>
+   */
+  public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
+    final BytesRef spare = new BytesRef();
+    final int size = size();
+    final int[] ords = comp == null ? null : sort(comp);
+    return new BytesRefIterator() {
+      int pos = 0;
+      
+      @Override
+      public BytesRef next() throws IOException {
+        if (pos < size) {
+          return get(spare, ords == null ? pos++ : ords[pos++]);
+        }
+        return null;
+      }
+      
+      @Override
+      public Comparator<BytesRef> getComparator() {
+        return comp;
+      }
+    };
+  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
@ -75,7 +75,11 @@ public class FileDictionary implements Dictionary {
        String[] fields = line.split("\t");
        if (fields.length > 1) {
          // keep reading floats for bw compat
-          curFreq = (int)Float.parseFloat(fields[1]);
+          try {
+            curFreq = Long.parseLong(fields[1]);
+          } catch (NumberFormatException e) {
+            curFreq = (long)Double.parseDouble(fields[1]);
+          }
          spare.copyChars(fields[0]);
        } else {
          spare.copyChars(line);
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
@ -29,15 +29,19 @@ import org.apache.lucene.search.spell.TermFreqIterator;
 import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.PriorityQueue;

+/**
+ * Simple Lookup interface for {@link CharSequence} suggestions.
+ * @lucene.experimental
+ */
 public abstract class Lookup {
  /**
   * Result of a lookup.
   */
  public static final class LookupResult implements Comparable<LookupResult> {
    public final CharSequence key;
-    public final float value;
+    public final long value;
    
-    public LookupResult(CharSequence key, float value) {
+    public LookupResult(CharSequence key, long value) {
      this.key = key;
      this.value = value;
    }
@ -112,6 +116,10 @@ public abstract class Lookup {
    build(tfit);
  }
  
+  /**
+   * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}.
+   * The implementation might re-sort the data internally.
+   */
  public abstract void build(TermFreqIterator tfit) throws IOException;
  
  /**
@ -124,21 +132,6 @@ public abstract class Lookup {
   */
  public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num);

-  /**
-   * Modify the lookup data by recording additional data. Optional operation.
-   * @param key new lookup key
-   * @param value value to associate with this key
-   * @return true if new key is added, false if it already exists or operation
-   * is not supported.
-   */
-  public abstract boolean add(CharSequence key, Object value);
-  
-  /**
-   * Get value associated with a specific key.
-   * @param key lookup key
-   * @return associated value
-   */
-  public abstract Object get(CharSequence key);
  
  /**
   * Persist the constructed lookup data to a directory. Optional operation.
@ -173,4 +166,5 @@ public abstract class Lookup {
   * @throws IOException when fatal IO error occurs.
   */
  public abstract boolean load(File storeDir) throws IOException;
+  
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
@ -17,45 +17,166 @@ package org.apache.lucene.search.suggest;
 * limitations under the License.
 */

+import java.io.File;
 import java.io.IOException;
 import java.util.Comparator;

 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.suggest.fst.Sort;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;

 /**
- * This wrapper buffers incoming elements and makes sure they are sorted in
- * ascending lexicographic order.
+ * This wrapper buffers incoming elements on disk and returns them sorted according to the given comparator.
+ * @lucene.experimental
 */
-public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
-  // TODO keep this for now - but the consumer should really sort this stuff on disk with sorter...
-  private final int[] sortedOrds;
-  private int currentOrd = -1;
-  private final BytesRef spare = new BytesRef();
-  private final Comparator<BytesRef> comp;
+public class SortedTermFreqIteratorWrapper implements TermFreqIterator {
  
-  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
-    super(source);
-    this.sortedOrds = entries.sort(comp);
-    this.comp = comp;
+  private final TermFreqIterator source;
+  private File tempInput;
+  private File tempSorted;
+  private final ByteSequencesReader reader;
+  private boolean done = false;
+  
+  private long weight;
+  private final BytesRef scratch = new BytesRef();
+  private final Comparator<BytesRef> comparator;
+  
+  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator) throws IOException {
+    this(source, comparator, false);
  }
  
-  @Override
-  public long weight() {
-    return freqs[currentOrd];
+  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comparator, boolean compareRawBytes) throws IOException {
+    this.source = source;
+    this.comparator = comparator;
+    this.reader = sort(compareRawBytes ? comparator : new BytesOnlyComparator(this.comparator));
  }
  
  @Override
  public BytesRef next() throws IOException {
-    if (++curPos < entries.size()) {
-      return entries.get(spare, (currentOrd = sortedOrds[curPos]));  
+    boolean success = false;
+    if (done) {
+      return null;
+    }
+    try {
+      ByteArrayDataInput input = new ByteArrayDataInput();
+      if (reader.read(scratch)) {
+        weight = decode(scratch, input);
+        success = true;
+        return scratch;
+      }
+      close();
+      success = done = true;
+      return null;
+    } finally {
+      if (!success) {
+        done = true;
+        close();
+      }
    }
-    return null;
  }
  
  @Override
  public Comparator<BytesRef> getComparator() {
-    return comp;
+    return comparator;
+  }
+  
+  @Override
+  public long weight() {
+    return weight;
+  }
+  
+  private Sort.ByteSequencesReader sort(Comparator<BytesRef> comparator) throws IOException {
+    String prefix = getClass().getSimpleName();
+    File directory = Sort.defaultTempDir();
+    tempInput = File.createTempFile(prefix, ".input", directory);
+    tempSorted = File.createTempFile(prefix, ".sorted", directory);
+    
+    final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
+    boolean success = false;
+    try {
+      BytesRef spare;
+      byte[] buffer = new byte[0];
+      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+      while ((spare = source.next()) != null) {
+        encode(writer, output, buffer, spare, source.weight());
+      }
+      writer.close();
+      new Sort(comparator).sort(tempInput, tempSorted);
+      ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
+      success = true;
+      return reader;
+      
+    } finally {
+      if (success) {
+        IOUtils.close(writer);
+      } else {
+        try {
+          IOUtils.closeWhileHandlingException(writer);
+        } finally {
+          close();
+        }
+      }
+      
+    }
+  }
+  
+  private void close() throws IOException {
+    if (tempInput != null) {
+      tempInput.delete();
+    }
+    if (tempSorted != null) {
+      tempSorted.delete();
+    }
+    IOUtils.close(reader);
+  }
+  
+  private final static class BytesOnlyComparator implements Comparator<BytesRef> {
+
+    final Comparator<BytesRef> other;
+    private final BytesRef leftScratch = new BytesRef();
+    private final BytesRef rightScratch = new BytesRef();
+    
+    public BytesOnlyComparator(Comparator<BytesRef> other) {
+      this.other = other;
+    }
+
+    @Override
+    public int compare(BytesRef left, BytesRef right) {
+      wrap(leftScratch, left);
+      wrap(rightScratch, right);
+      return other.compare(leftScratch, rightScratch);
+    }
+    
+    private void wrap(BytesRef wrapper, BytesRef source) {
+      wrapper.bytes = source.bytes;
+      wrapper.offset = source.offset;
+      wrapper.length = source.length - 8;
+      
+    }
+  }
+  
+  protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+    if (spare.length + 8 >= buffer.length) {
+      buffer = ArrayUtil.grow(buffer, spare.length + 8);
+    }
+    output.reset(buffer);
+    output.writeBytes(spare.bytes, spare.offset, spare.length);
+    output.writeLong(weight);
+    writer.write(buffer, 0, output.getPosition());
+  }
+  
+  protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+    tmpInput.reset(scratch.bytes);
+    tmpInput.skipBytes(scratch.length - 8); // skip the suggestion bytes
+    scratch.length -= 8; // strip the trailing 8-byte weight
+    return tmpInput.readLong();
  }
  
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
@ -26,6 +26,7 @@ import org.apache.lucene.util.BytesRef;
 /**
 * This wrapper buffers the incoming elements and makes sure they are in
 * random order.
+ * @lucene.experimental
 */
 public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {
  // TODO keep this for now
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/BytesRefSorter.java
@ -18,13 +18,16 @@ package org.apache.lucene.search.suggest.fst;
 */

 import java.io.IOException;
-import java.util.Iterator;
+import java.util.Comparator;

 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;

 /**
 * Collects {@link BytesRef} and then allows one to iterate over their sorted order. Implementations
 * of this interface will be called in a single-threaded scenario.
+ * @lucene.experimental
+ * @lucene.internal  
 */
 public interface BytesRefSorter {
  /**
@ -42,5 +45,7 @@ public interface BytesRefSorter {
   * 
   * @throws IOException If an I/O exception occurs.
   */
-  Iterator<BytesRef> iterator() throws IOException;
+   BytesRefIterator iterator() throws IOException;
+   
+   Comparator<BytesRef> getComparator();
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
@ -18,14 +18,17 @@ package org.apache.lucene.search.suggest.fst;
 */

 import java.io.*;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
+import java.util.Comparator;

 import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesReader;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;

 /**
 * Builds and iterates over sequences stored on disk.
+ * @lucene.experimental
+ * @lucene.internal
 */
 public class ExternalRefSorter implements BytesRefSorter, Closeable {
  private final Sort sort;
@ -38,30 +41,31 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
   */
  public ExternalRefSorter(Sort sort) throws IOException {
    this.sort = sort;
-    this.input = File.createTempFile("RefSorter-", ".raw", Sort.defaultTempDir());
+    this.input = File.createTempFile("RefSorter-", ".raw",
+        Sort.defaultTempDir());
    this.writer = new Sort.ByteSequencesWriter(input);
  }
  
  @Override
  public void add(BytesRef utf8) throws IOException {
-    if (writer == null)
-      throw new IllegalStateException();
+    if (writer == null) throw new IllegalStateException();
    writer.write(utf8);
  }
  
-  @Override
-  public Iterator<BytesRef> iterator() throws IOException {
+  public BytesRefIterator iterator() throws IOException {
    if (sorted == null) {
      closeWriter();
      
-      sorted = File.createTempFile("RefSorter-", ".sorted", Sort.defaultTempDir());
+      sorted = File.createTempFile("RefSorter-", ".sorted",
+          Sort.defaultTempDir());
      sort.sort(input, sorted);
      
      input.delete();
      input = null;
    }
    
-    return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted));
+    return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted),
+        sort.getComparator());
  }
  
  private void closeWriter() throws IOException {
@ -87,36 +91,50 @@ public class ExternalRefSorter implements BytesRefSorter, Closeable {
  /**
   * Iterate over byte refs in a file.
   */
-  class ByteSequenceIterator implements Iterator<BytesRef> {
-    private ByteSequencesReader reader;
-    private byte[] next;
+  class ByteSequenceIterator implements BytesRefIterator {
+    private final ByteSequencesReader reader;
+    private BytesRef scratch = new BytesRef();
+    private final Comparator<BytesRef> comparator;
    
-    public ByteSequenceIterator(ByteSequencesReader reader) throws IOException {
+    public ByteSequenceIterator(ByteSequencesReader reader,
+        Comparator<BytesRef> comparator) {
      this.reader = reader;
-      this.next = reader.read();
+      this.comparator = comparator;
    }
    
    @Override
-    public boolean hasNext() {
-      return next != null;
-    }
-    
-    @Override
-    public BytesRef next() {
-      if (next == null) throw new NoSuchElementException();
-      BytesRef r = new BytesRef(next);
-      try {
-        next = reader.read();
-        if (next == null) {
-          reader.close();
-        }
-      } catch (IOException e) {
-        throw new RuntimeException(e);
+    public BytesRef next() throws IOException {
+      if (scratch == null) {
+        return null;
+      }
+      boolean success = false;
+      try {
+        byte[] next = reader.read();
+        if (next != null) {
+          scratch.bytes = next;
+          scratch.length = next.length;
+          scratch.offset = 0;
+        } else {
+          IOUtils.close(reader);
+          scratch = null;
+        }
+        success = true;
+        return scratch;
+      } finally {
+        if (!success) {
+          IOUtils.closeWhileHandlingException(reader);
+        }
      }
-      return r;
    }
    
    @Override
-    public void remove() { throw new UnsupportedOperationException(); }
+    public Comparator<BytesRef> getComparator() {
+      return comparator;
+    }
+  }
+
+  @Override
+  public Comparator<BytesRef> getComparator() {
+    return sort.getComparator();
  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
@ -28,6 +28,7 @@ import org.apache.lucene.util.fst.FST.Arc;
 * Finite state automata based implementation of "autocomplete" functionality.
 * 
 * @see FSTCompletionBuilder
+ * @lucene.experimental
 */

 // TODO: we could store exact weights as outputs from the FST (int4 encoded
@ -159,10 +160,10 @@ public class FSTCompletion {
   * @param utf8
   *          The sequence of utf8 bytes to follow.
   * 
-   * @return Returns the bucket number of the match or <code>null</code> if no
+   * @return Returns the bucket number of the match or <code>-1</code> if no
   *         match was found.
   */
-  private Integer getExactMatchStartingFromRootArc(
+  private int getExactMatchStartingFromRootArc(
      int rootArcIndex, BytesRef utf8) {
    // Get the UTF-8 bytes representation of the input key.
    try {
@ -186,7 +187,7 @@ public class FSTCompletion {
    }
    
    // No match.
-    return null;
+    return -1;
  }
  
  /**
@ -273,8 +274,8 @@ public class FSTCompletion {
          // exact match, if requested.
          if (exactFirst) {
            if (!checkExistingAndReorder(res, key)) {
-              Integer exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
-              if (exactMatchBucket != null) {
+              int exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
+              if (exactMatchBucket != -1) {
                // Insert as the first result and truncate at num.
                while (res.size() >= num) {
                  res.remove(res.size() - 1);
@ -385,10 +386,10 @@ public class FSTCompletion {
  }

  /**
-   * Returns the bucket assigned to a given key (if found) or <code>null</code> if
+   * Returns the bucket assigned to a given key (if found) or <code>-1</code> if
   * no exact match exists.
   */
-  public Integer getBucket(CharSequence key) {
+  public int getBucket(CharSequence key) {
    return getExactMatchStartingFromRootArc(0, new BytesRef(key));
  }

--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
@ -19,9 +19,9 @@ package org.apache.lucene.search.suggest.fst;

 import java.io.Closeable;
 import java.io.IOException;
-import java.util.Iterator;

 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.fst.*;

@ -98,6 +98,7 @@ import org.apache.lucene.util.fst.*;
 * change, requiring you to rebuild the FST suggest index.
 * 
 * @see FSTCompletion
+ * @lucene.experimental
 */
 public class FSTCompletionBuilder {
  /** 
@ -143,10 +144,11 @@ public class FSTCompletionBuilder {

  /**
   * Creates an {@link FSTCompletion} with default options: 10 buckets, exact match
-   * promoted to first position and {@link InMemorySorter}.
+   * promoted to first position and {@link InMemorySorter} with a comparator obtained from
+   * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
   */
  public FSTCompletionBuilder() {
-    this(DEFAULT_BUCKETS, new InMemorySorter(), Integer.MAX_VALUE);
+    this(DEFAULT_BUCKETS, new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()), Integer.MAX_VALUE);
  }

  /**
@ -237,10 +239,12 @@ public class FSTCompletionBuilder {
        shareMaxTailLength, outputs, null, false);
    
    BytesRef scratch = new BytesRef();
+    BytesRef entry;
    final IntsRef scratchIntsRef = new IntsRef();
    int count = 0;
-    for (Iterator<BytesRef> i = sorter.iterator(); i.hasNext(); count++) {
-      BytesRef entry = i.next();
+    BytesRefIterator iter = sorter.iterator();
+    while((entry = iter.next()) != null) {
+      count++;
      if (scratch.compareTo(entry) != 0) {
        builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
        scratch.copyBytes(entry);
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
@ -59,6 +59,7 @@ import org.apache.lucene.util.fst.NoOutputs;
 * use {@link FSTCompletion} directly or {@link TSTLookup}, for example.
 * 
 * @see FSTCompletion
+ * @lucene.experimental
 */
 public class FSTCompletionLookup extends Lookup {
  /** 
@ -171,7 +172,7 @@ public class FSTCompletionLookup extends Lookup {
        }

        output.reset(buffer);
-        output.writeInt(FloatMagic.toSortable(tfit.weight()));
+        output.writeInt(encodeWeight(tfit.weight()));
        output.writeBytes(spare.bytes, spare.offset, spare.length);
        writer.write(buffer, 0, output.getPosition());
      }
@ -188,13 +189,13 @@ public class FSTCompletionLookup extends Lookup {
      reader = new Sort.ByteSequencesReader(tempSorted);
      long line = 0;
      int previousBucket = 0;
-      float previousScore = 0;
+      int previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
      BytesRef tmp1 = new BytesRef();
      BytesRef tmp2 = new BytesRef();
      while (reader.read(tmp1)) {
        input.reset(tmp1.bytes);
-        float currentScore = FloatMagic.fromSortable(input.readInt());
+        int currentScore = input.readInt();

        int bucket;
        if (line > 0 && currentScore == previousScore) {
@ -231,6 +232,14 @@ public class FSTCompletionLookup extends Lookup {
    }
  }
  
+  /** weight -> cost */
+  private static int encodeWeight(long value) {
+    if (value < Integer.MIN_VALUE || value > Integer.MAX_VALUE) {
+      throw new UnsupportedOperationException("cannot encode value: " + value);
+    }
+    return (int)value;
+  }
+
  @Override
  public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
    final List<Completion> completions;
@ -250,19 +259,9 @@ public class FSTCompletionLookup extends Lookup {
    return results;
  }

-  @Override
-  public boolean add(CharSequence key, Object value) {
-    // Not supported.
-    return false;
-  }
-
-  @Override
  public Object get(CharSequence key) {
-    Integer bucket = normalCompletion.getBucket(key);
-    if (bucket == null)
-      return null;
-    else
-      return (float) normalCompletion.getBucket(key) / normalCompletion.getBucketCount();
+    final int bucket = normalCompletion.getBucket(key);
+    return bucket == -1 ? null : Long.valueOf(bucket);
  }

  /**
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FloatMagic.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FloatMagic.java
@ -1,75 +0,0 @@
-package org.apache.lucene.search.suggest.fst;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.NumericUtils;
-
-/**
- * Converts normalized float representations ({@link Float#floatToIntBits(float)})
- * into integers that are directly sortable in int4 representation (or unsigned values or
- * after promoting to a long with higher 32-bits zeroed).
- */
-class FloatMagic {
-  /**
-   * Convert a float to a directly sortable unsigned integer. For sortable signed
-   * integers, see {@link NumericUtils#floatToSortableInt(float)}.
-   */
-  public static int toSortable(float f) {
-    return floatBitsToUnsignedOrdered(Float.floatToRawIntBits(f));
-  }
-
-  /**
-   * Back from {@link #toSortable(float)} to float.
-   */
-  public static float fromSortable(int v) {
-    return Float.intBitsToFloat(unsignedOrderedToFloatBits(v));
-  }
-
-  /**
-   * Convert float bits to directly sortable bits. 
-   * Normalizes all NaNs to canonical form.
-   */
-  static int floatBitsToUnsignedOrdered(int v) {
-    // Canonicalize NaN ranges. I assume this check will be faster here than 
-    // (v == v) == false on the FPU? We don't distinguish between different
-    // flavors of NaNs here (see http://en.wikipedia.org/wiki/NaN). I guess
-    // in Java this doesn't matter much anyway.
-    if ((v & 0x7fffffff) > 0x7f800000) {
-      // Apply the logic below to a canonical "quiet NaN"
-      return 0x7fc00000 ^ 0x80000000;
-    }
-
-    if (v < 0) {
-      // Reverse the order of negative values and push them before positive values.
-      return ~v;
-    } else {
-      // Shift positive values after negative, but before NaNs, they're sorted already.
-      return v ^ 0x80000000;
-    }
-  }
-
-  /**
-   * Back from {@link #floatBitsToUnsignedOrdered(int)}.
-   */
-  static int unsignedOrderedToFloatBits(int v) {
-    if (v < 0)
-      return v & ~0x80000000;
-    else
-      return ~v; 
-  }
-}
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InMemorySorter.java
@ -17,29 +17,40 @@ package org.apache.lucene.search.suggest.fst;
 * limitations under the License.
 */

-import java.util.*;
+import java.util.Comparator;

+import org.apache.lucene.search.suggest.BytesRefList;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;

 /**
 * An {@link BytesRefSorter} that keeps all the entries in memory.
+ * @lucene.experimental
+ * @lucene.internal
 */
 public final class InMemorySorter implements BytesRefSorter {
-  // TODO: use a single byte[] to back up all entries?
-  private final ArrayList<BytesRef> refs = new ArrayList<BytesRef>();
-  
+  // Holds every entry handed to add().
+  private final BytesRefList buffer = new BytesRefList();
+  // Flipped to true by iterator(); add() throws once it is set.
  private boolean closed = false;
+  // Ordering passed to the buffer's iterator and reported by getComparator().
+  private final Comparator<BytesRef> comparator;
+
+  /**
+   * Creates a sorter that iterates its entries using the given comparator.
+   *
+   * @param comparator the order handed to the buffer when {@link #iterator()} is called
+   */
+  public InMemorySorter(Comparator<BytesRef> comparator) {
+    this.comparator = comparator;
+  }
  
  @Override
  public void add(BytesRef utf8) {
    if (closed) throw new IllegalStateException();
-    refs.add(BytesRef.deepCopyOf(utf8));
+    buffer.append(utf8);
  }

  @Override
-  public Iterator<BytesRef> iterator() {
+  public BytesRefIterator iterator() {
+    // Requesting an iterator closes the sorter for further additions.
    closed = true;
-    Collections.sort(refs, BytesRef.getUTF8SortedAsUnicodeComparator());
-    return Collections.unmodifiableCollection(refs).iterator();
+    return buffer.iterator(comparator);
+  }
+
+  @Override
+  public Comparator<BytesRef> getComparator() {
+    return comparator;
  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
@ -20,15 +20,10 @@ package org.apache.lucene.search.suggest.fst;
 import java.io.*;
 import java.util.*;

+import org.apache.lucene.search.suggest.BytesRefList;
 import org.apache.lucene.util.*;
 import org.apache.lucene.util.PriorityQueue;

-// TODO: the buffer is currently byte[][] which with very small arrays will terribly overallocate
-// memory (alignments) and make GC very happy.
-// 
-// We could move it to a single byte[] + and use custom sorting, but we'd need to check if this
-// yields any improvement first.
-
 /**
 * On-disk sorting of byte arrays. Each byte array (entry) is a composed of the following
 * fields:
@ -38,6 +33,8 @@ import org.apache.lucene.util.PriorityQueue;
 * </ul>
 * 
 * @see #sort(File, File)
+ * @lucene.experimental
+ * @lucene.internal
 */
 public final class Sort {
  public final static int MB = 1024 * 1024;
@ -59,11 +56,6 @@ public final class Sort {
   */
  public final static int MAX_TEMPFILES = 128;

-  /**
-   * Minimum slot buffer expansion.
-   */
-  private final static int MIN_EXPECTED_GROWTH = 1000;
-
  /** 
   * A bit more descriptive unit for constructors.
   * 
@ -111,21 +103,6 @@ public final class Sort {
    }
  }

-  /**
-   * byte[] in unsigned byte order.
-   */
-  static final Comparator<byte[]> unsignedByteOrderComparator = new Comparator<byte[]>() {
-    public int compare(byte[] left, byte[] right) {
-      final int max = Math.min(left.length, right.length);
-      for (int i = 0, j = 0; i < max; i++, j++) {
-        int diff = (left[i]  & 0xff) - (right[j] & 0xff); 
-        if (diff != 0) 
-          return diff;
-      }
-      return left.length - right.length;
-    }
-  };
-
  /**
   * Sort info (debugging mostly).
   */
@ -149,14 +126,15 @@ public final class Sort {
    }
  }

-  private final static byte [][] EMPTY = new byte [0][];
-
  private final BufferSize ramBufferSize;
  private final File tempDirectory;
  
-  private byte [][] buffer = new byte [0][];
+  private final BytesRefList buffer = new BytesRefList();
  private SortInfo sortInfo;
  private int maxTempFiles;
+  private final Comparator<BytesRef> comparator;
+  
+  public static final Comparator<BytesRef> DEFAULT_COMPARATOR = BytesRef.getUTF8SortedAsUnicodeComparator();

  /**
   * Defaults constructor.
@ -165,13 +143,17 @@ public final class Sort {
   * @see BufferSize#automatic()
   */
  public Sort() throws IOException {
-    this(BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+    this(DEFAULT_COMPARATOR, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
+  }
+  
+  /**
+   * Creates a sorter with the given comparator, delegating to the all-details
+   * constructor with {@link BufferSize#automatic()}, the default temporary
+   * directory and {@link #MAX_TEMPFILES}.
+   *
+   * @param comparator the order stored for this sorter and returned by {@link #getComparator()}
+   */
+  public Sort(Comparator<BytesRef> comparator) throws IOException {
+    this(comparator, BufferSize.automatic(), defaultTempDir(), MAX_TEMPFILES);
  }

  /**
   * All-details constructor.
   */
-  public Sort(BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
+  public Sort(Comparator<BytesRef> comparator, BufferSize ramBufferSize, File tempDirectory, int maxTempfiles) {
    if (ramBufferSize.bytes < ABSOLUTE_MIN_SORT_BUFFER_SIZE) {
      throw new IllegalArgumentException(MIN_BUFFER_SIZE_MSG + ": " + ramBufferSize.bytes);
    }
@ -183,6 +165,7 @@ public final class Sort {
    this.ramBufferSize = ramBufferSize;
    this.tempDirectory = tempDirectory;
    this.maxTempFiles = maxTempfiles;
+    this.comparator = comparator;
  }

  /** 
@ -283,23 +266,25 @@ public final class Sort {

  /** Sort a single partition in-memory. */
  protected File sortPartition(int len) throws IOException {
-    byte [][] data = this.buffer;
+    BytesRefList data = this.buffer;
    File tempFile = File.createTempFile("sort", "partition", tempDirectory);

    long start = System.currentTimeMillis();
-    Arrays.sort(data, 0, len, unsignedByteOrderComparator);
    sortInfo.sortTime += (System.currentTimeMillis() - start);
    
-    ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+    final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
+    BytesRef spare;
    try {
-      for (int i = 0; i < len; i++) {
-        assert data[i].length <= Short.MAX_VALUE;
-        out.write(data[i]);
+      BytesRefIterator iter = buffer.iterator(comparator);
+      while((spare = iter.next()) != null) {
+        assert spare.length <= Short.MAX_VALUE;
+        out.write(spare);
      }
+      
      out.close();

      // Clean up the buffer for the next partition.
-      this.buffer = EMPTY;
+      data.clear();
      return tempFile;
    } finally {
      IOUtils.close(out);
@ -314,7 +299,7 @@ public final class Sort {

    PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(merges.size()) {
      protected boolean lessThan(FileAndTop a, FileAndTop b) {
-        return a.current.compareTo(b.current) < 0;
+        return comparator.compare(a.current, b.current) < 0;
      }
    };

@ -359,33 +344,18 @@ public final class Sort {
  /** Read in a single partition of data */
  int readPartition(ByteSequencesReader reader) throws IOException {
    long start = System.currentTimeMillis();
-
-    // We will be reallocating from scratch.
-    Arrays.fill(this.buffer, null);
-
-    int bytesLimit = this.ramBufferSize.bytes;
-    byte [][] data = this.buffer;
-    byte[] line;
-    int linesRead = 0;
-    while ((line = reader.read()) != null) {
-      if (linesRead + 1 >= data.length) {
-        data = Arrays.copyOf(data,
-            ArrayUtil.oversize(linesRead + MIN_EXPECTED_GROWTH, 
-                RamUsageEstimator.NUM_BYTES_OBJECT_REF));
-      }
-      data[linesRead++] = line;
-
+    // A single BytesRef is re-pointed at each byte[] returned by the reader and
+    // then appended to the shared buffer; reading stops when read() returns null.
+    final BytesRef scratch = new BytesRef();
+    while ((scratch.bytes = reader.read()) != null) {
+      scratch.length = scratch.bytes.length; 
+      buffer.append(scratch);
      // Account for the created objects.
      // (buffer slots do not account to buffer size.) 
-      bytesLimit -= line.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
-      if (bytesLimit < 0) {
+      // End this partition as soon as the buffer reports more bytes used than
+      // the configured RAM budget.
+      if (ramBufferSize.bytes < buffer.bytesUsed()) {
        break;
      }
    }
-    this.buffer = data;
-
    sortInfo.readTime += (System.currentTimeMillis() - start);
-    return linesRead;
+    // The partition length is whatever the buffer holds at this point.
+    return buffer.size();
  }

  static class FileAndTop {
@ -516,4 +486,8 @@ public final class Sort {
      }
    }
  }
+
+  /**
+   * Returns the comparator this sorter was constructed with.
+   */
+  public Comparator<BytesRef> getComparator() {
+    return comparator;
+  }  
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
@ -28,6 +28,8 @@ import java.util.List;

 import org.apache.lucene.search.spell.TermFreqIterator;
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.InputStreamDataInput;
@ -102,72 +104,27 @@ public class WFSTCompletionLookup extends Lookup {
  
  @Override
  public void build(TermFreqIterator iterator) throws IOException {
-    String prefix = getClass().getSimpleName();
-    File directory = Sort.defaultTempDir();
-    File tempInput = File.createTempFile(prefix, ".input", directory);
-    File tempSorted = File.createTempFile(prefix, ".sorted", directory);
-    
-    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
-    Sort.ByteSequencesReader reader = null;
    BytesRef scratch = new BytesRef();
+    TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator,
+        BytesRef.getUTF8SortedAsUnicodeComparator());
+    IntsRef scratchInts = new IntsRef();
+    BytesRef previous = null;
+    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
+    while ((scratch = iter.next()) != null) {
+      long cost = iter.weight();
      
-    boolean success = false;
-    try {
-      byte [] buffer = new byte [0];
-      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
-      BytesRef spare;
-      while ((spare = iterator.next()) != null) {
-        if (spare.length + 5 >= buffer.length) {
-          buffer = ArrayUtil.grow(buffer, spare.length + 5);
-        }
-
-        output.reset(buffer);
-        output.writeBytes(spare.bytes, spare.offset, spare.length);
-        output.writeByte((byte)0); // separator: not used, just for sort order
-        output.writeInt((int)encodeWeight(iterator.weight()));
-        writer.write(buffer, 0, output.getPosition());
+      if (previous == null) {
+        previous = new BytesRef();
+      } else if (scratch.equals(previous)) {
+        continue; // for duplicate suggestions, the best weight is actually
+                  // added
      }
-      writer.close();
-      new Sort().sort(tempInput, tempSorted);
-      reader = new Sort.ByteSequencesReader(tempSorted);
-      
-      PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
-      Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
-      
-      BytesRef previous = null;
-      BytesRef suggestion = new BytesRef();
-      IntsRef scratchInts = new IntsRef();
-      ByteArrayDataInput input = new ByteArrayDataInput();
-      while (reader.read(scratch)) {
-        suggestion.bytes = scratch.bytes;
-        suggestion.offset = scratch.offset;
-        suggestion.length = scratch.length - 5; // int + separator
-
-        input.reset(scratch.bytes);
-        input.skipBytes(suggestion.length + 1); // suggestion + separator
-        long cost = input.readInt();
-   
-        if (previous == null) {
-          previous = new BytesRef();
-        } else if (suggestion.equals(previous)) {
-          continue; // for duplicate suggestions, the best weight is actually added
-        }
-        Util.toIntsRef(suggestion, scratchInts);
-        builder.add(scratchInts, cost);
-        previous.copyBytes(suggestion);
-      }
-      fst = builder.finish();
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(reader, writer);
-      } else {
-        IOUtils.closeWhileHandlingException(reader, writer);
-      }
-      
-      tempInput.delete();
-      tempSorted.delete();
+      Util.toIntsRef(scratch, scratchInts);
+      builder.add(scratchInts, cost);
+      previous.copyBytes(scratch);
    }
+    fst = builder.finish();
  }

  @Override
@ -270,16 +227,10 @@ public class WFSTCompletionLookup extends Lookup {
    return output;
  }
  
-  @Override
-  public boolean add(CharSequence key, Object value) {
-    return false; // Not supported.
-  }
-
  /**
   * Returns the weight associated with an input string,
   * or null if it does not exist.
   */
-  @Override
  public Object get(CharSequence key) {
    Arc<Long> arc = new Arc<Long>();
    Long result = null;
@ -289,23 +240,51 @@ public class WFSTCompletionLookup extends Lookup {
    if (result == null || !arc.isFinal()) {
      return null;
    } else {
-      return decodeWeight(result + arc.nextFinalOutput);
+      return Integer.valueOf(decodeWeight(result + arc.nextFinalOutput));
    }
  }
  
  /** cost -> weight */
-  private static float decodeWeight(long encoded) {
-    return Integer.MAX_VALUE - encoded;
+  private static int decodeWeight(long encoded) {
+    return (int)(Integer.MAX_VALUE - encoded);
  }
  
  /** weight -> cost */
-  private static long encodeWeight(float value) {
-    if (Float.isNaN(value) || Float.isInfinite(value) || value < 0 || value > Integer.MAX_VALUE) {
+  private static int encodeWeight(long value) {
+    if (value < 0 || value > Integer.MAX_VALUE) {
      throw new UnsupportedOperationException("cannot encode value: " + value);
    }
    return Integer.MAX_VALUE - (int)value;
  }
  
+  /**
+   * Sorted term/weight iterator used by {@link #build(TermFreqIterator)}. Each
+   * entry is written as the suggestion bytes, a single zero separator byte and
+   * the cost produced by {@link #encodeWeight(long)} as an int.
+   */
+  private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper {
+
+    WFSTTermFreqIteratorWrapper(TermFreqIterator source,
+        Comparator<BytesRef> comparator) throws IOException {
+      super(source, comparator, true);
+    }
+
+    /**
+     * Writes one entry: suggestion bytes, separator byte, encoded weight.
+     */
+    @Override
+    protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
+      // 5 = one separator byte + four bytes for the int cost.
+      if (spare.length + 5 >= buffer.length) {
+        buffer = ArrayUtil.grow(buffer, spare.length + 5);
+      }
+      output.reset(buffer);
+      output.writeBytes(spare.bytes, spare.offset, spare.length);
+      output.writeByte((byte)0); // separator: not used, just for sort order
+      output.writeInt(encodeWeight(weight));
+      writer.write(buffer, 0, output.getPosition());
+    }
+    
+    /**
+     * Reads the trailing int cost of an entry and shrinks {@code scratch} so that
+     * it covers only the suggestion bytes.
+     *
+     * @return the cost exactly as written by {@link #encode}
+     */
+    @Override
+    protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+      // Start reading at scratch.offset so an entry that is a slice of a larger
+      // array is decoded from its own bytes rather than from index 0.
+      tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
+      tmpInput.skipBytes(scratch.length - 4); // suggestion + separator
+      scratch.length -= 5; // separator + int
+      return tmpInput.readInt();
+    }
+  }
+  
  static final Comparator<Long> weightComparator = new Comparator<Long> () {
    public int compare(Long left, Long right) {
      return left.compareTo(right);
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
@ -55,24 +55,22 @@ public class JaspellLookup extends Lookup {
    final CharsRef charsSpare = new CharsRef();

    while ((spare = tfit.next()) != null) {
-      float freq = tfit.weight();
+      final long weight = tfit.weight();
      if (spare.length == 0) {
        continue;
      }
      charsSpare.grow(spare.length);
      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
-      trie.put(charsSpare.toString(), new Float(freq));
+      trie.put(charsSpare.toString(), Long.valueOf(weight));
    }
  }

-  @Override
  public boolean add(CharSequence key, Object value) {
    trie.put(key, value);
    // XXX
    return false;
  }

-  @Override
  public Object get(CharSequence key) {
    return trie.get(key);
  }
@ -95,7 +93,7 @@ public class JaspellLookup extends Lookup {
    if (onlyMorePopular) {
      LookupPriorityQueue queue = new LookupPriorityQueue(num);
      for (String s : list) {
-        float freq = (Float)trie.get(s);
+        long freq = ((Number)trie.get(s)).longValue();
        queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq));
      }
      for (LookupResult lr : queue.getResults()) {
@ -104,7 +102,7 @@ public class JaspellLookup extends Lookup {
    } else {
      for (int i = 0; i < maxCnt; i++) {
        String s = list.get(i);
-        float freq = (Float)trie.get(s);
+        long freq = ((Number)trie.get(s)).longValue();
        res.add(new LookupResult(new CharsRef(s), freq));
      }      
    }
@ -131,7 +129,7 @@ public class JaspellLookup extends Lookup {
    node.splitchar = in.readChar();
    byte mask = in.readByte();
    if ((mask & HAS_VALUE) != 0) {
-      node.data = new Float(in.readFloat());
+      node.data = Long.valueOf(in.readLong());
    }
    if ((mask & LO_KID) != 0) {
      TSTNode kid = trie.new TSTNode('\0', node);
@ -171,7 +169,7 @@ public class JaspellLookup extends Lookup {
    if (node.data != null) mask |= HAS_VALUE;
    out.writeByte(mask);
    if (node.data != null) {
-      out.writeFloat((Float)node.data);
+      out.writeLong(((Number)node.data).longValue());
    }
    writeRecursively(out, node.relatives[TSTNode.LOKID]);
    writeRecursively(out, node.relatives[TSTNode.EQKID]);
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
@ -50,26 +50,24 @@ public class TSTLookup extends Lookup {
    }

    ArrayList<String> tokens = new ArrayList<String>();
-    ArrayList<Float> vals = new ArrayList<Float>();
+    ArrayList<Number> vals = new ArrayList<Number>();
    BytesRef spare;
    CharsRef charsSpare = new CharsRef();
    while ((spare = tfit.next()) != null) {
      charsSpare.grow(spare.length);
      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
      tokens.add(charsSpare.toString());
-      vals.add(new Float(tfit.weight()));
+      vals.add(Long.valueOf(tfit.weight()));
    }
    autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
  }

-  @Override
  public boolean add(CharSequence key, Object value) {
    autocomplete.insert(root, key, value, 0);
    // XXX we don't know if a new node was created
    return true;
  }

-  @Override
  public Object get(CharSequence key) {
    List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
    if (list == null || list.isEmpty()) {
@ -107,7 +105,7 @@ public class TSTLookup extends Lookup {
    if (onlyMorePopular) {
      LookupPriorityQueue queue = new LookupPriorityQueue(num);
      for (TernaryTreeNode ttn : list) {
-        queue.insertWithOverflow(new LookupResult(ttn.token, (Float)ttn.val));
+        queue.insertWithOverflow(new LookupResult(ttn.token, ((Number)ttn.val).longValue()));
      }
      for (LookupResult lr : queue.getResults()) {
        res.add(lr);
@ -115,7 +113,7 @@ public class TSTLookup extends Lookup {
    } else {
      for (int i = 0; i < maxCnt; i++) {
        TernaryTreeNode ttn = list.get(i);
-        res.add(new LookupResult(ttn.token, (Float)ttn.val));
+        res.add(new LookupResult(ttn.token, ((Number)ttn.val).longValue()));
      }
    }
    return res;
@ -146,7 +144,7 @@ public class TSTLookup extends Lookup {
      node.token = in.readUTF();
    }
    if ((mask & HAS_VALUE) != 0) {
-      node.val = new Float(in.readFloat());
+      node.val = Long.valueOf(in.readLong());
    }
    if ((mask & LO_KID) != 0) {
      node.loKid = new TernaryTreeNode();
@ -184,7 +182,7 @@ public class TSTLookup extends Lookup {
    if (node.val != null) mask |= HAS_VALUE;
    out.writeByte(mask);
    if (node.token != null) out.writeUTF(node.token);
-    if (node.val != null) out.writeFloat((Float)node.val);
+    if (node.val != null) out.writeLong(((Number)node.val).longValue());
    // recurse and write kids
    if (node.loKid != null) {
      writeRecursively(out, node.loKid);
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
@ -17,8 +17,10 @@
 package org.apache.lucene.search.suggest;

 import java.io.File;
+import java.util.List;

 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
 import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
 import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
@ -74,16 +76,18 @@ public class PersistenceTest extends LuceneTestCase {
    lookup.load(storeDir);

    // Assert validity.
-    float previous = Float.NEGATIVE_INFINITY;
+    long previous = Long.MIN_VALUE;
    for (TermFreq k : keys) {
-      Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random));
-      assertNotNull(k.term.utf8ToString(), val);
+      List<LookupResult> list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
+      assertEquals(1, list.size());
+      LookupResult lookupResult = list.get(0);
+      assertNotNull(k.term.utf8ToString(), lookupResult.key);

      if (supportsExactWeights) { 
-        assertEquals(k.term.utf8ToString(), Float.valueOf(k.v), val);
+        assertEquals(k.term.utf8ToString(), k.v, lookupResult.value);
      } else {
-        assertTrue(val + ">=" + previous, val >= previous);
-        previous = val.floatValue();
+        assertTrue(lookupResult.value + ">=" + previous, lookupResult.value >= previous);
+        previous = lookupResult.value;
      }
    }
  }
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
@ -33,55 +33,75 @@ public class TestBytesRefList extends LuceneTestCase {
  public void testAppend() throws IOException {
    BytesRefList list = new BytesRefList();
    List<String> stringList = new ArrayList<String>();
-    int entries = atLeast(500);
-    BytesRef spare = new BytesRef();
-    for (int i = 0; i < entries; i++) {
-      String randomRealisticUnicodeString = _TestUtil
-          .randomRealisticUnicodeString(random);
-      spare.copyChars(randomRealisticUnicodeString);
-      list.append(spare);
-      stringList.add(randomRealisticUnicodeString);
-    }
-    for (int i = 0; i < entries; i++) {
-      assertNotNull(list.get(spare, i));
-      assertEquals("entry " + i + " doesn't match", stringList.get(i),
-          spare.utf8ToString());
-    }
+    for (int j = 0; j < 2; j++) {
+      if (j > 0 && random.nextBoolean()) {
+        list.clear();
+        stringList.clear();
+      }
+      int entries = atLeast(500);
+      BytesRef spare = new BytesRef();
+      for (int i = 0; i < entries; i++) {
+        String randomRealisticUnicodeString = _TestUtil
+            .randomRealisticUnicodeString(random);
+        spare.copyChars(randomRealisticUnicodeString);
+        list.append(spare);
+        stringList.add(randomRealisticUnicodeString);
+      }
+      for (int i = 0; i < entries; i++) {
+        assertNotNull(list.get(spare, i));
+        assertEquals("entry " + i + " doesn't match", stringList.get(i),
+            spare.utf8ToString());
+      }
      
-    // check random
-    for (int i = 0; i < entries; i++) {
-      int e = random.nextInt(entries);
-      assertNotNull(list.get(spare, e));
-      assertEquals("entry " + i + " doesn't match", stringList.get(e),
-          spare.utf8ToString());
-    }
-    for (int i = 0; i < 2; i++) {
+      // check random
+      for (int i = 0; i < entries; i++) {
+        int e = random.nextInt(entries);
+        assertNotNull(list.get(spare, e));
+        assertEquals("entry " + i + " doesn't match", stringList.get(e),
+            spare.utf8ToString());
+      }
+      for (int i = 0; i < 2; i++) {
        
-      BytesRefIterator iterator = list.iterator();
-      for (String string : stringList) {
-        assertEquals(string, iterator.next().utf8ToString());
+        BytesRefIterator iterator = list.iterator();
+        for (String string : stringList) {
+          assertEquals(string, iterator.next().utf8ToString());
+        }
      }
    }
  }
  
-  public void testSort() {
+  public void testSort() throws IOException {
    BytesRefList list = new BytesRefList();
    List<String> stringList = new ArrayList<String>();
-    int entries = atLeast(500);
-    BytesRef spare = new BytesRef();
-    for (int i = 0; i < entries; i++) {
-      String randomRealisticUnicodeString = _TestUtil.randomRealisticUnicodeString(random);
-      spare.copyChars(randomRealisticUnicodeString);
-      list.append(spare);
-      stringList.add(randomRealisticUnicodeString);
-    }
-    Collections.sort(stringList);
-    int[] sortedOrds = list.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
-    for (int i = 0; i < entries; i++) {
-      assertNotNull(list.get(spare, sortedOrds[i]));
-      assertEquals("entry " + i + " doesn't match", stringList.get(i),
-          spare.utf8ToString());
+
+    for (int j = 0; j < 2; j++) {
+      if (j > 0 && random.nextBoolean()) {
+        list.clear();
+        stringList.clear();
+      }
+      int entries = atLeast(500);
+      BytesRef spare = new BytesRef();
+      for (int i = 0; i < entries; i++) {
+        String randomRealisticUnicodeString = _TestUtil
+            .randomRealisticUnicodeString(random);
+        spare.copyChars(randomRealisticUnicodeString);
+        list.append(spare);
+        stringList.add(randomRealisticUnicodeString);
+      }
+      
+      Collections.sort(stringList);
+      BytesRefIterator iter = list.iterator(BytesRef
+          .getUTF8SortedAsUTF16Comparator());
+      int i = 0;
+      while ((spare = iter.next()) != null) {
+        assertEquals("entry " + i + " doesn't match", stringList.get(i),
+            spare.utf8ToString());
+        i++;
+      }
+      assertNull(iter.next());
+      assertEquals(i, stringList.size());
    }
    
  }
+  
 }
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java
@ -17,12 +17,16 @@ package org.apache.lucene.search.suggest;
 * the License.
 */

+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeMap;

 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;

@ -38,7 +42,8 @@ public class TestTermFreqIterator extends LuceneTestCase {
  public void testTerms() throws Exception {
    int num = atLeast(10000);
    
-    TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>();
+    Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
+    TreeMap<BytesRef,Long> sorted = new TreeMap<BytesRef,Long>(comparator);
    TermFreq[] unsorted = new TermFreq[num];

    for (int i = 0; i < num; i++) {
@ -52,13 +57,13 @@ public class TestTermFreqIterator extends LuceneTestCase {
    }
    
    // test the sorted iterator wrapper
-    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator());
+    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator);
    Iterator<Map.Entry<BytesRef,Long>> expected = sorted.entrySet().iterator();
    while (expected.hasNext()) {
      Map.Entry<BytesRef,Long> entry = expected.next();
      
      assertEquals(entry.getKey(), wrapper.next());
-      assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F);
+      assertEquals(entry.getValue().longValue(), wrapper.weight());
    }
    assertNull(wrapper.next());
    
@ -72,4 +77,57 @@ public class TestTermFreqIterator extends LuceneTestCase {
    }
    assertEquals(sorted, actual);
  }
+  
+  
+  /**
+   * Checks the three-argument SortedTermFreqIteratorWrapper constructor (third
+   * argument true) against a reference built from term bytes with the 8-byte
+   * weight appended, sorted with the same comparator.
+   */
+  public void testRaw() throws Exception {
+    int num = atLeast(10000);
+    
+    Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+    BytesRefHash sorted = new BytesRefHash();
+    TermFreq[] unsorted = new TermFreq[num];
+    byte[] buffer = new byte[0];
+    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+    for (int i = 0; i < num; i++) {
+      BytesRef spare;
+      long weight;
+      // Keep drawing random term/weight pairs until the hash accepts the
+      // combined bytes (add() returning a negative value triggers a retry).
+      do {
+        spare = new BytesRef(_TestUtil.randomUnicodeString(random));
+        if (spare.length + 8 >= buffer.length) {
+          buffer = ArrayUtil.grow(buffer, spare.length + 8);
+        }
+        output.reset(buffer);
+        output.writeBytes(spare.bytes, spare.offset, spare.length);
+        weight = random.nextLong();
+        output.writeLong(weight);
+        
+      } while (sorted.add(new BytesRef(buffer, 0, output.getPosition())) < 0);
+      unsorted[i] = new TermFreq(spare, weight);
+    }
+    
+    // test the sorted iterator wrapper
+    TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator, true);
+    int[] sort = sorted.sort(comparator);
+    int size = sorted.size();
+    BytesRef spare = new BytesRef();
+    for (int i = 0; i < size; i++) {
+      sorted.get(sort[i], spare);
+      spare.length -= 8; // sub the long value
+      assertEquals(spare, wrapper.next());
+      // Re-point spare at the trailing 8 bytes to read back the expected weight.
+      spare.offset = spare.offset + spare.length;
+      spare.length = 8;
+      assertEquals(asLong(spare), wrapper.weight());
+    }
+    assertNull(wrapper.next());
+  }
+  
+  /**
+   * Combines the eight bytes starting at {@code b.offset} into a long, with the
+   * first four bytes forming the upper half and the next four the lower half.
+   */
+  public static long asLong(BytesRef b) {
+    final long upper = asIntInternal(b, b.offset);
+    // Mask so the sign of the lower int does not leak into the upper half.
+    final long lower = asIntInternal(b, b.offset + 4) & 0xFFFFFFFFL;
+    return (upper << 32) | lower;
+  }
+
+  /**
+   * Assembles an int from the four bytes of {@code b.bytes} starting at
+   * {@code pos}, most significant byte first.
+   */
+  private static int asIntInternal(BytesRef b, int pos) {
+    int value = 0;
+    for (int i = 0; i < 4; i++) {
+      value = (value << 8) | (b.bytes[pos + i] & 0xFF);
+    }
+    return value;
+  }
 }
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/BytesRefSortersTest.java
@ -17,9 +17,8 @@ package org.apache.lucene.search.suggest.fst;
 * limitations under the License.
 */

-import java.util.Iterator;
-
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;

@ -31,7 +30,7 @@ public class BytesRefSortersTest extends LuceneTestCase {

  @Test
  public void testInMemorySorter() throws Exception {
-    check(new InMemorySorter());
+    check(new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()));
  }

  private void check(BytesRefSorter sorter) throws Exception {
@ -42,8 +41,8 @@ public class BytesRefSortersTest extends LuceneTestCase {
    }

    // Create two iterators and check that they're aligned with each other.
-    Iterator<BytesRef> i1 = sorter.iterator();
-    Iterator<BytesRef> i2 = sorter.iterator();
+    BytesRefIterator i1 = sorter.iterator();
+    BytesRefIterator i2 = sorter.iterator();
    
    // Verify sorter contract.
    try {
@ -52,10 +51,12 @@ public class BytesRefSortersTest extends LuceneTestCase {
    } catch (IllegalStateException e) {
      // Expected.
    }
-
-    while (i1.hasNext() && i2.hasNext()) {
-      assertEquals(i1.next(), i2.next());
+    BytesRef spare1;
+    BytesRef spare2;
+    while ((spare1 = i1.next()) != null && (spare2 = i2.next()) != null) {
+      assertEquals(spare1, spare2);
    }
-    assertEquals(i1.hasNext(), i2.hasNext());
+    assertNull(i1.next());
+    assertNull(i2.next());
  }  
 }
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
@ -165,9 +165,9 @@ public class FSTCompletionTest extends LuceneTestCase {

    // All the weights were constant, so all returned buckets must be constant, whatever they
    // are.
-    Float previous = null; 
+    Long previous = null; 
    for (TermFreq tf : keys) {
-      Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random));
+      Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random))).longValue();
      if (previous != null) {
        assertEquals(previous, current);
      }
@ -181,7 +181,7 @@ public class FSTCompletionTest extends LuceneTestCase {
    FSTCompletionLookup lookup = new FSTCompletionLookup();
    lookup.build(new TermFreqArrayIterator(input));
    for (TermFreq tf : input) {
-      assertTrue("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null);
+      assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)));
      assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key.toString());
    }

--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FloatMagicTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FloatMagicTest.java
@ -1,140 +0,0 @@
-package org.apache.lucene.search.suggest.fst;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.*;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NumericUtils;
-import org.junit.Ignore;
-import org.junit.Test;
-
-public class FloatMagicTest extends LuceneTestCase {
-  public void testFloatMagic() {
-    ArrayList<Float> floats = new ArrayList<Float>(Arrays.asList(
-        Float.intBitsToFloat(0x7f800001), // NaN (invalid combination).
-        Float.intBitsToFloat(0x7fffffff), // NaN (invalid combination).
-        Float.intBitsToFloat(0xff800001), // NaN (invalid combination).
-        Float.intBitsToFloat(0xffffffff), // NaN (invalid combination).
-        Float.POSITIVE_INFINITY,
-        Float.MAX_VALUE,
-        100f,
-        0f,
-        0.1f,
-        Float.MIN_VALUE,
-        Float.NaN,
-        -0.0f,
-        -Float.MIN_VALUE,
-        -0.1f,
-        -1f,
-        -10f,
-        Float.NEGATIVE_INFINITY));
-
-    // Sort them using juc.
-    Collections.sort(floats);
-    
-    // Convert to sortable int4 representation (as long to have an unsigned sort).
-    long [] int4 = new long [floats.size()];
-    for (int i = 0; i < floats.size(); i++) {
-      int4[i] = FloatMagic.toSortable(floats.get(i)) & 0xffffffffL;
-
-      /*
-      System.out.println(
-          String.format("raw %8s sortable %8s %8s numutils %8s %s",
-              Integer.toHexString(Float.floatToRawIntBits(floats.get(i))),
-              Integer.toHexString(FloatMagic.toSortable(floats.get(i))),
-              Integer.toHexString(FloatMagic.unsignedOrderedToFloatBits(FloatMagic.toSortable(floats.get(i)))),
-              Integer.toHexString(NumericUtils.floatToSortableInt(floats.get(i))),
-              floats.get(i)));
-      */
-    }
-
-    // Sort and compare. Should be identical order.
-    Arrays.sort(int4);
-    ArrayList<Float> backFromFixed = new ArrayList<Float>();
-    for (int i = 0; i < int4.length; i++) {
-      backFromFixed.add(FloatMagic.fromSortable((int) int4[i]));
-    }
-
-    /*
-    for (int i = 0; i < int4.length; i++) {
-      System.out.println(
-          floats.get(i) + " " + FloatMagic.fromSortable((int) int4[i]));
-    }
-    */
-    
-    assertEquals(floats, backFromFixed);
-  }
-
-  @Ignore("Once checked, valid forever?") @Test
-  public void testRoundTripFullRange() {
-    int i = 0;
-    do {
-      float f = Float.intBitsToFloat(i);
-      float f2 = FloatMagic.fromSortable(FloatMagic.toSortable(f));
-      
-      if (!((Float.isNaN(f) && Float.isNaN(f2)) || f == f2)) {
-        throw new RuntimeException("! " + Integer.toHexString(i) + "> " + f + " " + f2); 
-      }
-
-      if ((i & 0xffffff) == 0) {
-        System.out.println(Integer.toHexString(i));
-      }
-      
-      i++;
-    } while (i != 0);
-  }
-  
-  @Ignore("Once checked, valid forever?") @Test
-  public void testIncreasingFullRange() {
-    // -infinity ... -0.0
-    for (int i = 0xff800000; i != 0x80000000; i--) {
-      checkSmaller(i, i - 1); 
-    }
-    
-    // -0.0 +0.0
-    checkSmaller(0x80000000, 0);
-
-    // +0.0 ... +infinity
-    for (int i = 0; i != 0x7f800000; i++) {
-      checkSmaller(i, i + 1); 
-    }
-
-    // All other are NaNs and should be after positive infinity.
-    final long infinity = toSortableL(Float.POSITIVE_INFINITY);
-    for (int i = 0x7f800001; i != 0x7fffffff; i++) {
-      assertTrue(infinity < toSortableL(Float.intBitsToFloat(i)));
-    }
-    for (int i = 0xff800001; i != 0xffffffff; i++) {
-      assertTrue(infinity < toSortableL(Float.intBitsToFloat(i)));
-    }
-  }
-
-  private long toSortableL(float f) {
-    return FloatMagic.toSortable(f) & 0xffffffffL;
-  }
-
-  private void checkSmaller(int i1, int i2) {
-    float f1 = Float.intBitsToFloat(i1);
-    float f2 = Float.intBitsToFloat(i2);
-    if (f1 > f2) {
-      throw new AssertionError(f1 + " " + f2 + " " + i1 + " " + i2);
-    }
-    assertTrue(toSortableL(f1) < toSortableL(f2));
-  }
-}
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/TestSort.java
@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest.fst;
 import java.io.*;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Comparator;

 import org.apache.lucene.search.suggest.fst.Sort.BufferSize;
 import org.apache.lucene.search.suggest.fst.Sort.ByteSequencesWriter;
@ -61,7 +62,7 @@ public class TestSort extends LuceneTestCase {
  @Test
  public void testIntermediateMerges() throws Exception {
    // Sort 20 mb worth of data with 1mb buffer, binary merging.
-    SortInfo info = checkSort(new Sort(BufferSize.megabytes(1), Sort.defaultTempDir(), 2), 
+    SortInfo info = checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(1), Sort.defaultTempDir(), 2), 
        generateRandom(Sort.MB * 20));
    assertTrue(info.mergeRounds > 10);
  }
@ -69,7 +70,7 @@ public class TestSort extends LuceneTestCase {
  @Test
  public void testSmallRandom() throws Exception {
    // Sort 20 mb worth of data with 1mb buffer.
-    SortInfo sortInfo = checkSort(new Sort(BufferSize.megabytes(1), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
+    SortInfo sortInfo = checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(1), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
        generateRandom(Sort.MB * 20));
    assertEquals(1, sortInfo.mergeRounds);
  }
@ -77,7 +78,7 @@ public class TestSort extends LuceneTestCase {
  @Test @Nightly
  public void testLargerRandom() throws Exception {
    // Sort 100MB worth of data with 15mb buffer.
-    checkSort(new Sort(BufferSize.megabytes(16), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
+    checkSort(new Sort(Sort.DEFAULT_COMPARATOR, BufferSize.megabytes(16), Sort.defaultTempDir(), Sort.MAX_TEMPFILES), 
        generateRandom(Sort.MB * 100));
  }

@ -93,13 +94,24 @@ public class TestSort extends LuceneTestCase {
    return bytes;
  }
  
+  static final Comparator<byte[]> unsignedByteOrderComparator = new Comparator<byte[]>() { // orders byte[] by unsigned byte value, position by position
+    public int compare(byte[] left, byte[] right) { // negative / zero / positive as left sorts before / equal to / after right
+      final int max = Math.min(left.length, right.length); // only the common prefix can be compared byte-by-byte
+      for (int i = 0, j = 0; i < max; i++, j++) { // i and j start at 0 and advance together, so they are always equal
+        int diff = (left[i]  & 0xff) - (right[j] & 0xff); // mask to 0..255 so bytes compare as unsigned, not signed
+        if (diff != 0) // first differing position decides the order
+          return diff;
+      }
+      return left.length - right.length; // common prefix is identical: the shorter array sorts first
+    }
+  };
  /**
   * Check sorting data on an instance of {@link Sort}.
   */
  private SortInfo checkSort(Sort sort, byte[][] data) throws IOException {
    File unsorted = writeAll("unsorted", data);

-    Arrays.sort(data, Sort.unsignedByteOrderComparator);
+    Arrays.sort(data, unsignedByteOrderComparator);
    File golden = writeAll("golden", data);

    File sorted = new File(tempDir, "sorted");
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
@ -117,7 +117,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
      // TODO: could be faster... but its slowCompletor for a reason
      for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
        if (e.getKey().startsWith(prefix)) {
-          matches.add(new LookupResult(e.getKey(), (float)e.getValue().longValue()));
+          matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
        }
      }

--- a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
@ -153,11 +153,6 @@ public class Suggester extends SolrSpellChecker {
    build(core, searcher);
  }

-  public void add(CharsRef query, int numHits) {
-    LOG.info("add " + query + ", " + numHits);
-    lookup.add(query, new Integer(numHits));
-  }
-  
  static SpellingResult EMPTY_RESULT = new SpellingResult();

  @Override
@ -182,7 +177,7 @@ public class Suggester extends SolrSpellChecker {
        Collections.sort(suggestions);
      }
      for (LookupResult lr : suggestions) {
-        res.add(t, lr.key.toString(), ((Number)lr.value).intValue());
+        res.add(t, lr.key.toString(), (int)lr.value);
      }
    }
    return res;