LUCENE-3807: clean up TermFreqIterator API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1291418 13f79535-47bb-0310-9956-ffa450edef68
2025-03-06 08:19:23 +00:00 · 2012-02-20 19:35:59 +00:00 · 2012-02-20 19:35:59 +00:00 · 1860439f15
commit 1860439f15
parent 630addb415
27 changed files with 753 additions and 390 deletions
--- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
@ -23,6 +23,7 @@ import java.util.Comparator;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;

 /** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
 * #seekExact(BytesRef,boolean)}) or step through ({@link
@ -40,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
 * of the <code>seek</code> methods.
 *
 * @lucene.experimental */
-public abstract class TermsEnum {
+public abstract class TermsEnum implements BytesRefIterator {

  private AttributeSource atts = null;

@ -114,14 +115,6 @@ public abstract class TermsEnum {
    }
  }

-  /** Increments the enumeration to the next term.
-   *  Returns the resulting term, or null if the end was
-   *  hit (which means the enum is unpositioned).  The
-   *  returned BytesRef may be re-used across calls to next.
-   *  After this method returns null, do not call it again:
-   *  the results are undefined. */
-  public abstract BytesRef next() throws IOException;
-
  /** Returns current term. Do not call this when the enum
   *  is unpositioned. */
  public abstract BytesRef term() throws IOException;
--- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
@ -280,6 +280,37 @@ public final class ByteBlockPool {
    }  while(true);
  }
  
+  /**
+   * Copies bytes stored in this pool into the given {@link BytesRef}.
+   * <p>
+   * On entry <code>bytes.offset</code> is read as the start position inside the
+   * pool and <code>bytes.length</code> as the number of bytes to read. On return
+   * the ref holds the copied bytes in its own array, starting at offset 0 and
+   * with the same length. The requested range may cross any number of buffer
+   * boundaries.
+   *
+   * @param bytes carries the pool offset and length on entry and receives the
+   *        copied bytes
+   * @return the given <code>bytes</code> instance
+   */
+  public final BytesRef copyFrom(final BytesRef bytes) {
+    final int length = bytes.length;
+    final int offset = bytes.offset;
+    // The destination always starts at 0; make sure its array can hold the value.
+    bytes.offset = 0;
+    bytes.grow(length);
+    int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
+    int pos = offset & BYTE_BLOCK_MASK;
+    int destPos = 0;
+    int remaining = length;
+    while (remaining > 0) {
+      // Read at most up to the end of the current buffer, then advance to the
+      // start of the following buffer for whatever is left.
+      final int chunk = Math.min(remaining, BYTE_BLOCK_SIZE - pos);
+      System.arraycopy(buffers[bufferIndex++], pos, bytes.bytes, destPos, chunk);
+      destPos += chunk;
+      remaining -= chunk;
+      pos = 0;
+    }
+    bytes.length = length;
+    return bytes;
+  }
+  
  /**
   * Writes the pools content to the given {@link DataOutput}
   */
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java
@ -0,0 +1,52 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * Minimal forward-only iteration contract over a sequence of {@link BytesRef}
+ * values.
+ */
+public interface BytesRefIterator {
+
+  /** Shared iterator instance that has no elements. */
+  BytesRefIterator EMPTY_ITERATOR = new EmptyBytesRefIterator();
+
+  /**
+   * Advances to the next {@link BytesRef} and returns it, or returns
+   * <code>null</code> once the iteration is exhausted. The returned instance
+   * may be re-used by subsequent calls. Calling this method again after it
+   * has returned <code>null</code> yields undefined results.
+   *
+   * @return the next {@link BytesRef}, or <code>null</code> when no elements
+   *         are left
+   * @throws IOException if the underlying source fails while advancing
+   */
+  BytesRef next() throws IOException;
+
+  /** Iterator whose {@link #next()} always returns <code>null</code>. */
+  final class EmptyBytesRefIterator implements BytesRefIterator {
+
+    @Override
+    public BytesRef next() throws IOException {
+      return null;
+    }
+
+  }
+
+}
--- a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java
@ -16,7 +16,7 @@ package org.apache.lucene.search.spell;
 * limitations under the License.
 */

-import java.util.Iterator;
+import org.apache.lucene.util.BytesRefIterator;

 /**
 * A simple interface representing a Dictionary. A Dictionary
@ -30,5 +30,5 @@ public interface Dictionary {
   * Return all words present in the dictionary
   * @return Iterator
   */
-  Iterator<String> getWordsIterator();
+  BytesRefIterator getWordsIterator();
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
@ -18,12 +18,14 @@
 package org.apache.lucene.search.spell;

 import java.io.IOException;
+import java.util.Comparator;
 import java.util.Iterator;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.UnicodeUtil;
@ -50,14 +52,13 @@ public class HighFrequencyDictionary implements Dictionary {
    this.thresh = thresh;
  }

-  public final Iterator<String> getWordsIterator() {
+  public final BytesRefIterator getWordsIterator() {
    return new HighFrequencyIterator();
  }

  final class HighFrequencyIterator implements TermFreqIterator, SortedIterator {
-    private TermsEnum termsEnum;
-    private BytesRef actualTerm;
-    private boolean hasNextCalled;
+    private final BytesRef spare = new BytesRef();
+    private final TermsEnum termsEnum;
    private int minNumDocs;

    HighFrequencyIterator() {
@ -65,6 +66,8 @@ public class HighFrequencyDictionary implements Dictionary {
        Terms terms = MultiFields.getTerms(reader, field);
        if (terms != null) {
          termsEnum = terms.iterator(null);
+        } else {
+          termsEnum = null;
        }
        minNumDocs = (int)(thresh * (float)reader.numDocs());
      } catch (IOException e) {
@ -83,57 +86,27 @@ public class HighFrequencyDictionary implements Dictionary {
        throw new RuntimeException(ioe);
      }
    }
-    
-    public String next() {
-      if (!hasNextCalled && !hasNext()) {
-        return null;
-      }
-      hasNextCalled = false;

-      if (actualTerm == null) {
-        return null;
-      } else {
-        UnicodeUtil.UTF8toUTF16(actualTerm, spare);
-        return spare.toString();
+
+    /**
+     * Returns the next term for which <code>isFrequent(docFreq)</code> holds,
+     * or <code>null</code> when the terms are exhausted or no terms exist for
+     * the field. The returned instance is a shared buffer that is overwritten
+     * by the following call.
+     */
+    @Override
+    public BytesRef next() throws IOException {
+      if (termsEnum != null) {
+        BytesRef next;
+        // Keep advancing past terms below the threshold; stopping at the first
+        // infrequent term would end the iteration prematurely.
+        while ((next = termsEnum.next()) != null) {
+          if (isFrequent(termsEnum.docFreq())) {
+            spare.copyBytes(next);
+            return spare;
+          }
+        }
      }
+      return null;
    }

-    public boolean hasNext() {
-      if (hasNextCalled) {
-        return actualTerm != null;
+    @Override
+    public Comparator<BytesRef> comparator() {
+      try {
+        return termsEnum.getComparator();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
      }
-      hasNextCalled = true;
-
-      if (termsEnum == null) {
-        return false;
-      }
-
-      while(true) {
-
-        try {
-          actualTerm = termsEnum.next();
-        } catch (IOException e) {
-          throw new RuntimeException(e);
-        }
-
-        // if there are no words return false
-        if (actualTerm == null) {
-          return false;
-        }
-
-        // got a valid term, does it pass the threshold?
-        try {
-          if (isFrequent(termsEnum.docFreq())) {
-            return true;
-          }
-        } catch (IOException ioe) {
-          throw new RuntimeException(ioe);
-        }
-      }
-    }
-
-    public void remove() {
-      throw new UnsupportedOperationException();
    }
  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
@ -18,13 +18,7 @@ package org.apache.lucene.search.spell;
 */

 import org.apache.lucene.index.IndexReader;
-
-import java.util.Iterator;
-
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;

@ -49,50 +43,18 @@ public class LuceneDictionary implements Dictionary {
    this.field = field;
  }

-  public final Iterator<String> getWordsIterator() {
-    return new LuceneIterator();
-  }
-
-
-  final class LuceneIterator implements Iterator<String> {
-    private TermsEnum termsEnum;
-    private BytesRef pendingTerm;
-    private final CharsRef spare = new CharsRef();
-
-    LuceneIterator() {
-      try {
-        final Terms terms = MultiFields.getTerms(reader, field);
-        if (terms != null) {
-          termsEnum = terms.iterator(null);
-          pendingTerm = termsEnum.next();
-        }
-      } catch (IOException e) {
-        throw new RuntimeException(e);
+  public final BytesRefIterator getWordsIterator() {
+    
+    try {
+      final Terms terms = MultiFields.getTerms(reader, field);
+      if (terms != null) {
+        return terms.iterator(null);
+      } else {
+        return BytesRefIterator.EMPTY_ITERATOR;
      }
-    }
-
-    public String next() {
-      if (pendingTerm == null) {
-        return null;
-      }
-
-      UnicodeUtil.UTF8toUTF16(pendingTerm, spare);
-
-      try {
-        pendingTerm = termsEnum.next();
-      } catch (IOException e) {
-        throw new RuntimeException(e);
-      }
-
-      return spare.toString();
-    }
-
-    public boolean hasNext() {
-      return pendingTerm != null;
-    }
-
-    public void remove() {
-      throw new UnsupportedOperationException();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
    }
  }
+  
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
@ -21,6 +21,10 @@ package org.apache.lucene.search.spell;
 import java.util.Iterator;
 import java.io.*;

+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.IOUtils;
+

 /**
 * Dictionary represented by a text file.
@ -33,8 +37,6 @@ import java.io.*;
 public class PlainTextDictionary implements Dictionary {

  private BufferedReader in;
-  private String line;
-  private boolean hasNextCalled;

  public PlainTextDictionary(File file) throws FileNotFoundException {
    in = new BufferedReader(new FileReader(file));
@ -51,31 +53,37 @@ public class PlainTextDictionary implements Dictionary {
    in = new BufferedReader(reader);
  }

-  public Iterator<String> getWordsIterator() {
-    return new fileIterator();
+  public BytesRefIterator getWordsIterator() {
+    return new FileIterator();
  }

-  final class fileIterator implements Iterator<String> {
-    public String next() {
-      if (!hasNextCalled) {
-        hasNext();
+  final class FileIterator implements BytesRefIterator {
+    private boolean done = false;
+    private final BytesRef spare = new BytesRef();
+    @Override
+    public BytesRef next() throws IOException {
+      if (done) {
+        return null;
      }
-      hasNextCalled = false;
-      return line;
-    }
-
-    public boolean hasNext() {
-      hasNextCalled = true;
+      boolean success = false;
+      BytesRef result;
      try {
-        line = in.readLine();
-      } catch (IOException ex) {
-        throw new RuntimeException(ex);
+        String line;
+        if ((line = in.readLine()) != null) {
+          spare.copyChars(line);
+          result = spare;
+        } else {
+          done = true;
+          IOUtils.close(in);
+          result = null;
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeWhileHandlingException(in);
+        }
      }
-      return (line != null) ? true : false;
-    }
-
-    public void remove() {
-      throw new UnsupportedOperationException();
+      return result;
    }
  }

--- a/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java
@ -17,12 +17,17 @@ package org.apache.lucene.search.spell;
 * limitations under the License.
 */

+import java.util.Comparator;
 import java.util.Iterator;

+import org.apache.lucene.util.BytesRef;
+
 /**
 * Marker interface to signal that elements coming from {@link Iterator}
 * come in ascending lexicographic order.
 */
 public interface SortedIterator {
+  
+  public Comparator<BytesRef> comparator();

 }
--- a/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
@ -46,6 +46,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.Version;

@ -510,20 +511,18 @@ public class SpellChecker implements java.io.Closeable {
      boolean isEmpty = termsEnums.isEmpty();

      try { 
-        Iterator<String> iter = dict.getWordsIterator();
-        BytesRef currentTerm = new BytesRef();
+        BytesRefIterator iter = dict.getWordsIterator();
+        BytesRef currentTerm;
        
-        terms: while (iter.hasNext()) {
-          String word = iter.next();
+        terms: while ((currentTerm = iter.next()) != null) {
  
+          String word = currentTerm.utf8ToString();
          int len = word.length();
          if (len < 3) {
            continue; // too short we bail but "too long" is fine...
          }
  
          if (!isEmpty) {
-            // we have a non-empty index, check if the term exists
-            currentTerm.copyChars(word);
            for (TermsEnum te : termsEnums) {
              if (te.seekExact(currentTerm, false)) {
                continue terms;
--- a/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
@ -17,16 +17,18 @@ package org.apache.lucene.search.spell;
 * limitations under the License.
 */

-import java.util.Iterator;
+import java.io.IOException;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;

-public interface TermFreqIterator extends Iterator<String> {
+public interface TermFreqIterator extends BytesRefIterator {

  public float freq();
  
  public static class TermFreqIteratorWrapper implements TermFreqIterator {
-    private Iterator<String> wrapped;
+    private BytesRefIterator wrapped;
    
-    public TermFreqIteratorWrapper(Iterator<String> wrapped) {
+    public TermFreqIteratorWrapper(BytesRefIterator wrapped) {
      this.wrapped = wrapped;
    }

@ -34,17 +36,8 @@ public interface TermFreqIterator extends Iterator<String> {
      return 1.0f;
    }

-    public boolean hasNext() {
-      return wrapped.hasNext();
+    public BytesRef next() throws IOException {
+      return wrapped.next();
    }
-
-    public String next() {
-      return wrapped.next().toString();
-    }
-
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-    
  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
@ -17,65 +17,46 @@ package org.apache.lucene.search.suggest;
 * limitations under the License.
 */

-import java.util.ArrayList;
-import java.util.List;
+import java.io.IOException;

 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;

 /**
 * This wrapper buffers incoming elements.
 */
 public class BufferingTermFreqIteratorWrapper implements TermFreqIterator {

-  /** Entry in the buffer. */
-  public static final class Entry implements Comparable<Entry> {
-    String word;
-    float freq;
-    
-    public Entry(String word, float freq) {
-      this.word = word;
-      this.freq = freq;
+  protected BytesRefList entries = new BytesRefList();
+  protected int curPos = -1;
+  protected float[] freqs = new float[1];
+  private final BytesRef spare = new BytesRef();
+  public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
+    BytesRef spare;
+    int freqIndex = 0;
+    while((spare = source.next()) != null) {
+      entries.append(spare);
+      if (freqIndex >= freqs.length) {
+        freqs = ArrayUtil.grow(freqs, freqs.length+1);
+      }
+      freqs[freqIndex++] = source.freq();
    }
-    
-    public int compareTo(Entry o) {
-      return word.compareTo(o.word);
-    }    
-  }
-
-  protected ArrayList<Entry> entries = new ArrayList<Entry>();
-  
-  protected int curPos;
-  protected Entry curEntry;
-  
-  public BufferingTermFreqIteratorWrapper(TermFreqIterator source) {
-    // read all source data into buffer
-    while (source.hasNext()) {
-      String w = source.next();
-      Entry e = new Entry(w, source.freq());
-      entries.add(e);
-    }
-    curPos = 0;
+   
  }

  public float freq() {
-    return curEntry.freq;
+    return freqs[curPos];
  }

-  public boolean hasNext() {
-    return curPos < entries.size();
+  @Override
+  public BytesRef next() throws IOException {
+    if (++curPos < entries.size()) {
+      entries.get(spare, curPos);
+      return spare;
+    }
+    return null;
  }

-  public String next() {
-    curEntry = entries.get(curPos);
-    curPos++;
-    return curEntry.word;
-  }
-
-  public void remove() {
-    throw new UnsupportedOperationException("remove is not supported");
-  }
-  
-  public List<Entry> entries() {
-    return entries;
-  }
+ 
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
@ -0,0 +1,125 @@
+package org.apache.lucene.search.suggest;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.SorterTemplate;
+
+/**
+ * Append-only list of {@link BytesRef} values whose bytes are kept in a
+ * {@link ByteBlockPool}; elements are addressed by their insertion position.
+ */
+final class BytesRefList {
+
+  // Storage that receives the bytes of every appended element.
+  private final ByteBlockPool pool;
+  // offsets[i] is the value passed to the pool as the start of element i.
+  private int[] offsets = new int[1];
+  // Number of elements appended so far.
+  private int currentElement = 0;
+  // Sum of the lengths of all appended elements; recorded as the start of the next one.
+  private int currentOffset = 0;
+
+  /** Creates a list backed by a fresh pool using a direct allocator. */
+  public BytesRefList() {
+    this(new ByteBlockPool(new ByteBlockPool.DirectAllocator()));
+  }
+
+  /**
+   * Creates a list backed by the given pool and allocates the pool's next
+   * buffer.
+   * NOTE(review): offsets are counted from 0, so the pool is presumably
+   * expected to be empty here - confirm with callers.
+   */
+  public BytesRefList(ByteBlockPool pool) {
+    this.pool = pool;
+    pool.nextBuffer();
+  }
+
+  /**
+   * Appends a copy of the given bytes to the list.
+   *
+   * @param bytes the value to append
+   * @return the number of elements in the list after the append
+   */
+  public int append(BytesRef bytes) {
+    if (offsets.length <= currentElement) {
+      offsets = ArrayUtil.grow(offsets, offsets.length + 1);
+    }
+    pool.copy(bytes);
+    offsets[currentElement] = currentOffset;
+    currentElement++;
+    currentOffset += bytes.length;
+    return currentElement;
+  }
+
+  /** Returns the number of elements appended so far. */
+  public int size() {
+    return currentElement;
+  }
+
+  /**
+   * Loads the element at the given position into <code>bytes</code>.
+   *
+   * @param bytes the instance to fill
+   * @param pos position of the element to read
+   * @return the given <code>bytes</code> instance
+   * @throws IndexOutOfBoundsException if <code>pos</code> is not less than
+   *         {@link #size()}
+   */
+  public BytesRef get(BytesRef bytes, int pos) {
+    if (pos >= currentElement) {
+      throw new IndexOutOfBoundsException("index " + pos
+          + " must be less than the size: " + currentElement);
+    }
+    final int start = offsets[pos];
+    // The last element ends where the next append would begin.
+    final int end;
+    if (pos == currentElement - 1) {
+      end = currentOffset;
+    } else {
+      end = offsets[pos + 1];
+    }
+    bytes.offset = start;
+    bytes.length = end - start;
+    pool.copyFrom(bytes);
+    return bytes;
+  }
+
+  /**
+   * Returns an iterator over the elements present at the time of this call,
+   * in insertion order. The iterator hands out one shared {@link BytesRef}
+   * that is overwritten on each step.
+   */
+  public BytesRefIterator iterator() {
+    final int limit = currentElement;
+
+    return new BytesRefIterator() {
+      private final BytesRef shared = new BytesRef();
+      private int cursor = 0;
+
+      @Override
+      public BytesRef next() throws IOException {
+        if (cursor >= limit) {
+          return null;
+        }
+        get(shared, cursor);
+        cursor++;
+        return shared;
+      }
+    };
+  }
+
+  /**
+   * Computes the order of the elements under the given comparator without
+   * moving any bytes.
+   *
+   * @param comp comparator defining the order
+   * @return an array of element positions arranged in sorted order
+   */
+  public int[] sort(final Comparator<BytesRef> comp) {
+    final int count = size();
+    final int[] ords = new int[count];
+    for (int ord = 0; ord < count; ord++) {
+      ords[ord] = ord;
+    }
+    final SorterTemplate sorter = new SorterTemplate() {
+      private final BytesRef pivot = new BytesRef();
+      private final BytesRef left = new BytesRef();
+      private final BytesRef right = new BytesRef();
+
+      @Override
+      protected void swap(int i, int j) {
+        final int held = ords[i];
+        ords[i] = ords[j];
+        ords[j] = held;
+      }
+
+      @Override
+      protected int compare(int i, int j) {
+        final BytesRef first = get(left, ords[i]);
+        final BytesRef second = get(right, ords[j]);
+        return comp.compare(first, second);
+      }
+
+      @Override
+      protected void setPivot(int i) {
+        get(pivot, ords[i]);
+      }
+
+      @Override
+      protected int comparePivot(int j) {
+        return comp.compare(pivot, get(right, ords[j]));
+      }
+    };
+    sorter.quickSort(0, count - 1);
+    return ords;
+  }
+}
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
@ -22,6 +22,8 @@ import java.io.*;

 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;


 /**
@ -36,7 +38,7 @@ public class FileDictionary implements Dictionary {

  private BufferedReader in;
  private String line;
-  private boolean hasNextCalled;
+  private boolean done = false;

  public FileDictionary(InputStream dictFile) {
    in = new BufferedReader(new InputStreamReader(dictFile));
@ -50,45 +52,39 @@ public class FileDictionary implements Dictionary {
  }

  public TermFreqIterator getWordsIterator() {
-    return new fileIterator();
+    return new FileIterator();
  }

-  final class fileIterator implements TermFreqIterator {
+  final class FileIterator implements TermFreqIterator {
    private float curFreq;
+    private final BytesRef spare = new BytesRef();
    
-    public String next() {
-      if (!hasNextCalled) {
-        hasNext();
-      }
-      hasNextCalled = false;
-      return line;
-    }
-    
+   
    public float freq() {
      return curFreq;
    }

-    public boolean hasNext() {
-      hasNextCalled = true;
-      try {
-        line = in.readLine();
-        if (line != null) {
-          String[] fields = line.split("\t");
-          if (fields.length > 1) {
-            curFreq = Float.parseFloat(fields[1]);
-            line = fields[0];
-          } else {
-            curFreq = 1;
-          }
-        }
-      } catch (IOException ex) {
-        throw new RuntimeException(ex);
+    @Override
+    public BytesRef next() throws IOException {
+      if (done) {
+        return null;
+      }
+      line = in.readLine();
+      if (line != null) {
+        String[] fields = line.split("\t");
+        if (fields.length > 1) {
+          curFreq = Float.parseFloat(fields[1]);
+          spare.copyChars(fields[0]);
+        } else {
+          spare.copyChars(line);
+          curFreq = 1;
+        }
+        return spare;
+      } else {
+        done = true;
+        IOUtils.close(in);
+        return null;
      }
-      return (line != null) ? true : false;
-    }
-
-    public void remove() {
-      throw new UnsupportedOperationException();
    }
  }

--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
@ -19,11 +19,13 @@ package org.apache.lucene.search.suggest;

 import java.io.File;
 import java.io.IOException;
-import java.util.Iterator;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.List;

 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.PriorityQueue;

 public abstract class Lookup {
@ -77,7 +79,7 @@ public abstract class Lookup {
   * {@link UnsortedTermFreqIteratorWrapper} in such case.
   */
  public void build(Dictionary dict) throws IOException {
-    Iterator<String> it = dict.getWordsIterator();
+    BytesRefIterator it = dict.getWordsIterator();
    TermFreqIterator tfit;
    if (it instanceof TermFreqIterator) {
      tfit = (TermFreqIterator)it;
@ -89,6 +91,52 @@ public abstract class Lookup {
  
  public abstract void build(TermFreqIterator tfit) throws IOException;
  
+  /**
+   * Look up a key and return possible completion for this key.
+   * @param key lookup key. Depending on the implementation this may be
+   * a prefix, misspelling, or even infix.
+   * @param onlyMorePopular return only more popular results
+   * @param num maximum number of results to return
+   * @return a list of possible completions, with their relative weight (e.g. popularity)
+   */
+  // TODO: this should be a BytesRef API?
+  public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
+
+  /**
+   * Modify the lookup data by recording additional data. Optional operation.
+   * @param key new lookup key
+   * @param value value to associate with this key
+   * @return true if new key is added, false if it already exists or operation
+   * is not supported.
+   */
+  // TODO: this should be a BytesRef API?
+  public abstract boolean add(String key, Object value);
+  
+  /**
+   * Get value associated with a specific key.
+   * @param key lookup key
+   * @return associated value
+   */
+  // TODO: this should be a BytesRef API?
+  public abstract Object get(String key);
+
+  /**
+   * Persist the constructed lookup data to the given output stream. Optional operation.
+   * @param output {@link OutputStream} to write the data to.
+   * @return true if successful, false if unsuccessful or not supported.
+   * @throws IOException when fatal IO error occurs.
+   */
+  public abstract boolean store(OutputStream output) throws IOException;
+
+  /**
+   * Discard current lookup data and load it from a previously saved copy.
+   * Optional operation.
+   * @param input the {@link InputStream} to load the lookup data.
+   * @return true if completed successfully, false if unsuccessful or not supported.
+   * @throws IOException when fatal IO error occurs.
+   */
+  public abstract boolean load(InputStream input) throws IOException;
+  
  /**
   * Persist the constructed lookup data to a directory. Optional operation.
   * @param storeDir directory where data can be stored.
@ -105,30 +153,4 @@ public abstract class Lookup {
   * @throws IOException when fatal IO error occurs.
   */
  public abstract boolean load(File storeDir) throws IOException;
-  
-  /**
-   * Look up a key and return possible completion for this key.
-   * @param key lookup key. Depending on the implementation this may be
-   * a prefix, misspelling, or even infix.
-   * @param onlyMorePopular return only more popular results
-   * @param num maximum number of results to return
-   * @return a list of possible completions, with their relative weight (e.g. popularity)
-   */
-  public abstract List<LookupResult> lookup(String key, boolean onlyMorePopular, int num);
-
-  /**
-   * Modify the lookup data by recording additional data. Optional operation.
-   * @param key new lookup key
-   * @param value value to associate with this key
-   * @return true if new key is added, false if it already exists or operation
-   * is not supported.
-   */
-  public abstract boolean add(String key, Object value);
-  
-  /**
-   * Get value associated with a specific key.
-   * @param key lookup key
-   * @return associated value
-   */
-  public abstract Object get(String key);  
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
@ -17,10 +17,12 @@ package org.apache.lucene.search.suggest;
 * limitations under the License.
 */

-import java.util.Collections;
+import java.io.IOException;
+import java.util.Comparator;

 import org.apache.lucene.search.spell.SortedIterator;
 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;

 /**
 * This wrapper buffers incoming elements and makes sure they are sorted in
@ -28,8 +30,35 @@ import org.apache.lucene.search.spell.TermFreqIterator;
 */
 public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator {

-  public SortedTermFreqIteratorWrapper(TermFreqIterator source) {
+  private final int[] sortedOrds;
+  private int currentOrd = -1;
+  private final BytesRef spare = new BytesRef();
+  private final Comparator<BytesRef> comp;
+  
+
+  public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator<BytesRef> comp) throws IOException {
    super(source);
-    Collections.sort(entries);
+    this.sortedOrds = entries.sort(comp);
+    this.comp = comp;
  }
+
+  @Override
+  public float freq() {
+    return freqs[currentOrd];
+  }
+
+  @Override
+  public BytesRef next() throws IOException {
+    if (++curPos < entries.size()) {
+      return entries.get(spare, (currentOrd = sortedOrds[curPos]));  
+    }
+    return null;
+  }
+
+  @Override
+  public Comparator<BytesRef> comparator() {
+    return comp;
+  }
+  
+  
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
@ -17,9 +17,11 @@ package org.apache.lucene.search.suggest;
 * limitations under the License.
 */

-import java.util.Collections;
+import java.io.IOException;
+import java.util.Random;

 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;

 /**
 * This wrapper buffers the incoming elements and makes sure they are in
@ -27,8 +29,34 @@ import org.apache.lucene.search.spell.TermFreqIterator;
 */
 public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper {

-  public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) {
+  private final int[] ords;
+  private int currentOrd = -1;
+  private final BytesRef spare = new BytesRef();
+  /**
+   * Buffers <code>source</code> through the superclass constructor and prepares
+   * a random permutation of the buffered entries' ords; iteration follows that
+   * permutation.
+   *
+   * @param source iterator whose elements are buffered
+   */
+  public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException {
    super(source);
-    Collections.shuffle(entries);
+    ords = new int[entries.size()];
+    Random random = new Random();
+    for (int i = 0; i < ords.length; i++) {
+      ords[i] = i;
+    }
+    // Fisher-Yates shuffle: walk backwards and swap slot i with a random slot
+    // in [0, i]. Picking the partner from the whole array on every step (the
+    // naive variant) does not produce all permutations with equal probability.
+    for (int i = ords.length - 1; i > 0; i--) {
+      int randomPosition = random.nextInt(i + 1);
+      int temp = ords[i];
+      ords[i] = ords[randomPosition];
+      ords[randomPosition] = temp;
+    }
+  }
+  
+  /**
+   * Returns the slot of <code>freqs</code> addressed by <code>currentOrd</code>,
+   * i.e. the ord chosen by the most recent {@link #next()} call.
+   * <code>currentOrd</code> starts at -1, so do not call this before the
+   * first <code>next()</code>.
+   */
+  @Override
+  public float freq() {
+    return freqs[currentOrd];
+  }
+
+  /**
+   * Advances to the next buffered entry in shuffled order.
+   *
+   * @return the entry, filled into a shared <code>spare</code> instance that is
+   *         re-used across calls, or <code>null</code> once all entries have
+   *         been returned
+   */
+  @Override
+  public BytesRef next() throws IOException {
+    curPos++;
+    if (curPos >= entries.size()) {
+      // exhausted; currentOrd keeps the ord of the last returned entry
+      return null;
+    }
+    currentOrd = ords[curPos];
+    return entries.get(spare, currentOrd);
  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest.fst;

 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;

@ -29,6 +31,8 @@ import org.apache.lucene.search.suggest.fst.Sort.SortInfo;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.*;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.NoOutputs;
@ -158,20 +162,17 @@ public class FSTCompletionLookup extends Lookup {
    // If negative floats are allowed some trickery needs to be done to find their byte order.
    boolean success = false;
    try {
-      BytesRef tmp1 = new BytesRef();
      byte [] buffer = new byte [0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
-      while (tfit.hasNext()) {
-        String key = tfit.next();
-        UnicodeUtil.UTF16toUTF8(key, 0, key.length(), tmp1);
-
-        if (tmp1.length + 4 >= buffer.length) {
-          buffer = ArrayUtil.grow(buffer, tmp1.length + 4);
+      BytesRef spare;
+      while ((spare = tfit.next()) != null) {
+        if (spare.length + 4 >= buffer.length) {
+          buffer = ArrayUtil.grow(buffer, spare.length + 4);
        }

        output.reset(buffer);
        output.writeInt(FloatMagic.toSortable(tfit.freq()));
-        output.writeBytes(tmp1.bytes, tmp1.offset, tmp1.length);
+        output.writeBytes(spare.bytes, spare.offset, spare.length);
        writer.write(buffer, 0, output.getPosition());
      }
      writer.close();
@ -189,6 +190,7 @@ public class FSTCompletionLookup extends Lookup {
      int previousBucket = 0;
      float previousScore = 0;
      ByteArrayDataInput input = new ByteArrayDataInput();
+      BytesRef tmp1 = new BytesRef();
      BytesRef tmp2 = new BytesRef();
      while (reader.read(tmp1)) {
        input.reset(tmp1.bytes);
@ -293,4 +295,30 @@ public class FSTCompletionLookup extends Lookup {
    normalCompletion.getFST().save(new File(storeDir, FILENAME));
    return true;
  }
+
+  /**
+   * Saves the FST behind <code>normalCompletion</code> to <code>output</code>.
+   * The stream is closed on every path: after a successful save, when saving
+   * throws, and when there is nothing to store.
+   *
+   * @param output destination stream; always closed by this method
+   * @return <code>false</code> if <code>normalCompletion</code> is
+   *         <code>null</code> (nothing is written), <code>true</code> otherwise
+   */
+  @Override
+  public synchronized boolean store(OutputStream output) throws IOException {
+    try {
+      // The guard sits inside the try so the "nothing to store" path also
+      // closes the stream instead of leaving it open for the caller.
+      if (this.normalCompletion == null) {
+        return false;
+      }
+      normalCompletion.getFST().save(new OutputStreamDataOutput(output));
+    } finally {
+      IOUtils.close(output);
+    }
+    return true;
+  }
+
+  /**
+   * Reads an FST from <code>input</code> and rebuilds both completion objects:
+   * <code>higherWeightsCompletion</code> wraps the freshly read FST, and
+   * <code>normalCompletion</code> shares that same FST, constructed with
+   * <code>false</code> and <code>exactMatchFirst</code>.
+   * The stream is closed whether or not reading succeeds.
+   *
+   * @param input source stream; always closed by this method
+   * @return always <code>true</code>; failures surface as exceptions
+   */
+  @Override
+  public synchronized boolean load(InputStream input) throws IOException {
+    try {
+      this.higherWeightsCompletion = new FSTCompletion(new FST<Object>(
+          new InputStreamDataInput(input), NoOutputs.getSingleton()));
+      this.normalCompletion = new FSTCompletion(
+          higherWeightsCompletion.getFST(), false, exactMatchFirst);
+    } finally {
+      IOUtils.close(input);
+    }
+    return true;
+  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
@ -19,6 +19,8 @@ package org.apache.lucene.search.suggest.fst;

 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@ -27,11 +29,12 @@ import org.apache.lucene.search.spell.TermFreqIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.InputStreamDataInput;
+import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.fst.Builder;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
@ -109,16 +112,14 @@ public class WFSTCompletionLookup extends Lookup {
    try {
      byte [] buffer = new byte [0];
      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
-      while (iterator.hasNext()) {
-        String key = iterator.next();
-        UnicodeUtil.UTF16toUTF8(key, 0, key.length(), scratch);
-
-        if (scratch.length + 5 >= buffer.length) {
-          buffer = ArrayUtil.grow(buffer, scratch.length + 5);
+      BytesRef spare;
+      while ((spare = iterator.next()) != null) {
+        if (spare.length + 5 >= buffer.length) {
+          buffer = ArrayUtil.grow(buffer, spare.length + 5);
        }

        output.reset(buffer);
-        output.writeBytes(scratch.bytes, scratch.offset, scratch.length);
+        output.writeBytes(spare.bytes, spare.offset, spare.length);
        output.writeByte((byte)0); // separator: not used, just for sort order
        output.writeInt((int)encodeWeight(iterator.freq()));
        writer.write(buffer, 0, output.getPosition());
@ -177,6 +178,26 @@ public class WFSTCompletionLookup extends Lookup {
    this.fst = FST.read(new File(storeDir, FILENAME), PositiveIntOutputs.getSingleton(true));
    return true;
  }
+  
+  /**
+   * Saves <code>fst</code> to <code>output</code>. The stream is closed when
+   * this method returns, including when saving throws.
+   * <p>
+   * NOTE(review): there is no null guard on <code>fst</code> here; if it can
+   * still be <code>null</code> when this is called (for example before the
+   * lookup was built or loaded), this throws a NullPointerException instead of
+   * returning <code>false</code> -- confirm against callers.
+   *
+   * @param output destination stream; always closed by this method
+   * @return always <code>true</code>
+   */
+  @Override
+  public boolean store(OutputStream output) throws IOException {
+    try {
+      fst.save(new OutputStreamDataOutput(output));
+    } finally {
+      IOUtils.close(output);
+    }
+    return true;
+  }
+
+  /**
+   * Replaces <code>fst</code> with an FST read from <code>input</code>, using
+   * <code>PositiveIntOutputs.getSingleton(true)</code> as the outputs
+   * implementation. The stream is closed whether or not reading succeeds.
+   *
+   * @param input source stream; always closed by this method
+   * @return always <code>true</code>; failures surface as exceptions
+   */
+  @Override
+  public boolean load(InputStream input) throws IOException {
+    try {
+      this.fst = new FST<Long>(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton(true));
+    } finally {
+      IOUtils.close(input);
+    }
+    return true;
+  }

  @Override
  public List<LookupResult> lookup(String key, boolean onlyMorePopular, int num) {
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
@ -23,6 +23,8 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;

@ -31,6 +33,10 @@ import org.apache.lucene.search.spell.TermFreqIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
 import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.UnicodeUtil;

 public class JaspellLookup extends Lookup {
  JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
@ -41,17 +47,22 @@ public class JaspellLookup extends Lookup {
  public void build(TermFreqIterator tfit) throws IOException {
    if (tfit instanceof SortedIterator) {
      // make sure it's unsorted
+      // NOTE: the wrapper only shuffles randomly, so in principle the result
+      // can still come out in sorted order.
      tfit = new UnsortedTermFreqIteratorWrapper(tfit);
    }
    trie = new JaspellTernarySearchTrie();
    trie.setMatchAlmostDiff(editDistance);
-    while (tfit.hasNext()) {
-      String key = tfit.next();
+    BytesRef spare;
+    // re-used UTF-16 scratch buffer for decoding each UTF-8 term
+    final CharsRef charsSpare = new CharsRef();
+
+    while ((spare = tfit.next()) != null) {
      float freq = tfit.freq();
-      if (key.length() == 0) {
+      // empty terms are not added to the trie
+      if (spare.length == 0) {
        continue;
      }
-      trie.put(key, new Float(freq));
+      // the UTF-16 form never needs more chars than the UTF-8 form has bytes
+      charsSpare.grow(spare.length);
+      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
+      trie.put(charsSpare.toString(), new Float(freq));
    }
  }

@ -114,15 +125,7 @@ public class JaspellLookup extends Lookup {
    if (!data.exists() || !data.canRead()) {
      return false;
    }
-    DataInputStream in = new DataInputStream(new FileInputStream(data));
-    TSTNode root = trie.new TSTNode('\0', null);
-    try {
-      readRecursively(in, root);
-      trie.setRoot(root);
-    } finally {
-      in.close();
-    }
-    return true;
+    return load(new FileInputStream(data));
  }
  
  private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
@ -153,19 +156,8 @@ public class JaspellLookup extends Lookup {
    if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
      return false;
    }
-    TSTNode root = trie.getRoot();
-    if (root == null) { // empty tree
-      return false;
-    }
    File data = new File(storeDir, FILENAME);
-    DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
-    try {
-      writeRecursively(out, root);
-      out.flush();
-    } finally {
-      out.close();
-    }
-    return true;
+    return store(new FileOutputStream(data));
  }
  
  private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
@ -186,4 +178,33 @@ public class JaspellLookup extends Lookup {
    writeRecursively(out, node.relatives[TSTNode.EQKID]);
    writeRecursively(out, node.relatives[TSTNode.HIKID]);
  }
+
+  /**
+   * Writes the trie to <code>output</code> via <code>writeRecursively</code>,
+   * starting at the trie's root. The stream is closed on every path: after a
+   * successful write, when writing throws, and when the trie is empty.
+   * Closing on the empty-tree path matters because the <code>File</code>
+   * overload of <code>store</code> opens a <code>FileOutputStream</code> before
+   * delegating here, and that stream would otherwise never be closed.
+   *
+   * @param output destination stream; always closed by this method
+   * @return <code>false</code> if the trie has no root (nothing is written),
+   *         <code>true</code> otherwise
+   */
+  @Override
+  public boolean store(OutputStream output) throws IOException {
+    DataOutputStream out = new DataOutputStream(output);
+    try {
+      TSTNode root = trie.getRoot();
+      if (root == null) { // empty tree
+        return false;
+      }
+      writeRecursively(out, root);
+      out.flush();
+    } finally {
+      IOUtils.close(out);
+    }
+    return true;
+  }
+
+  /**
+   * Reads a trie from <code>input</code>: a fresh root node is filled by
+   * <code>readRecursively</code> and installed with <code>trie.setRoot</code>
+   * only after reading finished without an exception. The stream is closed
+   * whether or not reading succeeds.
+   *
+   * @param input source stream; always closed by this method
+   * @return always <code>true</code>; failures surface as exceptions
+   */
+  @Override
+  public boolean load(InputStream input) throws IOException {
+    DataInputStream in = new DataInputStream(input);
+    TSTNode root = trie.new TSTNode('\0', null);
+    try {
+      readRecursively(in, root);
+      trie.setRoot(root);
+    } finally {
+      IOUtils.close(in);
+    }
+    return true;
+  }
 }
--- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
+++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
@ -23,6 +23,8 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;

@ -30,6 +32,10 @@ import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
 import org.apache.lucene.search.spell.SortedIterator;
 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.UnicodeUtil;

 public class TSTLookup extends Lookup {
  TernaryTreeNode root = new TernaryTreeNode();
@ -39,15 +45,19 @@ public class TSTLookup extends Lookup {
  public void build(TermFreqIterator tfit) throws IOException {
    root = new TernaryTreeNode();
    // buffer first
-    if (!(tfit instanceof SortedIterator)) {
-      // make sure it's sorted
-      tfit = new SortedTermFreqIteratorWrapper(tfit);
+    if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
+      // make sure it's sorted and the comparator uses UTF16 sort order
+      tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
    }

    ArrayList<String> tokens = new ArrayList<String>();
    ArrayList<Float> vals = new ArrayList<Float>();
-    while (tfit.hasNext()) {
-      tokens.add(tfit.next());
+    BytesRef spare;
+    CharsRef charsSpare = new CharsRef();
+    while ((spare = tfit.next()) != null) {
+      charsSpare.grow(spare.length);
+      UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
+      tokens.add(charsSpare.toString());
      vals.add(new Float(tfit.freq()));
    }
    autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
@ -113,14 +123,7 @@ public class TSTLookup extends Lookup {
    if (!data.exists() || !data.canRead()) {
      return false;
    }
-    DataInputStream in = new DataInputStream(new FileInputStream(data));
-    root = new TernaryTreeNode();
-    try {
-      readRecursively(in, root);
-    } finally {
-      in.close();
-    }
-    return true;
+    return load(new FileInputStream(data));
  }
  
  // pre-order traversal
@ -153,14 +156,7 @@ public class TSTLookup extends Lookup {
      return false;
    }
    File data = new File(storeDir, FILENAME);
-    DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
-    try {
-      writeRecursively(out, root);
-      out.flush();
-    } finally {
-      out.close();
-    }
-    return true;
+    return store(new FileOutputStream(data));
  }
  
  // pre-order traversal
@ -188,4 +184,28 @@ public class TSTLookup extends Lookup {
      writeRecursively(out, node.hiKid);
    }
  }
+
+  /**
+   * Writes the tree below <code>root</code> to <code>output</code> via
+   * <code>writeRecursively</code> and flushes the data stream. The underlying
+   * <code>output</code> is closed whether or not writing succeeds.
+   * Synchronized on this lookup instance.
+   *
+   * @param output destination stream; always closed by this method
+   * @return always <code>true</code>; failures surface as exceptions
+   */
+  @Override
+  public synchronized boolean store(OutputStream output) throws IOException {
+    DataOutputStream out = new DataOutputStream(output);
+    try {
+      writeRecursively(out, root);
+      out.flush();
+    } finally {
+      // closes the raw stream rather than the wrapper; on the success path the
+      // wrapper has already been flushed explicitly above
+      IOUtils.close(output);
+    }
+    return true;
+  }
+
+  /**
+   * Replaces <code>root</code> with a new node and fills it from
+   * <code>input</code> via <code>readRecursively</code>. The stream is closed
+   * whether or not reading succeeds. Note that <code>root</code> is replaced
+   * before reading starts, so a failed read leaves a partially filled tree.
+   * Synchronized on this lookup instance.
+   *
+   * @param input source stream; always closed by this method
+   * @return always <code>true</code>; failures surface as exceptions
+   */
+  @Override
+  public synchronized boolean load(InputStream input) throws IOException {
+    DataInputStream in = new DataInputStream(input);
+    root = new TernaryTreeNode();
+    try {
+      readRecursively(in, root);
+    } finally {
+      IOUtils.close(in);
+    }
+    return true;
+  }
 }
--- a/modules/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
@ -18,15 +18,17 @@ package org.apache.lucene.search.spell;
 */

 import java.io.IOException;
-import java.util.Iterator;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.LuceneTestCase;

 /**
@ -40,7 +42,8 @@ public class TestLuceneDictionary extends LuceneTestCase {

  private IndexReader indexReader = null;
  private LuceneDictionary ld;
-  private Iterator<String> it;
+  private BytesRefIterator it;
+  private BytesRef spare = new BytesRef();

  @Override
  public void setUp() throws Exception {
@ -84,13 +87,12 @@ public class TestLuceneDictionary extends LuceneTestCase {
  
  public void testFieldNonExistent() throws IOException {
    try {
-      indexReader = IndexReader.open(store);
+      indexReader = DirectoryReader.open(store);

      ld = new LuceneDictionary(indexReader, "nonexistent_field");
      it = ld.getWordsIterator();

-      assertFalse("More elements than expected", it.hasNext());
-      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertNull("More elements than expected", spare = it.next());
    } finally {
      if  (indexReader != null) { indexReader.close(); }
    }
@ -98,15 +100,13 @@ public class TestLuceneDictionary extends LuceneTestCase {

  public void testFieldAaa() throws IOException {
    try {
-      indexReader = IndexReader.open(store);
+      indexReader = DirectoryReader.open(store);

      ld = new LuceneDictionary(indexReader, "aaa");
      it = ld.getWordsIterator();
-
-      assertTrue("First element doesn't exist.", it.hasNext());
-      assertTrue("First element isn't correct", it.next().equals("foo"));
-      assertFalse("More elements than expected", it.hasNext());
-      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertNotNull("First element doesn't exist.", spare = it.next());
+      assertTrue("First element isn't correct", spare.utf8ToString().equals("foo"));
+      assertNull("More elements than expected", it.next());
    } finally {
      if  (indexReader != null) { indexReader.close(); }
    }
@ -114,24 +114,22 @@ public class TestLuceneDictionary extends LuceneTestCase {

  public void testFieldContents_1() throws IOException {
    try {
-      indexReader = IndexReader.open(store);
+      indexReader = DirectoryReader.open(store);

      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();

-      assertTrue("First element doesn't exist.", it.hasNext());
-      assertTrue("First element isn't correct", it.next().equals("Jerry"));
-      assertTrue("Second element doesn't exist.", it.hasNext());
-      assertTrue("Second element isn't correct", it.next().equals("Tom"));
-      assertFalse("More elements than expected", it.hasNext());
-      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertNotNull("First element doesn't exist.", spare = it.next());
+      assertTrue("First element isn't correct", spare.utf8ToString().equals("Jerry"));
+      assertNotNull("Second element doesn't exist.", spare = it.next());
+      assertTrue("Second element isn't correct", spare.utf8ToString().equals("Tom"));
+      assertNull("More elements than expected", it.next());

      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();

      int counter = 2;
-      while (it.hasNext()) {
-        it.next();
+      while (it.next() != null) {
        counter--;
      }

@ -144,30 +142,15 @@ public class TestLuceneDictionary extends LuceneTestCase {

  public void testFieldContents_2() throws IOException {
    try {
-      indexReader = IndexReader.open(store);
+      indexReader = DirectoryReader.open(store);

      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();

-      // hasNext() should have no side effects
-      assertTrue("First element isn't were it should be.", it.hasNext());
-      assertTrue("First element isn't were it should be.", it.hasNext());
-      assertTrue("First element isn't were it should be.", it.hasNext());
-
      // just iterate through words
-      assertTrue("First element isn't correct", it.next().equals("Jerry"));
-      assertTrue("Second element isn't correct", it.next().equals("Tom"));
-      assertTrue("Nonexistent element is really null", it.next() == null);
-
-      // hasNext() should still have no side effects ...
-      assertFalse("There should be any more elements", it.hasNext());
-      assertFalse("There should be any more elements", it.hasNext());
-      assertFalse("There should be any more elements", it.hasNext());
-
-      // .. and there are really no more words
-      assertTrue("Nonexistent element is really null", it.next() == null);
-      assertTrue("Nonexistent element is really null", it.next() == null);
-      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertEquals("First element isn't correct", "Jerry", it.next().utf8ToString());
+      assertEquals("Second element isn't correct",  "Tom", it.next().utf8ToString());
+      assertNull("Nonexistent element is really null", it.next());
    }
    finally {
      if  (indexReader != null) { indexReader.close(); }
@ -176,15 +159,14 @@ public class TestLuceneDictionary extends LuceneTestCase {

  public void testFieldZzz() throws IOException {
    try {
-      indexReader = IndexReader.open(store);
+      indexReader = DirectoryReader.open(store);

      ld = new LuceneDictionary(indexReader, "zzz");
      it = ld.getWordsIterator();

-      assertTrue("First element doesn't exist.", it.hasNext());
-      assertTrue("First element isn't correct", it.next().equals("bar"));
-      assertFalse("More elements than expected", it.hasNext());
-      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertNotNull("First element doesn't exist.", spare = it.next());
+      assertEquals("First element isn't correct", "bar", spare.utf8ToString());
+      assertNull("More elements than expected", it.next());
    }
    finally {
      if  (indexReader != null) { indexReader.close(); }
@ -194,7 +176,7 @@ public class TestLuceneDictionary extends LuceneTestCase {
  public void testSpellchecker() throws IOException {
    Directory dir = newDirectory();
    SpellChecker sc = new SpellChecker(dir);
-    indexReader = IndexReader.open(store);
+    indexReader = DirectoryReader.open(store);
    sc.indexDictionary(new LuceneDictionary(indexReader, "contents"), newIndexWriterConfig(TEST_VERSION_CURRENT, null), false);
    String[] suggestions = sc.suggestSimilar("Tam", 1);
    assertEquals(1, suggestions.length);
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
@ -191,7 +191,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {

      final List<String> input = new ArrayList<String>(benchmarkInput.size());
      for (TermFreq tf : benchmarkInput) {
-        input.add(tf.term.substring(0, Math.min(tf.term.length(), 
+        input.add(tf.term.utf8ToString().substring(0, Math.min(tf.term.length, 
              minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
      }

--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
@ -75,11 +75,11 @@ public class PersistenceTest extends LuceneTestCase {
    // Assert validity.
    float previous = Float.NEGATIVE_INFINITY;
    for (TermFreq k : keys) {
-      Float val = (Float) lookup.get(k.term);
-      assertNotNull(k.term, val);
+      Float val = (Float) lookup.get(k.term.utf8ToString());
+      assertNotNull(k.term.utf8ToString(), val);

      if (supportsExactWeights) { 
-        assertEquals(k.term, Float.valueOf(k.v), val);
+        assertEquals(k.term.utf8ToString(), Float.valueOf(k.v), val);
      } else {
        assertTrue(val + ">=" + previous, val >= previous);
        previous = val.floatValue();
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java
@ -1,5 +1,7 @@
 package org.apache.lucene.search.suggest;

+import org.apache.lucene.util.BytesRef;
+
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -18,10 +20,14 @@ package org.apache.lucene.search.suggest;
 */

 public final class TermFreq {
-  public final String term;
+  public final BytesRef term;
  public final float v;

+  /** Creates a term/value pair, wrapping <code>term</code> in a new {@link BytesRef}. */
  public TermFreq(String term, float v) {
+   this(new BytesRef(term), v);
+  }
+  
+  /**
+   * Creates a term/value pair. The given <code>term</code> reference is stored
+   * as is; no copy of the bytes is made.
+   */
+  public TermFreq(BytesRef term, float v) {
    this.term = term;
    this.v = v;
  }
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java
@ -17,10 +17,12 @@ package org.apache.lucene.search.suggest;
 * limitations under the License.
 */

+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Iterator;

 import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.util.BytesRef;

 /**
 * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
@ -28,6 +30,7 @@ import org.apache.lucene.search.spell.TermFreqIterator;
 public final class TermFreqArrayIterator implements TermFreqIterator {
  private final Iterator<TermFreq> i;
  private TermFreq current;
+  private final BytesRef spare = new BytesRef();

  public TermFreqArrayIterator(Iterator<TermFreq> i) {
    this.i = i;
@ -44,14 +47,14 @@ public final class TermFreqArrayIterator implements TermFreqIterator {
  public float freq() {
    return current.v;
  }
-  
-  public boolean hasNext() {
-    return i.hasNext();
-  }
-  
-  public String next() {
-    return (current = i.next()).term;
-  }

-  public void remove() { throw new UnsupportedOperationException(); }
+  /**
+   * Advances to the next {@link TermFreq} of the underlying iterator.
+   *
+   * @return the term's bytes copied into a shared <code>spare</code> instance
+   *         that is re-used across calls, or <code>null</code> when the
+   *         underlying iterator has no more elements
+   */
+  @Override
+  public BytesRef next() throws IOException {
+    if (!i.hasNext()) {
+      return null;
+    }
+    current = i.next();
+    // hand out a copy so callers never see (or modify) the TermFreq's own bytes
+    spare.copyBytes(current.term);
+    return spare;
+  }
 }
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java
@ -0,0 +1,85 @@
+package org.apache.lucene.search.suggest;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Tests for {@link BytesRefList}: appended entries must be readable by
+ * position and through the iterator, and <code>sort</code> must return the
+ * entry ords in comparator order.
+ */
+public class TestBytesRefList extends LuceneTestCase {
+
+  /**
+   * Appends random strings and reads them back sequentially, at random
+   * positions and through two independently obtained iterators.
+   */
+  public void testAppend() throws IOException {
+    BytesRefList list = new BytesRefList();
+    List<String> stringList = new ArrayList<String>();
+    int entries = atLeast(500);
+    BytesRef spare = new BytesRef();
+    for (int i = 0; i < entries; i++) {
+      String randomRealisticUnicodeString = _TestUtil
+          .randomRealisticUnicodeString(random);
+      spare.copyChars(randomRealisticUnicodeString);
+      list.append(spare);
+      stringList.add(randomRealisticUnicodeString);
+    }
+    for (int i = 0; i < entries; i++) {
+      assertNotNull(list.get(spare, i));
+      assertEquals("entry " + i + " doesn't match", stringList.get(i),
+          spare.utf8ToString());
+    }
+
+    // check random
+    for (int i = 0; i < entries; i++) {
+      int e = random.nextInt(entries);
+      assertNotNull(list.get(spare, e));
+      // report the position that was actually checked, not the loop counter
+      assertEquals("entry " + e + " doesn't match", stringList.get(e),
+          spare.utf8ToString());
+    }
+    // obtain the iterator twice to make sure each one starts from the beginning
+    for (int i = 0; i < 2; i++) {
+
+      BytesRefIterator iterator = list.iterator();
+      for (String string : stringList) {
+        assertEquals(string, iterator.next().utf8ToString());
+      }
+    }
+  }
+
+  /**
+   * Appends random strings and checks that the ords returned by
+   * <code>sort</code> visit the entries in the same order as the sorted
+   * string list.
+   */
+  public void testSort() {
+    BytesRefList list = new BytesRefList();
+    List<String> stringList = new ArrayList<String>();
+    int entries = atLeast(500);
+    BytesRef spare = new BytesRef();
+    for (int i = 0; i < entries; i++) {
+      String randomRealisticUnicodeString = _TestUtil.randomRealisticUnicodeString(random);
+      spare.copyChars(randomRealisticUnicodeString);
+      list.append(spare);
+      stringList.add(randomRealisticUnicodeString);
+    }
+    // String's natural order compares UTF-16 code units, which is the order
+    // the UTF8SortedAsUTF16 comparator is expected to reproduce on the bytes
+    Collections.sort(stringList);
+    int[] sortedOrds = list.sort(BytesRef.getUTF8SortedAsUTF16Comparator());
+    for (int i = 0; i < entries; i++) {
+      assertNotNull(list.get(spare, sortedOrds[i]));
+      assertEquals("entry " + i + " doesn't match", stringList.get(i),
+          spare.utf8ToString());
+    }
+  }
+}
--- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
+++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
@ -40,7 +40,7 @@ public class FSTCompletionTest extends LuceneTestCase {

    FSTCompletionBuilder builder = new FSTCompletionBuilder();
    for (TermFreq tf : evalKeys()) {
-      builder.add(new BytesRef(tf.term), (int) tf.v);
+      builder.add(tf.term, (int) tf.v);
    }
    completion = builder.build();
    completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase {
    // are.
    Float previous = null; 
    for (TermFreq tf : keys) {
-      Float current = lookup.get(tf.term);
+      Float current = lookup.get(tf.term.utf8ToString());
      if (previous != null) {
        assertEquals(previous, current);
      }
@ -183,8 +183,8 @@ public class FSTCompletionTest extends LuceneTestCase {
    lookup.build(new TermFreqArrayIterator(input));

    for (TermFreq tf : input) {
-      assertTrue("Not found: " + tf.term, lookup.get(tf.term) != null);
-      assertEquals(tf.term, lookup.lookup(tf.term, true, 1).get(0).key);
+      assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null);
+      assertEquals(tf.term, lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key);
    }

    List<LookupResult> result = lookup.lookup("wit", true, 5);
@ -211,7 +211,7 @@ public class FSTCompletionTest extends LuceneTestCase {
    lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()])));

    for (TermFreq tf : freqs) {
-      final String term = tf.term;
+      final String term = tf.term.utf8ToString();
      for (int i = 1; i < term.length(); i++) {
        String prefix = term.substring(0, i);
        for (LookupResult lr : lookup.lookup(prefix, true, 10)) {