LUCENE-7686: add efficient de-duping to the NRT document suggester

2017-02-22 16:04:26 -05:00 · 2017-02-22 16:04:26 -05:00 · 4e2cf61ac7
parent 29a5ea44a7
commit 4e2cf61ac7
15 changed files with 517 additions and 123 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -126,6 +126,10 @@ New Features
 * LUCENE-7688: Add OneMergeWrappingMergePolicy class.
  (Keith Laban, Christine Poerschke)

+* LUCENE-7686: The near-real-time document suggester can now
+  efficiently filter out duplicate suggestions (Uwe Schindler, Mike
+  McCandless)
+
 Bug Fixes

 * LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
--- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
@ -248,32 +248,38 @@ public final class Util {
   *  @lucene.experimental
   */
  public static class FSTPath<T> {
+    /** Holds the last arc appended to this path */
    public FST.Arc<T> arc;
-    public T cost;
+    /** Holds cost plus any usage-specific output: */
+    public T output;
    public final IntsRefBuilder input;
    public final float boost;
    public final CharSequence context;

+    // Custom int payload for consumers; the NRT suggester uses this to record if this path has already enumerated a surface form
+    public int payload;
+
    /** Sole constructor */
-    public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input) {
-      this(cost, arc, input, 0, null);
+    public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input) {
+      this(output, arc, input, 0, null, -1);
    }

-    public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context) {
+    public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
      this.arc = new FST.Arc<T>().copyFrom(arc);
-      this.cost = cost;
+      this.output = output;
      this.input = input;
      this.boost = boost;
      this.context = context;
+      this.payload = payload;
    }

-    public FSTPath<T> newPath(T cost, IntsRefBuilder input) {
-      return new FSTPath<>(cost, this.arc, input, this.boost, this.context);
+    public FSTPath<T> newPath(T output, IntsRefBuilder input) {
+      return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload);
    }

    @Override
    public String toString() {
-      return "input=" + input.get() + " cost=" + cost + "context=" + context + "boost=" + boost;
+      return "input=" + input.get() + " output=" + output + " context=" + context + " boost=" + boost + " payload=" + payload;
    }
  }

@ -287,7 +293,7 @@ public final class Util {

    @Override
    public int compare(FSTPath<T> a, FSTPath<T> b) {
-      int cmp = comparator.compare(a.cost, b.cost);
+      int cmp = comparator.compare(a.output, b.output);
      if (cmp == 0) {
        return a.input.get().compareTo(b.input.get());
      } else {
@ -339,8 +345,7 @@ public final class Util {

      assert queue != null;

-      T cost = fst.outputs.add(path.cost, path.arc.output);
-      //System.out.println("  addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
+      T output = fst.outputs.add(path.output, path.arc.output);

      if (queue.size() == maxQueueDepth) {
        FSTPath<T> bottom = queue.last();
@ -373,32 +378,32 @@ public final class Util {
      newInput.copyInts(path.input.get());
      newInput.append(path.arc.label);

-      queue.add(path.newPath(cost, newInput));
-
-      if (queue.size() == maxQueueDepth+1) {
-        queue.pollLast();
+      FSTPath<T> newPath = path.newPath(output, newInput);
+      if (acceptPartialPath(newPath)) {
+        queue.add(newPath);
+        if (queue.size() == maxQueueDepth+1) {
+          queue.pollLast();
+        }
      }
    }

    public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {
-      addStartPaths(node, startOutput, allowEmptyString, input, 0, null);
+      addStartPaths(node, startOutput, allowEmptyString, input, 0, null, -1);
    }

    /** Adds all leaving arcs, including 'finished' arc, if
     *  the node is final, from this node into the queue.  */
    public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input,
-                              float boost, CharSequence context) throws IOException {
+                              float boost, CharSequence context, int payload) throws IOException {

      // De-dup NO_OUTPUT since it must be a singleton:
      if (startOutput.equals(fst.outputs.getNoOutput())) {
        startOutput = fst.outputs.getNoOutput();
      }

-      FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context);
+      FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context, payload);
      fst.readFirstTargetArc(node, path.arc, bytesReader);

-      //System.out.println("add start paths");
-
      // Bootstrap: find the min starting arc
      while (true) {
        if (allowEmptyString || path.arc.label != FST.END_LABEL) {
@ -415,8 +420,6 @@ public final class Util {

      final List<Result<T>> results = new ArrayList<>();

-      //System.out.println("search topN=" + topN);
-
      final BytesReader fstReader = fst.getBytesReader();
      final T NO_OUTPUT = fst.outputs.getNoOutput();

@ -430,13 +433,11 @@ public final class Util {

      // For each top N path:
      while (results.size() < topN) {
-        //System.out.println("\nfind next path: queue.size=" + queue.size());

        FSTPath<T> path;

        if (queue == null) {
          // Ran out of paths
-          //System.out.println("  break queue=null");
          break;
        }

@ -446,15 +447,18 @@ public final class Util {

        if (path == null) {
          // There were less than topN paths available:
-          //System.out.println("  break no more paths");
          break;
        }
+        //System.out.println("pop path=" + path + " arc=" + path.arc.output);
+
+        if (acceptPartialPath(path) == false) {
+          continue;
+        }

        if (path.arc.label == FST.END_LABEL) {
-          //System.out.println("    empty string!  cost=" + path.cost);
          // Empty string!
          path.input.setLength(path.input.length() - 1);
-          results.add(new Result<>(path.input.get(), path.cost));
+          results.add(new Result<>(path.input.get(), path.output));
          continue;
        }

@ -463,8 +467,6 @@ public final class Util {
          queue = null;
        }

-        //System.out.println("  path: " + path);
-        
        // We take path and find its "0 output completion",
        // ie, just keep traversing the first arc with
        // NO_OUTPUT that we can find, since this must lead
@ -474,13 +476,11 @@ public final class Util {
        // For each input letter:
        while (true) {

-          //System.out.println("\n    cycle path: " + path);         
          fst.readFirstTargetArc(path.arc, path.arc, fstReader);

          // For each arc leaving this node:
          boolean foundZero = false;
          while(true) {
-            //System.out.println("      arc=" + (char) path.arc.label + " cost=" + path.arc.output);
            // tricky: instead of comparing output == 0, we must
            // express it via the comparator compare(output, 0) == 0
            if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) {
@ -514,18 +514,19 @@ public final class Util {

          if (path.arc.label == FST.END_LABEL) {
            // Add final output:
-            //System.out.println("    done!: " + path);
-            path.cost = fst.outputs.add(path.cost, path.arc.output);
+            path.output = fst.outputs.add(path.output, path.arc.output);
            if (acceptResult(path)) {
-              //System.out.println("    add result: " + path);
-              results.add(new Result<>(path.input.get(), path.cost));
+              results.add(new Result<>(path.input.get(), path.output));
            } else {
              rejectCount++;
            }
            break;
          } else {
            path.input.append(path.arc.label);
-            path.cost = fst.outputs.add(path.cost, path.arc.output);
+            path.output = fst.outputs.add(path.output, path.arc.output);
+            if (acceptPartialPath(path) == false) {
+              break;
+            }
          }
        }
      }
@ -533,7 +534,12 @@ public final class Util {
    }

    protected boolean acceptResult(FSTPath<T> path) {
-      return acceptResult(path.input.get(), path.cost);
+      return acceptResult(path.input.get(), path.output);
+    }
+
+    /** Override this to prevent considering a path before it's complete */
+    protected boolean acceptPartialPath(FSTPath<T> path) {
+      return true;
    }

    protected boolean acceptResult(IntsRef input, T output) {
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionAnalyzer.java
@ -81,7 +81,7 @@ public final class CompletionAnalyzer extends AnalyzerWrapper {
  private final int maxGraphExpansions;

  /**
-   * Wraps an analyzer to convert it's output token stream to an automaton
+   * Wraps an analyzer to convert its output token stream to an automaton
   *
   * @param analyzer token stream to be converted to an automaton
   * @param preserveSep Preserve separation between tokens when converting to an automaton
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionQuery.java
@ -34,7 +34,7 @@ import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_L
 * filtered by {@link BitsProducer}. This should be used to query against any {@link SuggestField}s
 * or {@link ContextSuggestField}s of documents.
 * <p>
- * Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int)} to execute any query
+ * Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)} to execute any query
 * that provides a concrete implementation of this query. Example below shows using this query
 * to retrieve the top 5 documents.
 *
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java
@ -32,12 +32,11 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
 import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PairOutputs;
 import org.apache.lucene.util.fst.PairOutputs.Pair;
+import org.apache.lucene.util.fst.PairOutputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;

-import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseDocID;
 import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseSurfaceForm;

 /**
@ -142,21 +141,74 @@ public final class NRTSuggester implements Accountable {
    // maximum number of suggestions that can be collected.
    final int topN = collector.getCountToCollect() * prefixPaths.size();
    final int queueSize = getMaxTopNSearcherQueueSize(topN, scorer.reader.numDocs(), liveDocsRatio, scorer.filtered);
+
+    final CharsRefBuilder spare = new CharsRefBuilder();
+
    Comparator<Pair<Long, BytesRef>> comparator = getComparator();
    Util.TopNSearcher<Pair<Long, BytesRef>> searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst, topN, queueSize, comparator,
        new ScoringPathComparator(scorer)) {

-      private final CharsRefBuilder spare = new CharsRefBuilder();
+      private final ByteArrayDataInput scratchInput = new ByteArrayDataInput();
+
+      @Override
+      protected boolean acceptPartialPath(Util.FSTPath<Pair<Long,BytesRef>> path) {
+        if (collector.doSkipDuplicates()) {
+          // We are removing dups
+          if (path.payload == -1) {
+            // This path didn't yet see the complete surface form; let's see if it just did with the arc output we just added:
+            BytesRef arcOutput = path.arc.output.output2;
+            BytesRef output = path.output.output2;
+            for(int i=0;i<arcOutput.length;i++) {
+              if (arcOutput.bytes[arcOutput.offset + i] == payloadSep) {
+                // OK this arc that the path was just extended by contains the payloadSep, so we now have a full surface form in this path
+                path.payload = output.length - arcOutput.length + i;
+                assert output.bytes[output.offset + path.payload] == payloadSep;
+                break;
+              }
+            }
+          }
+
+          if (path.payload != -1) {
+            BytesRef output = path.output.output2;
+            spare.copyUTF8Bytes(output.bytes, output.offset, path.payload);
+            if (collector.seenSurfaceForms.contains(spare.chars(), 0, spare.length())) {
+              return false;
+            }
+          }
+        }
+        return true;
+      }

      @Override
      protected boolean acceptResult(Util.FSTPath<Pair<Long, BytesRef>> path) {
-        int payloadSepIndex = parseSurfaceForm(path.cost.output2, payloadSep, spare);
-        int docID = parseDocID(path.cost.output2, payloadSepIndex);
+        BytesRef output = path.output.output2;
+        int payloadSepIndex;
+        if (path.payload != -1) {
+          payloadSepIndex = path.payload;
+          spare.copyUTF8Bytes(output.bytes, output.offset, payloadSepIndex);
+        } else {
+          assert collector.doSkipDuplicates() == false;
+          payloadSepIndex = parseSurfaceForm(output, payloadSep, spare);
+        }
+
+        scratchInput.reset(output.bytes, output.offset + payloadSepIndex + 1, output.length - payloadSepIndex - 1);
+        int docID = scratchInput.readVInt();
+        
        if (!scorer.accept(docID, acceptDocs)) {
          return false;
        }
+        if (collector.doSkipDuplicates()) {
+          // now record that we've seen this surface form:
+          char[] key = new char[spare.length()];
+          System.arraycopy(spare.chars(), 0, key, 0, spare.length());
+          if (collector.seenSurfaceForms.contains(key)) {
+            // we already collected a higher scoring document with this key, in this segment:
+            return false;
+          }
+          collector.seenSurfaceForms.add(key);
+        }
        try {
-          float score = scorer.score(decode(path.cost.output1), path.boost);
+          float score = scorer.score(decode(path.output.output1), path.boost);
          collector.collect(docID, spare.toCharsRef(), path.context, score);
          return true;
        } catch (IOException e) {
@ -167,8 +219,20 @@ public final class NRTSuggester implements Accountable {

    for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) {
      scorer.weight.setNextMatch(path.input.get());
+      BytesRef output = path.output.output2;
+      int payload = -1;
+      if (collector.doSkipDuplicates()) {
+        for(int j=0;j<output.length;j++) {
+          if (output.bytes[output.offset+j] == payloadSep) {
+            // Important to cache this, else we have a possibly O(N^2) cost where N is the length of suggestions
+            payload = j;
+            break;
+          }
+        }
+      }
+      
      searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(),
-          scorer.weight.context());
+                             scorer.weight.context(), payload);
    }
    // hits are also returned by search()
    // we do not use it, instead collect at acceptResult
@ -191,8 +255,8 @@ public final class NRTSuggester implements Accountable {

    @Override
    public int compare(Util.FSTPath<Pair<Long, BytesRef>> first, Util.FSTPath<Pair<Long, BytesRef>> second) {
-      int cmp = Float.compare(scorer.score(decode(second.cost.output1), second.boost),
-          scorer.score(decode(first.cost.output1), first.boost));
+      int cmp = Float.compare(scorer.score(decode(second.output.output1), second.boost),
+          scorer.score(decode(first.output.output1), first.boost));
      return (cmp != 0) ? cmp : first.input.get().compareTo(second.input.get());
    }
  }
@ -285,13 +349,6 @@ public final class NRTSuggester implements Accountable {
      return surfaceFormLen;
    }

-    static int parseDocID(final BytesRef output, int payloadSepIndex) {
-      assert payloadSepIndex != -1 : "payload sep index can not be -1";
-      ByteArrayDataInput input = new ByteArrayDataInput(output.bytes, payloadSepIndex + output.offset + 1,
-          output.length - (payloadSepIndex + output.offset));
-      return input.readVInt();
-    }
-
    static BytesRef make(final BytesRef surface, int docID, int payloadSep) throws IOException {
      int len = surface.length + MAX_DOC_ID_LEN_WITH_SEP;
      byte[] buffer = new byte[len];
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestField.java
@ -47,7 +47,7 @@ import org.apache.lucene.util.BytesRef;
 * document.add(new SuggestField(name, "suggestion", 4));
 * </pre>
 * To perform document suggestions based on the this field, use
- * {@link SuggestIndexSearcher#suggest(CompletionQuery, int)}
+ * {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)}
 *
 * @lucene.experimental
 */
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/SuggestIndexSearcher.java
@ -38,6 +38,9 @@ import org.apache.lucene.search.Weight;
 */
 public class SuggestIndexSearcher extends IndexSearcher {

+  // NOTE: we do not accept an ExecutorService here, because at least the dedup
+  // logic in TopSuggestDocsCollector/NRTSuggester would not be thread safe (and maybe other things)
+
  /**
   * Creates a searcher with document suggest capabilities
   * for <code>reader</code>.
@ -50,8 +53,8 @@ public class SuggestIndexSearcher extends IndexSearcher {
   * Returns top <code>n</code> completion hits for
   * <code>query</code>
   */
-  public TopSuggestDocs suggest(CompletionQuery query, int n) throws IOException {
-    TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n);
+  public TopSuggestDocs suggest(CompletionQuery query, int n, boolean skipDuplicates) throws IOException {
+    TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n, skipDuplicates);
    suggest(query, collector);
    return collector.get();
  }
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocs.java
@ -66,6 +66,25 @@ public class TopSuggestDocs extends TopDocs {
    public int compareTo(SuggestScoreDoc o) {
      return Lookup.CHARSEQUENCE_COMPARATOR.compare(key, o.key);
    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (other instanceof SuggestScoreDoc == false) {
+        return false;
+      } else {
+        return key.equals(((SuggestScoreDoc) other).key);
+      }
+    }
+
+    @Override
+    public int hashCode() {
+      return key.hashCode();
+    }
+
+    @Override
+    public String toString() {
+      return "key=" + key + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex;      
+    }
  }

  /**
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/TopSuggestDocsCollector.java
@ -17,7 +17,12 @@
 package org.apache.lucene.search.suggest.document;

 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;

+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.SimpleCollector;
@ -47,9 +52,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
  private final SuggestScoreDocPriorityQueue priorityQueue;
  private final int num;

-  /**
-   * Document base offset for the current Leaf
-   */
+  /** Only set if we are deduplicating hits: holds all per-segment hits until the end, when we dedup them */
+  private final List<SuggestScoreDoc> pendingResults;
+
+  /** Only set if we are deduplicating hits: holds all surface forms seen so far in the current segment */
+  final CharArraySet seenSurfaceForms;
+
+  /** Document base offset for the current Leaf */
  protected int docBase;

  /**
@ -58,12 +67,24 @@ public class TopSuggestDocsCollector extends SimpleCollector {
   * Collects at most <code>num</code> completions
   * with corresponding document and weight
   */
-  public TopSuggestDocsCollector(int num) {
+  public TopSuggestDocsCollector(int num, boolean skipDuplicates) {
    if (num <= 0) {
      throw new IllegalArgumentException("'num' must be > 0");
    }
    this.num = num;
    this.priorityQueue = new SuggestScoreDocPriorityQueue(num);
+    if (skipDuplicates) {
+      seenSurfaceForms = new CharArraySet(num, false);
+      pendingResults = new ArrayList<>();
+    } else {
+      seenSurfaceForms = null;
+      pendingResults = null;
+    }
+  }
+
+  /** Returns true if duplicates are filtered out */
+  protected boolean doSkipDuplicates() {
+    return seenSurfaceForms != null;
  }

  /**
@ -76,6 +97,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    docBase = context.docBase;
+    if (seenSurfaceForms != null) {
+      seenSurfaceForms.clear();
+      // NOTE: this also clears the priorityQueue:
+      for (SuggestScoreDoc hit : priorityQueue.getResults()) {
+        pendingResults.add(hit);
+      }
+    }
  }

  /**
@ -101,7 +129,52 @@ public class TopSuggestDocsCollector extends SimpleCollector {
   * Returns at most <code>num</code> Top scoring {@link org.apache.lucene.search.suggest.document.TopSuggestDocs}s
   */
  public TopSuggestDocs get() throws IOException {
-    SuggestScoreDoc[] suggestScoreDocs = priorityQueue.getResults();
+
+    SuggestScoreDoc[] suggestScoreDocs;
+    
+    if (seenSurfaceForms != null) {
+      // NOTE: this also clears the priorityQueue:
+      for (SuggestScoreDoc hit : priorityQueue.getResults()) {
+        pendingResults.add(hit);
+      }
+
+      // Deduplicate all hits: we already dedup'd efficiently within each segment by
+      // truncating the FST top paths search, but across segments there may still be dups:
+      seenSurfaceForms.clear();
+
+      // TODO: we could use a priority queue here to make cost O(N * log(num)) instead of O(N * log(N)), where N = O(num *
+      // numSegments), but typically numSegments is smallish and num is smallish so this won't matter much in practice:
+
+      Collections.sort(pendingResults,
+                       new Comparator<SuggestScoreDoc>() {
+                         @Override
+                         public int compare(SuggestScoreDoc a, SuggestScoreDoc b) {
+                           // sort by higher score
+                           int cmp = Float.compare(b.score, a.score);
+                           if (cmp == 0) {
+                             // tie break by lower docID:
+                             cmp = Integer.compare(a.doc, b.doc);
+                           }
+                           return cmp;
+                         }
+                       });
+
+      List<SuggestScoreDoc> hits = new ArrayList<>();
+      
+      for (SuggestScoreDoc hit : pendingResults) {
+        if (seenSurfaceForms.contains(hit.key) == false) {
+          seenSurfaceForms.add(hit.key);
+          hits.add(hit);
+          if (hits.size() == num) {
+            break;
+          }
+        }
+      }
+      suggestScoreDocs = hits.toArray(new SuggestScoreDoc[0]);
+    } else {
+      suggestScoreDocs = priorityQueue.getResults();
+    }
+
    if (suggestScoreDocs.length > 0) {
      return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
    } else {
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextQuery.java
@ -89,7 +89,7 @@ public class TestContextQuery extends LuceneTestCase {
    query.addContext("type2", 2);
    query.addContext("type3", 3);
    query.addContext("type4", 4);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion4", "type4", 5 * 4),
        new Entry("suggestion3", "type3", 6 * 3),
@ -124,7 +124,7 @@ public class TestContextQuery extends LuceneTestCase {
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")));
    IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
-      suggestIndexSearcher.suggest(query, 4);
+      suggestIndexSearcher.suggest(query, 4, false);
    });
    assertTrue(expected.getMessage().contains("SuggestField"));

@ -155,7 +155,7 @@ public class TestContextQuery extends LuceneTestCase {
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
    query.addContext("type", 1, false);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4),
        new Entry("suggestion2", "type2", 3),
@ -185,7 +185,7 @@ public class TestContextQuery extends LuceneTestCase {
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
    query.addContext("type", 1);
    query.addContext("typetype", 2);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "typetype", 4 * 2),
        new Entry("suggestion2", "type", 3 * 1)
@ -215,7 +215,7 @@ public class TestContextQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion_no_ctx", null, 4),
        new Entry("suggestion", "type4", 1));
@ -249,7 +249,7 @@ public class TestContextQuery extends LuceneTestCase {
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
    query.addContext("type4", 10);
    query.addAllContexts();
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion4", "type4", 1 * 10),
        new Entry("suggestion1", null, 4),
@ -284,7 +284,7 @@ public class TestContextQuery extends LuceneTestCase {
    query.addContext("type2", 2);
    query.addContext("type3", 3);
    query.addContext("type4", 4);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion", "type1", 4 * 10),
        new Entry("suggestion", "type3", 4 * 3),
@ -321,7 +321,7 @@ public class TestContextQuery extends LuceneTestCase {
    query.addContext("type1", 7);
    query.addContext("type2", 6);
    query.addAllContexts();
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4 * 7),
        new Entry("suggestion2", "type2", 3 * 6),
@ -357,7 +357,7 @@ public class TestContextQuery extends LuceneTestCase {
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
    query.addContext("type3", 3);
    query.addContext("type4", 4);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion3", "type3", 2 * 3),
        new Entry("suggestion4", "type4", 1 * 4)
@ -389,7 +389,7 @@ public class TestContextQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4),
        new Entry("suggestion2", "type2", 3),
@ -426,7 +426,7 @@ public class TestContextQuery extends LuceneTestCase {
    query.addContext("type2", 2);
    query.addContext("type3", 3);
    query.addContext("type4", 4);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type3", 8 * 3),
        new Entry("suggestion4", "type4", 5 * 4),
@ -460,7 +460,7 @@ public class TestContextQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4),
        new Entry("suggestion2", "type2", 3),
@ -520,7 +520,7 @@ public class TestContextQuery extends LuceneTestCase {
        for (int i = 0; i < contexts.size(); i++) {
          query.addContext(contexts.get(i), i + 1);
        }
-        TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+        TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
        assertSuggestions(suggest, Arrays.copyOfRange(expectedResults, 0, 4));
      }
    }
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestContextSuggestField.java
@ -172,7 +172,7 @@ public class TestContextSuggestField extends LuceneTestCase {
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);

    CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", 4),
        new Entry("suggestion2", 3),
@ -180,7 +180,7 @@ public class TestContextSuggestField extends LuceneTestCase {
        new Entry("suggestion4", 1));

    query = new PrefixCompletionQuery(analyzer, new Term("context_suggest_field", "sugg"));
-    suggest = suggestIndexSearcher.suggest(query, 10);
+    suggest = suggestIndexSearcher.suggest(query, 10, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4),
        new Entry("suggestion2", "type2", 3),
@ -212,14 +212,14 @@ public class TestContextSuggestField extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    ContextQuery query = new ContextQuery(new PrefixCompletionQuery(completionAnalyzer, new Term("suggest_field", "sugg")));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4),
        new Entry("suggestion2", "type2", 3),
        new Entry("suggestion3", "type3", 2),
        new Entry("suggestion4", "type4", 1));
    query.addContext("type1");
-    suggest = suggestIndexSearcher.suggest(query, 4);
+    suggest = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggest,
        new Entry("suggestion1", "type1", 4));
    reader.close();
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestFuzzyCompletionQuery.java
@ -66,7 +66,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggest,
        new Entry("suaggestion", 4 * 2),
        new Entry("suggestion", 2 * 3),
@ -101,7 +101,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query =  new ContextQuery(new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugge")));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("suggestion", "type4", 4),
        new Entry("suggdestion", "type4", 4),
@ -140,7 +140,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
    ContextQuery contextQuery = new ContextQuery(fuzzyQuery);
    contextQuery.addContext("type1", 6);
    contextQuery.addContext("type3", 2);
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
    assertSuggestions(suggest,
        new Entry("sduggestion", "type1", 1 * (1 + 6)),
        new Entry("sugdgestion", "type3", 1 * (3 + 2))
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestPrefixCompletionQuery.java
@ -135,7 +135,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
-    TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3);
+    TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
    assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3));

    reader.close();
@ -165,7 +165,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
    // if at most half of the top scoring documents have been filtered out
    // the search should be admissible for a single segment
-    TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
    assertTrue(suggest.totalHits >= 1);
    assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
    assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));
@ -174,14 +174,14 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
    // if more than half of the top scoring documents have been filtered out
    // search is not admissible, so # of suggestions requested is num instead of 1
-    suggest = indexSearcher.suggest(query, num);
+    suggest = indexSearcher.suggest(query, num, false);
    assertSuggestions(suggest, new Entry("abc_0", 0));

    filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1);
    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
    // if only lower scoring documents are filtered out
    // search is admissible
-    suggest = indexSearcher.suggest(query, 1);
+    suggest = indexSearcher.suggest(query, 1, false);
    assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1));

    reader.close();
@ -216,13 +216,13 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {

    // suggest without filter
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, 3);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, 3, false);
    assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3));

    // suggest with filter
    BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12);
    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter);
-    suggest = indexSearcher.suggest(query, 3);
+    suggest = indexSearcher.suggest(query, 3, false);
    assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3));

    reader.close();
@ -243,10 +243,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "fo"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // all 4
+    TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // all 4
    assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "foob"));
-    suggest = indexSearcher.suggest(query, 4); // not the fo
+    suggest = indexSearcher.suggest(query, 4, false); // not the fo
    assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
    reader.close();
    iw.close();
@ -266,10 +266,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "fo"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, 4); //matches all 4
+    TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); //matches all 4
    assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "foob"));
-    suggest = indexSearcher.suggest(query, 4); // only foobar
+    suggest = indexSearcher.suggest(query, 4, false); // only foobar
    assertSuggestions(suggest, new Entry("foobar", 7));
    reader.close();
    iw.close();
@ -289,10 +289,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "fo"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // matches all 4
+    TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // matches all 4
    assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "foob"));
-    suggest = indexSearcher.suggest(query, 4); // except the fo
+    suggest = indexSearcher.suggest(query, 4, false); // except the fo
    assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
    reader.close();
    iw.close();
@ -329,10 +329,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);

    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
-    assertEquals(0, indexSearcher.suggest(query, 3).totalHits);
+    assertEquals(0, indexSearcher.suggest(query, 3, false).totalHits);

    query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
-    assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));
+    assertSuggestions(indexSearcher.suggest(query, 3, false), new Entry("apples", 3));

    reader.close();
    iw.close();
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestRegexCompletionQuery.java
@ -67,7 +67,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    RegexCompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|w|s]s?ugg"));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggest, new Entry("wsuggestion", 4), new Entry("ssuggestion", 3),
        new Entry("asuggestion", 2), new Entry("suggestion", 1));

@ -98,7 +98,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    CompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|s][d|u|s][u|d|g]"));
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
    assertSuggestions(suggest,
        new Entry("sduggestion", "type1", 5),
        new Entry("sudggestion", "type2", 4),
@ -137,7 +137,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
    contextQuery.addContext("type1", 6);
    contextQuery.addContext("type3", 7);
    contextQuery.addAllContexts();
-    TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
+    TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
    assertSuggestions(suggest,
        new Entry("sduggestion", "type1", 5 * 6),
        new Entry("sugdgestion", "type3", 3 * 7),
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@ -20,7 +20,10 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@ -33,9 +36,9 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene70.Lucene70Codec;
-import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
@ -122,7 +125,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
-    TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3);
+    TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
    assertThat(lookupDocs.totalHits, equalTo(0));
    reader.close();
    iw.close();
@ -157,7 +160,7 @@ public class TestSuggestField extends LuceneTestCase {
    int[] weights = new int[num];
    for(int i = 0; i < num; i++) {
      Document document = new Document();
-      weights[i] = Math.abs(random().nextInt());
+      weights[i] = random().nextInt(Integer.MAX_VALUE);
      document.add(new SuggestField("suggest_field", "abc", weights[i]));
      iw.addDocument(document);

@ -175,13 +178,231 @@ public class TestSuggestField extends LuceneTestCase {

    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc"));
-    TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num);
+    TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false);
    assertSuggestions(lookupDocs, expectedEntries);

    reader.close();
    iw.close();
  }

+  public void testDeduplication() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
+    final int num = TestUtil.nextInt(random(), 2, 20);
+    int[] weights = new int[num];
+    int bestABCWeight = Integer.MIN_VALUE;
+    int bestABDWeight = Integer.MIN_VALUE;
+    for(int i = 0; i < num; i++) {
+      Document document = new Document();
+      weights[i] = random().nextInt(Integer.MAX_VALUE);
+      String suggestValue;
+      boolean doABC;
+      if (i == 0) {
+        doABC = true;
+      } else if (i == 1) {
+        doABC = false;
+      } else {
+        doABC = random().nextBoolean();
+      }
+      if (doABC) {
+        suggestValue = "abc";
+        bestABCWeight = Math.max(bestABCWeight, weights[i]);
+      } else {
+        suggestValue = "abd";
+        bestABDWeight = Math.max(bestABDWeight, weights[i]);
+      }
+      document.add(new SuggestField("suggest_field", suggestValue, weights[i]));
+      iw.addDocument(document);
+
+      if (usually()) {
+        iw.commit();
+      }
+    }
+
+    DirectoryReader reader = iw.getReader();
+    Entry[] expectedEntries = new Entry[2];
+    if (bestABDWeight > bestABCWeight) {
+      expectedEntries[0] = new Entry("abd", bestABDWeight);
+      expectedEntries[1] = new Entry("abc", bestABCWeight);
+    } else {
+      expectedEntries[0] = new Entry("abc", bestABCWeight);
+      expectedEntries[1] = new Entry("abd", bestABDWeight);
+    }
+
+    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
+    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
+    TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
+    suggestIndexSearcher.suggest(query, collector);
+    TopSuggestDocs lookupDocs = collector.get();
+    assertSuggestions(lookupDocs, expectedEntries);
+
+    reader.close();
+    iw.close();
+  }
+
+  public void testExtremeDeduplication() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
+    final int num = atLeast(5000);
+    int bestWeight = Integer.MIN_VALUE;
+    for(int i = 0; i < num; i++) {
+      Document document = new Document();
+      int weight = TestUtil.nextInt(random(), 10, 100);
+      bestWeight = Math.max(weight, bestWeight);
+      document.add(new SuggestField("suggest_field", "abc", weight));
+      iw.addDocument(document);
+      if (rarely()) {
+        iw.commit();
+      }
+    }
+    Document document = new Document();
+    document.add(new SuggestField("suggest_field", "abd", 7));
+    iw.addDocument(document);
+
+    if (random().nextBoolean()) {
+      iw.forceMerge(1);
+    }
+
+    DirectoryReader reader = iw.getReader();
+    Entry[] expectedEntries = new Entry[2];
+    expectedEntries[0] = new Entry("abc", bestWeight);
+    expectedEntries[1] = new Entry("abd", 7);
+
+    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
+    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
+    TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
+    suggestIndexSearcher.suggest(query, collector);
+    TopSuggestDocs lookupDocs = collector.get();
+    assertSuggestions(lookupDocs, expectedEntries);
+
+    reader.close();
+    iw.close();
+  }
+  
+  private static String randomSimpleString(int numDigits, int maxLen) {
+    final int len = TestUtil.nextInt(random(), 1, maxLen);
+    final char[] chars = new char[len];
+    for(int j=0;j<len;j++) {
+      chars[j] = (char) ('a' + random().nextInt(numDigits));
+    }
+    return new String(chars);
+  }
+
+  public void testRandom() throws Exception {
+    int numDigits = TestUtil.nextInt(random(), 1, 6);
+    Set<String> keys = new HashSet<>();
+    int keyCount = TestUtil.nextInt(random(), 1, 20);
+    if (numDigits == 1) {
+      keyCount = Math.min(9, keyCount);
+    }
+    while (keys.size() < keyCount) {
+      keys.add(randomSimpleString(numDigits, 10));
+    }
+    List<String> keysList = new ArrayList<>(keys);
+
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");
+    // we rely on docID order:
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    int docCount = TestUtil.nextInt(random(), 1, 200);
+    Entry[] docs = new Entry[docCount];
+    for(int i=0;i<docCount;i++) {
+      int weight = random().nextInt(40);
+      String key = keysList.get(random().nextInt(keyCount));
+      //System.out.println("KEY: " + key);
+      docs[i] = new Entry(key, null, weight, i);
+      Document doc = new Document();
+      doc.add(new SuggestField("suggest_field", key, weight));
+      iw.addDocument(doc);
+      if (usually()) {
+        iw.commit();
+      }
+    }
+
+    DirectoryReader reader = iw.getReader();
+    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
+
+    int iters = atLeast(200);
+    for(int iter=0;iter<iters;iter++) {
+      String prefix = randomSimpleString(numDigits, 2);
+      if (VERBOSE) {
+        System.out.println("\nTEST: prefix=" + prefix);
+      }
+
+      // slow but hopefully correct suggester:
+      List<Entry> expected = new ArrayList<>();
+      for(Entry doc : docs) {
+        if (doc.output.startsWith(prefix)) {
+          expected.add(doc);
+        }
+      }
+      Collections.sort(expected,
+                       new Comparator<Entry>() {
+                         @Override
+                         public int compare(Entry a, Entry b) {
+                           // sort by higher score:
+                           int cmp = Float.compare(b.value, a.value);
+                           if (cmp == 0) {
+                             // tie break by smaller docID:
+                             cmp = Integer.compare(a.id, b.id);
+                           }
+                           return cmp;
+                         }
+                       });
+
+      boolean dedup = random().nextBoolean();
+      if (dedup) {
+        List<Entry> deduped = new ArrayList<>();
+        Set<String> seen = new HashSet<>();
+        for(Entry entry : expected) {
+          if (seen.contains(entry.output) == false) {
+            seen.add(entry.output);
+            deduped.add(entry);
+          }
+        }
+        expected = deduped;
+      }
+
+      // TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
+      //int topN = TestUtil.nextInt(random(), 1, docCount+10);
+      int topN = docCount;
+      
+      if (VERBOSE) {
+        if (dedup) {
+          System.out.println("  expected (dedup'd) topN=" + topN + ":");
+        } else {
+          System.out.println("  expected topN=" + topN + ":");
+        }
+        for(int i=0;i<expected.size();i++) {
+          if (i >= topN) {
+            System.out.println("    leftover: " + i + ": " + expected.get(i));
+          } else {
+            System.out.println("    " + i + ": " + expected.get(i));
+          }
+        }
+      }
+      expected = expected.subList(0, Math.min(topN, expected.size()));
+      
+      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
+      TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
+      searcher.suggest(query, collector);
+      TopSuggestDocs actual = collector.get();
+      if (VERBOSE) {
+        System.out.println("  actual:");
+        SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
+        for(int i=0;i<suggestScoreDocs.length;i++) {
+          System.out.println("    " + i + ": " + suggestScoreDocs[i]);
+        }
+      }
+
+      assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
+    }
+    
+    reader.close();
+    iw.close();
+  }
+
  @Test
  public void testNRTDeletedDocFiltering() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
@ -214,7 +435,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = DirectoryReader.open(iw);
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, numLive);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
    assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));

    reader.close();
@ -248,7 +469,7 @@ public class TestSuggestField extends LuceneTestCase {
    // no random access required;
    // calling suggest with filter that does not match any documents should early terminate
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
-    TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
    assertThat(suggest.totalHits, equalTo(0));
    reader.close();
    iw.close();
@ -276,7 +497,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = DirectoryReader.open(iw);
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
    assertThat(suggest.totalHits, equalTo(0));

    reader.close();
@ -306,7 +527,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = DirectoryReader.open(iw);
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
    assertSuggestions(suggest, new Entry("abc_1", 1));

    reader.close();
@ -335,10 +556,10 @@ public class TestSuggestField extends LuceneTestCase {

    SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap"));
-    TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4);
+    TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3));
    query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap"));
-    TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4);
+    TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4, false);
    assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2));

    // check that the doc ids are consistent
@ -372,7 +593,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
    assertSuggestions(suggest, new Entry("abc_" + num, num));

    reader.close();
@ -402,7 +623,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size());
+    TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size(), false);
    assertSuggestions(suggest, entries.toArray(new Entry[entries.size()]));

    reader.close();
@ -430,7 +651,7 @@ public class TestSuggestField extends LuceneTestCase {
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
-    TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+    TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
    assertEquals(num, suggest.totalHits);
    for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
      String key = suggestScoreDoc.key.toString();
@ -456,7 +677,7 @@ public class TestSuggestField extends LuceneTestCase {
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" +String.valueOf(i);
-      int weight = Math.abs(random().nextInt());
+      int weight = random().nextInt(Integer.MAX_VALUE);
      document.add(new SuggestField("suggest_field", suggest, weight));
      mappings.put(suggest, weight);
      iw.addDocument(document);
@ -470,7 +691,7 @@ public class TestSuggestField extends LuceneTestCase {
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    for (String prefix : prefixes) {
      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
-      TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+      TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
      assertTrue(suggest.totalHits > 0);
      float topScore = -1;
      for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
@ -498,7 +719,7 @@ public class TestSuggestField extends LuceneTestCase {
    for (int i = 0; i < num; i++) {
      Document document = lineFileDocs.nextDoc();
      String title = document.getField("title").stringValue();
-      int weight = Math.abs(random().nextInt());
+      int weight = random().nextInt(Integer.MAX_VALUE);
      Integer prevWeight = mappings.get(title);
      if (prevWeight == null || prevWeight < weight) {
        mappings.put(title, weight);
@ -519,7 +740,7 @@ public class TestSuggestField extends LuceneTestCase {
      String title = entry.getKey();

      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
-      TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size());
+      TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size(), false);
      assertTrue(suggest.totalHits > 0);
      boolean matched = false;
      for (ScoreDoc scoreDoc : suggest.scoreDocs) {
@ -577,13 +798,13 @@ public class TestSuggestField extends LuceneTestCase {
          try {
            startingGun.await();
            PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1));
-            TopSuggestDocs suggest = indexSearcher.suggest(query, num);
+            TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
            assertSuggestions(suggest, entries1);
            query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2));
-            suggest = indexSearcher.suggest(query, num);
+            suggest = indexSearcher.suggest(query, num, false);
            assertSuggestions(suggest, entries2);
            query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3));
-            suggest = indexSearcher.suggest(query, num);
+            suggest = indexSearcher.suggest(query, num, false);
            assertSuggestions(suggest, entries3);
          } catch (Throwable e) {
            errors.add(e);
@ -607,28 +828,39 @@ public class TestSuggestField extends LuceneTestCase {
    final String output;
    final float value;
    final String context;
+    final int id;

    Entry(String output, float value) {
      this(output, null, value);
    }

    Entry(String output, String context, float value) {
+      this(output, context, value, -1);
+    }
+
+    Entry(String output, String context, float value, int id) {
      this.output = output;
      this.value = value;
      this.context = context;
+      this.id = id;
+    }
+
+    @Override
+    public String toString() {
+      return "key=" + output + " score=" + value + " context=" + context + " id=" + id;
    }
  }

  static void assertSuggestions(TopDocs actual, Entry... expected) {
    SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
-    assertThat(suggestScoreDocs.length, equalTo(expected.length));
-    for (int i = 0; i < suggestScoreDocs.length; i++) {
+    for (int i = 0; i < Math.min(expected.length, suggestScoreDocs.length); i++) {
      SuggestScoreDoc lookupDoc = suggestScoreDocs[i];
-      String msg = "Expected: " + toString(expected[i]) + " Actual: " + toString(lookupDoc);
+      String msg = "Hit " + i + ": expected: " + toString(expected[i]) + " but actual: " + toString(lookupDoc);
      assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output));
      assertThat(msg, lookupDoc.score, equalTo(expected[i].value));
      assertThat(msg, lookupDoc.context, equalTo(expected[i].context));
    }
+    assertThat(suggestScoreDocs.length, equalTo(expected.length));
  }

  private static String toString(Entry expected) {