LUCENE-7686: add efficient de-duping to the NRT document suggester

This commit is contained in:
Mike McCandless 2017-02-22 16:04:26 -05:00
parent 29a5ea44a7
commit 4e2cf61ac7
15 changed files with 517 additions and 123 deletions

View File

@ -126,6 +126,10 @@ New Features
* LUCENE-7688: Add OneMergeWrappingMergePolicy class. * LUCENE-7688: Add OneMergeWrappingMergePolicy class.
(Keith Laban, Christine Poerschke) (Keith Laban, Christine Poerschke)
* LUCENE-7686: The near-real-time document suggester can now
efficiently filter out duplicate suggestions (Uwe Schindler, Mike
McCandless)
Bug Fixes Bug Fixes
* LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads * LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads

View File

@ -248,32 +248,38 @@ public final class Util {
* @lucene.experimental * @lucene.experimental
*/ */
public static class FSTPath<T> { public static class FSTPath<T> {
/** Holds the last arc appended to this path */
public FST.Arc<T> arc; public FST.Arc<T> arc;
public T cost; /** Holds cost plus any usage-specific output: */
public T output;
public final IntsRefBuilder input; public final IntsRefBuilder input;
public final float boost; public final float boost;
public final CharSequence context; public final CharSequence context;
// Custom int payload for consumers; the NRT suggester uses this to record if this path has already enumerated a surface form
public int payload;
/** Sole constructor */ /** Sole constructor */
public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input) { public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input) {
this(cost, arc, input, 0, null); this(output, arc, input, 0, null, -1);
} }
public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context) { public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
this.arc = new FST.Arc<T>().copyFrom(arc); this.arc = new FST.Arc<T>().copyFrom(arc);
this.cost = cost; this.output = output;
this.input = input; this.input = input;
this.boost = boost; this.boost = boost;
this.context = context; this.context = context;
this.payload = payload;
} }
public FSTPath<T> newPath(T cost, IntsRefBuilder input) { public FSTPath<T> newPath(T output, IntsRefBuilder input) {
return new FSTPath<>(cost, this.arc, input, this.boost, this.context); return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload);
} }
@Override @Override
public String toString() { public String toString() {
return "input=" + input.get() + " cost=" + cost + "context=" + context + "boost=" + boost; return "input=" + input.get() + " output=" + output + " context=" + context + " boost=" + boost + " payload=" + payload;
} }
} }
@ -287,7 +293,7 @@ public final class Util {
@Override @Override
public int compare(FSTPath<T> a, FSTPath<T> b) { public int compare(FSTPath<T> a, FSTPath<T> b) {
int cmp = comparator.compare(a.cost, b.cost); int cmp = comparator.compare(a.output, b.output);
if (cmp == 0) { if (cmp == 0) {
return a.input.get().compareTo(b.input.get()); return a.input.get().compareTo(b.input.get());
} else { } else {
@ -339,8 +345,7 @@ public final class Util {
assert queue != null; assert queue != null;
T cost = fst.outputs.add(path.cost, path.arc.output); T output = fst.outputs.add(path.output, path.arc.output);
//System.out.println(" addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
if (queue.size() == maxQueueDepth) { if (queue.size() == maxQueueDepth) {
FSTPath<T> bottom = queue.last(); FSTPath<T> bottom = queue.last();
@ -373,32 +378,32 @@ public final class Util {
newInput.copyInts(path.input.get()); newInput.copyInts(path.input.get());
newInput.append(path.arc.label); newInput.append(path.arc.label);
queue.add(path.newPath(cost, newInput)); FSTPath<T> newPath = path.newPath(output, newInput);
if (acceptPartialPath(newPath)) {
queue.add(newPath);
if (queue.size() == maxQueueDepth+1) { if (queue.size() == maxQueueDepth+1) {
queue.pollLast(); queue.pollLast();
} }
} }
}
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException { public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {
addStartPaths(node, startOutput, allowEmptyString, input, 0, null); addStartPaths(node, startOutput, allowEmptyString, input, 0, null, -1);
} }
/** Adds all leaving arcs, including 'finished' arc, if /** Adds all leaving arcs, including 'finished' arc, if
* the node is final, from this node into the queue. */ * the node is final, from this node into the queue. */
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input, public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input,
float boost, CharSequence context) throws IOException { float boost, CharSequence context, int payload) throws IOException {
// De-dup NO_OUTPUT since it must be a singleton: // De-dup NO_OUTPUT since it must be a singleton:
if (startOutput.equals(fst.outputs.getNoOutput())) { if (startOutput.equals(fst.outputs.getNoOutput())) {
startOutput = fst.outputs.getNoOutput(); startOutput = fst.outputs.getNoOutput();
} }
FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context); FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context, payload);
fst.readFirstTargetArc(node, path.arc, bytesReader); fst.readFirstTargetArc(node, path.arc, bytesReader);
//System.out.println("add start paths");
// Bootstrap: find the min starting arc // Bootstrap: find the min starting arc
while (true) { while (true) {
if (allowEmptyString || path.arc.label != FST.END_LABEL) { if (allowEmptyString || path.arc.label != FST.END_LABEL) {
@ -415,8 +420,6 @@ public final class Util {
final List<Result<T>> results = new ArrayList<>(); final List<Result<T>> results = new ArrayList<>();
//System.out.println("search topN=" + topN);
final BytesReader fstReader = fst.getBytesReader(); final BytesReader fstReader = fst.getBytesReader();
final T NO_OUTPUT = fst.outputs.getNoOutput(); final T NO_OUTPUT = fst.outputs.getNoOutput();
@ -430,13 +433,11 @@ public final class Util {
// For each top N path: // For each top N path:
while (results.size() < topN) { while (results.size() < topN) {
//System.out.println("\nfind next path: queue.size=" + queue.size());
FSTPath<T> path; FSTPath<T> path;
if (queue == null) { if (queue == null) {
// Ran out of paths // Ran out of paths
//System.out.println(" break queue=null");
break; break;
} }
@ -446,15 +447,18 @@ public final class Util {
if (path == null) { if (path == null) {
// There were less than topN paths available: // There were less than topN paths available:
//System.out.println(" break no more paths");
break; break;
} }
//System.out.println("pop path=" + path + " arc=" + path.arc.output);
if (acceptPartialPath(path) == false) {
continue;
}
if (path.arc.label == FST.END_LABEL) { if (path.arc.label == FST.END_LABEL) {
//System.out.println(" empty string! cost=" + path.cost);
// Empty string! // Empty string!
path.input.setLength(path.input.length() - 1); path.input.setLength(path.input.length() - 1);
results.add(new Result<>(path.input.get(), path.cost)); results.add(new Result<>(path.input.get(), path.output));
continue; continue;
} }
@ -463,8 +467,6 @@ public final class Util {
queue = null; queue = null;
} }
//System.out.println(" path: " + path);
// We take path and find its "0 output completion", // We take path and find its "0 output completion",
// ie, just keep traversing the first arc with // ie, just keep traversing the first arc with
// NO_OUTPUT that we can find, since this must lead // NO_OUTPUT that we can find, since this must lead
@ -474,13 +476,11 @@ public final class Util {
// For each input letter: // For each input letter:
while (true) { while (true) {
//System.out.println("\n cycle path: " + path);
fst.readFirstTargetArc(path.arc, path.arc, fstReader); fst.readFirstTargetArc(path.arc, path.arc, fstReader);
// For each arc leaving this node: // For each arc leaving this node:
boolean foundZero = false; boolean foundZero = false;
while(true) { while(true) {
//System.out.println(" arc=" + (char) path.arc.label + " cost=" + path.arc.output);
// tricky: instead of comparing output == 0, we must // tricky: instead of comparing output == 0, we must
// express it via the comparator compare(output, 0) == 0 // express it via the comparator compare(output, 0) == 0
if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) { if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) {
@ -514,18 +514,19 @@ public final class Util {
if (path.arc.label == FST.END_LABEL) { if (path.arc.label == FST.END_LABEL) {
// Add final output: // Add final output:
//System.out.println(" done!: " + path); path.output = fst.outputs.add(path.output, path.arc.output);
path.cost = fst.outputs.add(path.cost, path.arc.output);
if (acceptResult(path)) { if (acceptResult(path)) {
//System.out.println(" add result: " + path); results.add(new Result<>(path.input.get(), path.output));
results.add(new Result<>(path.input.get(), path.cost));
} else { } else {
rejectCount++; rejectCount++;
} }
break; break;
} else { } else {
path.input.append(path.arc.label); path.input.append(path.arc.label);
path.cost = fst.outputs.add(path.cost, path.arc.output); path.output = fst.outputs.add(path.output, path.arc.output);
if (acceptPartialPath(path) == false) {
break;
}
} }
} }
} }
@ -533,7 +534,12 @@ public final class Util {
} }
protected boolean acceptResult(FSTPath<T> path) { protected boolean acceptResult(FSTPath<T> path) {
return acceptResult(path.input.get(), path.cost); return acceptResult(path.input.get(), path.output);
}
/** Override this to prevent considering a path before it's complete */
protected boolean acceptPartialPath(FSTPath<T> path) {
return true;
} }
protected boolean acceptResult(IntsRef input, T output) { protected boolean acceptResult(IntsRef input, T output) {

View File

@ -81,7 +81,7 @@ public final class CompletionAnalyzer extends AnalyzerWrapper {
private final int maxGraphExpansions; private final int maxGraphExpansions;
/** /**
* Wraps an analyzer to convert it's output token stream to an automaton * Wraps an analyzer to convert its output token stream to an automaton
* *
* @param analyzer token stream to be converted to an automaton * @param analyzer token stream to be converted to an automaton
* @param preserveSep Preserve separation between tokens when converting to an automaton * @param preserveSep Preserve separation between tokens when converting to an automaton

View File

@ -34,7 +34,7 @@ import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_L
* filtered by {@link BitsProducer}. This should be used to query against any {@link SuggestField}s * filtered by {@link BitsProducer}. This should be used to query against any {@link SuggestField}s
* or {@link ContextSuggestField}s of documents. * or {@link ContextSuggestField}s of documents.
* <p> * <p>
* Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int)} to execute any query * Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)} to execute any query
* that provides a concrete implementation of this query. Example below shows using this query * that provides a concrete implementation of this query. Example below shows using this query
* to retrieve the top 5 documents. * to retrieve the top 5 documents.
* *

View File

@ -32,12 +32,11 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PairOutputs.Pair; import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseDocID;
import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseSurfaceForm; import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseSurfaceForm;
/** /**
@ -142,21 +141,74 @@ public final class NRTSuggester implements Accountable {
// maximum number of suggestions that can be collected. // maximum number of suggestions that can be collected.
final int topN = collector.getCountToCollect() * prefixPaths.size(); final int topN = collector.getCountToCollect() * prefixPaths.size();
final int queueSize = getMaxTopNSearcherQueueSize(topN, scorer.reader.numDocs(), liveDocsRatio, scorer.filtered); final int queueSize = getMaxTopNSearcherQueueSize(topN, scorer.reader.numDocs(), liveDocsRatio, scorer.filtered);
final CharsRefBuilder spare = new CharsRefBuilder();
Comparator<Pair<Long, BytesRef>> comparator = getComparator(); Comparator<Pair<Long, BytesRef>> comparator = getComparator();
Util.TopNSearcher<Pair<Long, BytesRef>> searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst, topN, queueSize, comparator, Util.TopNSearcher<Pair<Long, BytesRef>> searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst, topN, queueSize, comparator,
new ScoringPathComparator(scorer)) { new ScoringPathComparator(scorer)) {
private final CharsRefBuilder spare = new CharsRefBuilder(); private final ByteArrayDataInput scratchInput = new ByteArrayDataInput();
@Override
protected boolean acceptPartialPath(Util.FSTPath<Pair<Long,BytesRef>> path) {
if (collector.doSkipDuplicates()) {
// We are removing dups
if (path.payload == -1) {
// This path didn't yet see the complete surface form; let's see if it just did with the arc output we just added:
BytesRef arcOutput = path.arc.output.output2;
BytesRef output = path.output.output2;
for(int i=0;i<arcOutput.length;i++) {
if (arcOutput.bytes[arcOutput.offset + i] == payloadSep) {
// OK this arc that the path was just extended by contains the payloadSep, so we now have a full surface form in this path
path.payload = output.length - arcOutput.length + i;
assert output.bytes[output.offset + path.payload] == payloadSep;
break;
}
}
}
if (path.payload != -1) {
BytesRef output = path.output.output2;
spare.copyUTF8Bytes(output.bytes, output.offset, path.payload);
if (collector.seenSurfaceForms.contains(spare.chars(), 0, spare.length())) {
return false;
}
}
}
return true;
}
@Override @Override
protected boolean acceptResult(Util.FSTPath<Pair<Long, BytesRef>> path) { protected boolean acceptResult(Util.FSTPath<Pair<Long, BytesRef>> path) {
int payloadSepIndex = parseSurfaceForm(path.cost.output2, payloadSep, spare); BytesRef output = path.output.output2;
int docID = parseDocID(path.cost.output2, payloadSepIndex); int payloadSepIndex;
if (path.payload != -1) {
payloadSepIndex = path.payload;
spare.copyUTF8Bytes(output.bytes, output.offset, payloadSepIndex);
} else {
assert collector.doSkipDuplicates() == false;
payloadSepIndex = parseSurfaceForm(output, payloadSep, spare);
}
scratchInput.reset(output.bytes, output.offset + payloadSepIndex + 1, output.length - payloadSepIndex - 1);
int docID = scratchInput.readVInt();
if (!scorer.accept(docID, acceptDocs)) { if (!scorer.accept(docID, acceptDocs)) {
return false; return false;
} }
if (collector.doSkipDuplicates()) {
// now record that we've seen this surface form:
char[] key = new char[spare.length()];
System.arraycopy(spare.chars(), 0, key, 0, spare.length());
if (collector.seenSurfaceForms.contains(key)) {
// we already collected a higher scoring document with this key, in this segment:
return false;
}
collector.seenSurfaceForms.add(key);
}
try { try {
float score = scorer.score(decode(path.cost.output1), path.boost); float score = scorer.score(decode(path.output.output1), path.boost);
collector.collect(docID, spare.toCharsRef(), path.context, score); collector.collect(docID, spare.toCharsRef(), path.context, score);
return true; return true;
} catch (IOException e) { } catch (IOException e) {
@ -167,8 +219,20 @@ public final class NRTSuggester implements Accountable {
for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) { for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) {
scorer.weight.setNextMatch(path.input.get()); scorer.weight.setNextMatch(path.input.get());
BytesRef output = path.output.output2;
int payload = -1;
if (collector.doSkipDuplicates()) {
for(int j=0;j<output.length;j++) {
if (output.bytes[output.offset+j] == payloadSep) {
// Important to cache this, else we have a possibly O(N^2) cost where N is the length of suggestions
payload = j;
break;
}
}
}
searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(), searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(),
scorer.weight.context()); scorer.weight.context(), payload);
} }
// hits are also returned by search() // hits are also returned by search()
// we do not use it, instead collect at acceptResult // we do not use it, instead collect at acceptResult
@ -191,8 +255,8 @@ public final class NRTSuggester implements Accountable {
@Override @Override
public int compare(Util.FSTPath<Pair<Long, BytesRef>> first, Util.FSTPath<Pair<Long, BytesRef>> second) { public int compare(Util.FSTPath<Pair<Long, BytesRef>> first, Util.FSTPath<Pair<Long, BytesRef>> second) {
int cmp = Float.compare(scorer.score(decode(second.cost.output1), second.boost), int cmp = Float.compare(scorer.score(decode(second.output.output1), second.boost),
scorer.score(decode(first.cost.output1), first.boost)); scorer.score(decode(first.output.output1), first.boost));
return (cmp != 0) ? cmp : first.input.get().compareTo(second.input.get()); return (cmp != 0) ? cmp : first.input.get().compareTo(second.input.get());
} }
} }
@ -285,13 +349,6 @@ public final class NRTSuggester implements Accountable {
return surfaceFormLen; return surfaceFormLen;
} }
static int parseDocID(final BytesRef output, int payloadSepIndex) {
assert payloadSepIndex != -1 : "payload sep index can not be -1";
ByteArrayDataInput input = new ByteArrayDataInput(output.bytes, payloadSepIndex + output.offset + 1,
output.length - (payloadSepIndex + output.offset));
return input.readVInt();
}
static BytesRef make(final BytesRef surface, int docID, int payloadSep) throws IOException { static BytesRef make(final BytesRef surface, int docID, int payloadSep) throws IOException {
int len = surface.length + MAX_DOC_ID_LEN_WITH_SEP; int len = surface.length + MAX_DOC_ID_LEN_WITH_SEP;
byte[] buffer = new byte[len]; byte[] buffer = new byte[len];

View File

@ -47,7 +47,7 @@ import org.apache.lucene.util.BytesRef;
* document.add(new SuggestField(name, "suggestion", 4)); * document.add(new SuggestField(name, "suggestion", 4));
* </pre> * </pre>
* To perform document suggestions based on the this field, use * To perform document suggestions based on the this field, use
* {@link SuggestIndexSearcher#suggest(CompletionQuery, int)} * {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)}
* *
* @lucene.experimental * @lucene.experimental
*/ */

View File

@ -38,6 +38,9 @@ import org.apache.lucene.search.Weight;
*/ */
public class SuggestIndexSearcher extends IndexSearcher { public class SuggestIndexSearcher extends IndexSearcher {
// NOTE: we do not accept an ExecutorService here, because at least the dedup
// logic in TopSuggestDocsCollector/NRTSuggester would not be thread safe (and maybe other things)
/** /**
* Creates a searcher with document suggest capabilities * Creates a searcher with document suggest capabilities
* for <code>reader</code>. * for <code>reader</code>.
@ -50,8 +53,8 @@ public class SuggestIndexSearcher extends IndexSearcher {
* Returns top <code>n</code> completion hits for * Returns top <code>n</code> completion hits for
* <code>query</code> * <code>query</code>
*/ */
public TopSuggestDocs suggest(CompletionQuery query, int n) throws IOException { public TopSuggestDocs suggest(CompletionQuery query, int n, boolean skipDuplicates) throws IOException {
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n); TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n, skipDuplicates);
suggest(query, collector); suggest(query, collector);
return collector.get(); return collector.get();
} }

View File

@ -66,6 +66,25 @@ public class TopSuggestDocs extends TopDocs {
public int compareTo(SuggestScoreDoc o) { public int compareTo(SuggestScoreDoc o) {
return Lookup.CHARSEQUENCE_COMPARATOR.compare(key, o.key); return Lookup.CHARSEQUENCE_COMPARATOR.compare(key, o.key);
} }
@Override
public boolean equals(Object other) {
if (other instanceof SuggestScoreDoc == false) {
return false;
} else {
return key.equals(((SuggestScoreDoc) other).key);
}
}
@Override
public int hashCode() {
return key.hashCode();
}
@Override
public String toString() {
return "key=" + key + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex;
}
} }
/** /**

View File

@ -17,7 +17,12 @@
package org.apache.lucene.search.suggest.document; package org.apache.lucene.search.suggest.document;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.SimpleCollector;
@ -47,9 +52,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
private final SuggestScoreDocPriorityQueue priorityQueue; private final SuggestScoreDocPriorityQueue priorityQueue;
private final int num; private final int num;
/** /** Only set if we are deduplicating hits: holds all per-segment hits until the end, when we dedup them */
* Document base offset for the current Leaf private final List<SuggestScoreDoc> pendingResults;
*/
/** Only set if we are deduplicating hits: holds all surface forms seen so far in the current segment */
final CharArraySet seenSurfaceForms;
/** Document base offset for the current Leaf */
protected int docBase; protected int docBase;
/** /**
@ -58,12 +67,24 @@ public class TopSuggestDocsCollector extends SimpleCollector {
* Collects at most <code>num</code> completions * Collects at most <code>num</code> completions
* with corresponding document and weight * with corresponding document and weight
*/ */
public TopSuggestDocsCollector(int num) { public TopSuggestDocsCollector(int num, boolean skipDuplicates) {
if (num <= 0) { if (num <= 0) {
throw new IllegalArgumentException("'num' must be > 0"); throw new IllegalArgumentException("'num' must be > 0");
} }
this.num = num; this.num = num;
this.priorityQueue = new SuggestScoreDocPriorityQueue(num); this.priorityQueue = new SuggestScoreDocPriorityQueue(num);
if (skipDuplicates) {
seenSurfaceForms = new CharArraySet(num, false);
pendingResults = new ArrayList<>();
} else {
seenSurfaceForms = null;
pendingResults = null;
}
}
/** Returns true if duplicates are filtered out */
protected boolean doSkipDuplicates() {
return seenSurfaceForms != null;
} }
/** /**
@ -76,6 +97,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
@Override @Override
protected void doSetNextReader(LeafReaderContext context) throws IOException { protected void doSetNextReader(LeafReaderContext context) throws IOException {
docBase = context.docBase; docBase = context.docBase;
if (seenSurfaceForms != null) {
seenSurfaceForms.clear();
// NOTE: this also clears the priorityQueue:
for (SuggestScoreDoc hit : priorityQueue.getResults()) {
pendingResults.add(hit);
}
}
} }
/** /**
@ -101,7 +129,52 @@ public class TopSuggestDocsCollector extends SimpleCollector {
* Returns at most <code>num</code> Top scoring {@link org.apache.lucene.search.suggest.document.TopSuggestDocs}s * Returns at most <code>num</code> Top scoring {@link org.apache.lucene.search.suggest.document.TopSuggestDocs}s
*/ */
public TopSuggestDocs get() throws IOException { public TopSuggestDocs get() throws IOException {
SuggestScoreDoc[] suggestScoreDocs = priorityQueue.getResults();
SuggestScoreDoc[] suggestScoreDocs;
if (seenSurfaceForms != null) {
// NOTE: this also clears the priorityQueue:
for (SuggestScoreDoc hit : priorityQueue.getResults()) {
pendingResults.add(hit);
}
// Deduplicate all hits: we already dedup'd efficiently within each segment by
// truncating the FST top paths search, but across segments there may still be dups:
seenSurfaceForms.clear();
// TODO: we could use a priority queue here to make cost O(N * log(num)) instead of O(N * log(N)), where N = O(num *
// numSegments), but typically numSegments is smallish and num is smallish so this won't matter much in practice:
Collections.sort(pendingResults,
new Comparator<SuggestScoreDoc>() {
@Override
public int compare(SuggestScoreDoc a, SuggestScoreDoc b) {
// sort by higher score
int cmp = Float.compare(b.score, a.score);
if (cmp == 0) {
// tie break by lower docID:
cmp = Integer.compare(a.doc, b.doc);
}
return cmp;
}
});
List<SuggestScoreDoc> hits = new ArrayList<>();
for (SuggestScoreDoc hit : pendingResults) {
if (seenSurfaceForms.contains(hit.key) == false) {
seenSurfaceForms.add(hit.key);
hits.add(hit);
if (hits.size() == num) {
break;
}
}
}
suggestScoreDocs = hits.toArray(new SuggestScoreDoc[0]);
} else {
suggestScoreDocs = priorityQueue.getResults();
}
if (suggestScoreDocs.length > 0) { if (suggestScoreDocs.length > 0) {
return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score); return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
} else { } else {

View File

@ -89,7 +89,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type2", 2); query.addContext("type2", 2);
query.addContext("type3", 3); query.addContext("type3", 3);
query.addContext("type4", 4); query.addContext("type4", 4);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion4", "type4", 5 * 4), new Entry("suggestion4", "type4", 5 * 4),
new Entry("suggestion3", "type3", 6 * 3), new Entry("suggestion3", "type3", 6 * 3),
@ -124,7 +124,7 @@ public class TestContextQuery extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")));
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> { IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
suggestIndexSearcher.suggest(query, 4); suggestIndexSearcher.suggest(query, 4, false);
}); });
assertTrue(expected.getMessage().contains("SuggestField")); assertTrue(expected.getMessage().contains("SuggestField"));
@ -155,7 +155,7 @@ public class TestContextQuery extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type", 1, false); query.addContext("type", 1, false);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4), new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3), new Entry("suggestion2", "type2", 3),
@ -185,7 +185,7 @@ public class TestContextQuery extends LuceneTestCase {
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type", 1); query.addContext("type", 1);
query.addContext("typetype", 2); query.addContext("typetype", 2);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "typetype", 4 * 2), new Entry("suggestion1", "typetype", 4 * 2),
new Entry("suggestion2", "type", 3 * 1) new Entry("suggestion2", "type", 3 * 1)
@ -215,7 +215,7 @@ public class TestContextQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion_no_ctx", null, 4), new Entry("suggestion_no_ctx", null, 4),
new Entry("suggestion", "type4", 1)); new Entry("suggestion", "type4", 1));
@ -249,7 +249,7 @@ public class TestContextQuery extends LuceneTestCase {
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type4", 10); query.addContext("type4", 10);
query.addAllContexts(); query.addAllContexts();
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion4", "type4", 1 * 10), new Entry("suggestion4", "type4", 1 * 10),
new Entry("suggestion1", null, 4), new Entry("suggestion1", null, 4),
@ -284,7 +284,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type2", 2); query.addContext("type2", 2);
query.addContext("type3", 3); query.addContext("type3", 3);
query.addContext("type4", 4); query.addContext("type4", 4);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion", "type1", 4 * 10), new Entry("suggestion", "type1", 4 * 10),
new Entry("suggestion", "type3", 4 * 3), new Entry("suggestion", "type3", 4 * 3),
@ -321,7 +321,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type1", 7); query.addContext("type1", 7);
query.addContext("type2", 6); query.addContext("type2", 6);
query.addAllContexts(); query.addAllContexts();
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4 * 7), new Entry("suggestion1", "type1", 4 * 7),
new Entry("suggestion2", "type2", 3 * 6), new Entry("suggestion2", "type2", 3 * 6),
@ -357,7 +357,7 @@ public class TestContextQuery extends LuceneTestCase {
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
query.addContext("type3", 3); query.addContext("type3", 3);
query.addContext("type4", 4); query.addContext("type4", 4);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion3", "type3", 2 * 3), new Entry("suggestion3", "type3", 2 * 3),
new Entry("suggestion4", "type4", 1 * 4) new Entry("suggestion4", "type4", 1 * 4)
@ -389,7 +389,7 @@ public class TestContextQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")); CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4), new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3), new Entry("suggestion2", "type2", 3),
@ -426,7 +426,7 @@ public class TestContextQuery extends LuceneTestCase {
query.addContext("type2", 2); query.addContext("type2", 2);
query.addContext("type3", 3); query.addContext("type3", 3);
query.addContext("type4", 4); query.addContext("type4", 4);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type3", 8 * 3), new Entry("suggestion1", "type3", 8 * 3),
new Entry("suggestion4", "type4", 5 * 4), new Entry("suggestion4", "type4", 5 * 4),
@ -460,7 +460,7 @@ public class TestContextQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4), new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3), new Entry("suggestion2", "type2", 3),
@ -520,7 +520,7 @@ public class TestContextQuery extends LuceneTestCase {
for (int i = 0; i < contexts.size(); i++) { for (int i = 0; i < contexts.size(); i++) {
query.addContext(contexts.get(i), i + 1); query.addContext(contexts.get(i), i + 1);
} }
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, Arrays.copyOfRange(expectedResults, 0, 4)); assertSuggestions(suggest, Arrays.copyOfRange(expectedResults, 0, 4));
} }
} }

View File

@ -172,7 +172,7 @@ public class TestContextSuggestField extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")); CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", 4), new Entry("suggestion1", 4),
new Entry("suggestion2", 3), new Entry("suggestion2", 3),
@ -180,7 +180,7 @@ public class TestContextSuggestField extends LuceneTestCase {
new Entry("suggestion4", 1)); new Entry("suggestion4", 1));
query = new PrefixCompletionQuery(analyzer, new Term("context_suggest_field", "sugg")); query = new PrefixCompletionQuery(analyzer, new Term("context_suggest_field", "sugg"));
suggest = suggestIndexSearcher.suggest(query, 10); suggest = suggestIndexSearcher.suggest(query, 10, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4), new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3), new Entry("suggestion2", "type2", 3),
@ -212,14 +212,14 @@ public class TestContextSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(completionAnalyzer, new Term("suggest_field", "sugg"))); ContextQuery query = new ContextQuery(new PrefixCompletionQuery(completionAnalyzer, new Term("suggest_field", "sugg")));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4), new Entry("suggestion1", "type1", 4),
new Entry("suggestion2", "type2", 3), new Entry("suggestion2", "type2", 3),
new Entry("suggestion3", "type3", 2), new Entry("suggestion3", "type3", 2),
new Entry("suggestion4", "type4", 1)); new Entry("suggestion4", "type4", 1));
query.addContext("type1"); query.addContext("type1");
suggest = suggestIndexSearcher.suggest(query, 4); suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion1", "type1", 4)); new Entry("suggestion1", "type1", 4));
reader.close(); reader.close();

View File

@ -66,7 +66,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg")); CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suaggestion", 4 * 2), new Entry("suaggestion", 4 * 2),
new Entry("suggestion", 2 * 3), new Entry("suggestion", 2 * 3),
@ -101,7 +101,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new ContextQuery(new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugge"))); CompletionQuery query = new ContextQuery(new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugge")));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("suggestion", "type4", 4), new Entry("suggestion", "type4", 4),
new Entry("suggdestion", "type4", 4), new Entry("suggdestion", "type4", 4),
@ -140,7 +140,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
ContextQuery contextQuery = new ContextQuery(fuzzyQuery); ContextQuery contextQuery = new ContextQuery(fuzzyQuery);
contextQuery.addContext("type1", 6); contextQuery.addContext("type1", 6);
contextQuery.addContext("type3", 2); contextQuery.addContext("type3", 2);
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("sduggestion", "type1", 1 * (1 + 6)), new Entry("sduggestion", "type1", 1 * (1 + 6)),
new Entry("sugdgestion", "type3", 1 * (3 + 2)) new Entry("sugdgestion", "type3", 1 * (3 + 2))

View File

@ -135,7 +135,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3); TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3)); assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3));
reader.close(); reader.close();
@ -165,7 +165,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if at most half of the top scoring documents have been filtered out // if at most half of the top scoring documents have been filtered out
// the search should be admissible for a single segment // the search should be admissible for a single segment
TopSuggestDocs suggest = indexSearcher.suggest(query, num); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertTrue(suggest.totalHits >= 1); assertTrue(suggest.totalHits >= 1);
assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore)); assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore)); assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));
@ -174,14 +174,14 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if more than half of the top scoring documents have been filtered out // if more than half of the top scoring documents have been filtered out
// search is not admissible, so # of suggestions requested is num instead of 1 // search is not admissible, so # of suggestions requested is num instead of 1
suggest = indexSearcher.suggest(query, num); suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, new Entry("abc_0", 0)); assertSuggestions(suggest, new Entry("abc_0", 0));
filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1); filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
// if only lower scoring documents are filtered out // if only lower scoring documents are filtered out
// search is admissible // search is admissible
suggest = indexSearcher.suggest(query, 1); suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1)); assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1));
reader.close(); reader.close();
@ -216,13 +216,13 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
// suggest without filter // suggest without filter
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 3); TopSuggestDocs suggest = indexSearcher.suggest(query, 3, false);
assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3)); assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3));
// suggest with filter // suggest with filter
BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12); BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter);
suggest = indexSearcher.suggest(query, 3); suggest = indexSearcher.suggest(query, 3, false);
assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3)); assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3));
reader.close(); reader.close();
@ -243,10 +243,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "fo")); CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "fo"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // all 4 TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // all 4
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7)); assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "foob")); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "foob"));
suggest = indexSearcher.suggest(query, 4); // not the fo suggest = indexSearcher.suggest(query, 4, false); // not the fo
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7)); assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
reader.close(); reader.close();
iw.close(); iw.close();
@ -266,10 +266,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "fo")); CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "fo"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 4); //matches all 4 TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); //matches all 4
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7)); assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "foob")); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "foob"));
suggest = indexSearcher.suggest(query, 4); // only foobar suggest = indexSearcher.suggest(query, 4, false); // only foobar
assertSuggestions(suggest, new Entry("foobar", 7)); assertSuggestions(suggest, new Entry("foobar", 7));
reader.close(); reader.close();
iw.close(); iw.close();
@ -289,10 +289,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "fo")); CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "fo"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // matches all 4 TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // matches all 4
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7)); assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "foob")); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "foob"));
suggest = indexSearcher.suggest(query, 4); // except the fo suggest = indexSearcher.suggest(query, 4, false); // except the fo
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7)); assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
reader.close(); reader.close();
iw.close(); iw.close();
@ -329,10 +329,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
assertEquals(0, indexSearcher.suggest(query, 3).totalHits); assertEquals(0, indexSearcher.suggest(query, 3, false).totalHits);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app")); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3)); assertSuggestions(indexSearcher.suggest(query, 3, false), new Entry("apples", 3));
reader.close(); reader.close();
iw.close(); iw.close();

View File

@ -67,7 +67,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
RegexCompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|w|s]s?ugg")); RegexCompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|w|s]s?ugg"));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggest, new Entry("wsuggestion", 4), new Entry("ssuggestion", 3), assertSuggestions(suggest, new Entry("wsuggestion", 4), new Entry("ssuggestion", 3),
new Entry("asuggestion", 2), new Entry("suggestion", 1)); new Entry("asuggestion", 2), new Entry("suggestion", 1));
@ -98,7 +98,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
CompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|s][d|u|s][u|d|g]")); CompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|s][d|u|s][u|d|g]"));
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("sduggestion", "type1", 5), new Entry("sduggestion", "type1", 5),
new Entry("sudggestion", "type2", 4), new Entry("sudggestion", "type2", 4),
@ -137,7 +137,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
contextQuery.addContext("type1", 6); contextQuery.addContext("type1", 6);
contextQuery.addContext("type3", 7); contextQuery.addContext("type3", 7);
contextQuery.addAllContexts(); contextQuery.addAllContexts();
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5); TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
assertSuggestions(suggest, assertSuggestions(suggest,
new Entry("sduggestion", "type1", 5 * 6), new Entry("sduggestion", "type1", 5 * 6),
new Entry("sugdgestion", "type3", 3 * 7), new Entry("sugdgestion", "type3", 3 * 7),

View File

@ -20,7 +20,10 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -33,9 +36,9 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene70.Lucene70Codec; import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
@ -122,7 +125,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3); TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
assertThat(lookupDocs.totalHits, equalTo(0)); assertThat(lookupDocs.totalHits, equalTo(0));
reader.close(); reader.close();
iw.close(); iw.close();
@ -157,7 +160,7 @@ public class TestSuggestField extends LuceneTestCase {
int[] weights = new int[num]; int[] weights = new int[num];
for(int i = 0; i < num; i++) { for(int i = 0; i < num; i++) {
Document document = new Document(); Document document = new Document();
weights[i] = Math.abs(random().nextInt()); weights[i] = random().nextInt(Integer.MAX_VALUE);
document.add(new SuggestField("suggest_field", "abc", weights[i])); document.add(new SuggestField("suggest_field", "abc", weights[i]));
iw.addDocument(document); iw.addDocument(document);
@ -175,13 +178,231 @@ public class TestSuggestField extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc"));
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num); TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false);
assertSuggestions(lookupDocs, expectedEntries); assertSuggestions(lookupDocs, expectedEntries);
reader.close(); reader.close();
iw.close(); iw.close();
} }
public void testDeduplication() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
final int num = TestUtil.nextInt(random(), 2, 20);
int[] weights = new int[num];
int bestABCWeight = Integer.MIN_VALUE;
int bestABDWeight = Integer.MIN_VALUE;
for(int i = 0; i < num; i++) {
Document document = new Document();
weights[i] = random().nextInt(Integer.MAX_VALUE);
String suggestValue;
boolean doABC;
if (i == 0) {
doABC = true;
} else if (i == 1) {
doABC = false;
} else {
doABC = random().nextBoolean();
}
if (doABC) {
suggestValue = "abc";
bestABCWeight = Math.max(bestABCWeight, weights[i]);
} else {
suggestValue = "abd";
bestABDWeight = Math.max(bestABDWeight, weights[i]);
}
document.add(new SuggestField("suggest_field", suggestValue, weights[i]));
iw.addDocument(document);
if (usually()) {
iw.commit();
}
}
DirectoryReader reader = iw.getReader();
Entry[] expectedEntries = new Entry[2];
if (bestABDWeight > bestABCWeight) {
expectedEntries[0] = new Entry("abd", bestABDWeight);
expectedEntries[1] = new Entry("abc", bestABCWeight);
} else {
expectedEntries[0] = new Entry("abc", bestABCWeight);
expectedEntries[1] = new Entry("abd", bestABDWeight);
}
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
suggestIndexSearcher.suggest(query, collector);
TopSuggestDocs lookupDocs = collector.get();
assertSuggestions(lookupDocs, expectedEntries);
reader.close();
iw.close();
}
public void testExtremeDeduplication() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
final int num = atLeast(5000);
int bestWeight = Integer.MIN_VALUE;
for(int i = 0; i < num; i++) {
Document document = new Document();
int weight = TestUtil.nextInt(random(), 10, 100);
bestWeight = Math.max(weight, bestWeight);
document.add(new SuggestField("suggest_field", "abc", weight));
iw.addDocument(document);
if (rarely()) {
iw.commit();
}
}
Document document = new Document();
document.add(new SuggestField("suggest_field", "abd", 7));
iw.addDocument(document);
if (random().nextBoolean()) {
iw.forceMerge(1);
}
DirectoryReader reader = iw.getReader();
Entry[] expectedEntries = new Entry[2];
expectedEntries[0] = new Entry("abc", bestWeight);
expectedEntries[1] = new Entry("abd", 7);
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
suggestIndexSearcher.suggest(query, collector);
TopSuggestDocs lookupDocs = collector.get();
assertSuggestions(lookupDocs, expectedEntries);
reader.close();
iw.close();
}
private static String randomSimpleString(int numDigits, int maxLen) {
final int len = TestUtil.nextInt(random(), 1, maxLen);
final char[] chars = new char[len];
for(int j=0;j<len;j++) {
chars[j] = (char) ('a' + random().nextInt(numDigits));
}
return new String(chars);
}
  /** Randomized end-to-end test: indexes random keys with random weights, then
   *  for many random prefixes compares the suggester's results (with and
   *  without de-duplication) against a brute-force in-memory "expected" list.
   *  NOTE: the exact sequence of random() calls is part of the test's seeded
   *  reproducibility — do not reorder statements that consume randomness. */
  public void testRandom() throws Exception {
    int numDigits = TestUtil.nextInt(random(), 1, 6);
    Set<String> keys = new HashSet<>();
    int keyCount = TestUtil.nextInt(random(), 1, 20);
    if (numDigits == 1) {
      // with a 1-char alphabet there are at most 9 distinct short keys possible:
      keyCount = Math.min(9, keyCount);
    }
    while (keys.size() < keyCount) {
      keys.add(randomSimpleString(numDigits, 10));
    }
    List<String> keysList = new ArrayList<>(keys);

    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");

    // we rely on docID order:
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    int docCount = TestUtil.nextInt(random(), 1, 200);
    // docs[i] records the key/weight/docID we indexed, for brute-force checking:
    Entry[] docs = new Entry[docCount];
    for(int i=0;i<docCount;i++) {
      int weight = random().nextInt(40);
      String key = keysList.get(random().nextInt(keyCount));
      //System.out.println("KEY: " + key);
      docs[i] = new Entry(key, null, weight, i);
      Document doc = new Document();
      doc.add(new SuggestField("suggest_field", key, weight));
      iw.addDocument(doc);
      if (usually()) {
        iw.commit();
      }
    }

    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);

    int iters = atLeast(200);
    for(int iter=0;iter<iters;iter++) {
      String prefix = randomSimpleString(numDigits, 2);
      if (VERBOSE) {
        System.out.println("\nTEST: prefix=" + prefix);
      }

      // slow but hopefully correct suggester:
      List<Entry> expected = new ArrayList<>();
      for(Entry doc : docs) {
        if (doc.output.startsWith(prefix)) {
          expected.add(doc);
        }
      }
      Collections.sort(expected,
                       new Comparator<Entry>() {
                         @Override
                         public int compare(Entry a, Entry b) {
                           // sort by higher score:
                           int cmp = Float.compare(b.value, a.value);
                           if (cmp == 0) {
                             // tie break by smaller docID:
                             cmp = Integer.compare(a.id, b.id);
                           }
                           return cmp;
                         }
                       });

      boolean dedup = random().nextBoolean();
      if (dedup) {
        // keep only the first (i.e. highest-scoring, after the sort above)
        // occurrence of each surface form:
        List<Entry> deduped = new ArrayList<>();
        Set<String> seen = new HashSet<>();
        for(Entry entry : expected) {
          if (seen.contains(entry.output) == false) {
            seen.add(entry.output);
            deduped.add(entry);
          }
        }
        expected = deduped;
      }

      // TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
      //int topN = TestUtil.nextInt(random(), 1, docCount+10);
      int topN = docCount;

      if (VERBOSE) {
        if (dedup) {
          System.out.println("  expected (dedup'd) topN=" + topN + ":");
        } else {
          System.out.println("  expected topN=" + topN + ":");
        }
        for(int i=0;i<expected.size();i++) {
          if (i >= topN) {
            System.out.println("    leftover: " + i + ": " + expected.get(i));
          } else {
            System.out.println("    " + i + ": " + expected.get(i));
          }
        }
      }

      expected = expected.subList(0, Math.min(topN, expected.size()));

      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
      // dedup flag matches the brute-force de-duplication applied above:
      TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
      searcher.suggest(query, collector);
      TopSuggestDocs actual = collector.get();
      if (VERBOSE) {
        System.out.println("  actual:");
        SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
        for(int i=0;i<suggestScoreDocs.length;i++) {
          System.out.println("    " + i + ": " + suggestScoreDocs[i]);
        }
      }

      assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
    }

    reader.close();
    iw.close();
  }
@Test @Test
public void testNRTDeletedDocFiltering() throws Exception { public void testNRTDeletedDocFiltering() throws Exception {
Analyzer analyzer = new MockAnalyzer(random()); Analyzer analyzer = new MockAnalyzer(random());
@ -214,7 +435,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(iw); DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, numLive); TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()])); assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
reader.close(); reader.close();
@ -248,7 +469,7 @@ public class TestSuggestField extends LuceneTestCase {
// no random access required; // no random access required;
// calling suggest with filter that does not match any documents should early terminate // calling suggest with filter that does not match any documents should early terminate
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
TopSuggestDocs suggest = indexSearcher.suggest(query, num); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertThat(suggest.totalHits, equalTo(0)); assertThat(suggest.totalHits, equalTo(0));
reader.close(); reader.close();
iw.close(); iw.close();
@ -276,7 +497,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(iw); DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, num); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertThat(suggest.totalHits, equalTo(0)); assertThat(suggest.totalHits, equalTo(0));
reader.close(); reader.close();
@ -306,7 +527,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = DirectoryReader.open(iw); DirectoryReader reader = DirectoryReader.open(iw);
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 1); TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_1", 1)); assertSuggestions(suggest, new Entry("abc_1", 1));
reader.close(); reader.close();
@ -335,10 +556,10 @@ public class TestSuggestField extends LuceneTestCase {
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap"));
TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3)); assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3));
query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap")); query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap"));
TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4); TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4, false);
assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2)); assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2));
// check that the doc ids are consistent // check that the doc ids are consistent
@ -372,7 +593,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, 1); TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
assertSuggestions(suggest, new Entry("abc_" + num, num)); assertSuggestions(suggest, new Entry("abc_" + num, num));
reader.close(); reader.close();
@ -402,7 +623,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size()); TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size(), false);
assertSuggestions(suggest, entries.toArray(new Entry[entries.size()])); assertSuggestions(suggest, entries.toArray(new Entry[entries.size()]));
reader.close(); reader.close();
@ -430,7 +651,7 @@ public class TestSuggestField extends LuceneTestCase {
DirectoryReader reader = iw.getReader(); DirectoryReader reader = iw.getReader();
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_")); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
TopSuggestDocs suggest = indexSearcher.suggest(query, num); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertEquals(num, suggest.totalHits); assertEquals(num, suggest.totalHits);
for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) { for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
String key = suggestScoreDoc.key.toString(); String key = suggestScoreDoc.key.toString();
@ -456,7 +677,7 @@ public class TestSuggestField extends LuceneTestCase {
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Document document = new Document(); Document document = new Document();
String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" +String.valueOf(i); String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" +String.valueOf(i);
int weight = Math.abs(random().nextInt()); int weight = random().nextInt(Integer.MAX_VALUE);
document.add(new SuggestField("suggest_field", suggest, weight)); document.add(new SuggestField("suggest_field", suggest, weight));
mappings.put(suggest, weight); mappings.put(suggest, weight);
iw.addDocument(document); iw.addDocument(document);
@ -470,7 +691,7 @@ public class TestSuggestField extends LuceneTestCase {
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader); SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
for (String prefix : prefixes) { for (String prefix : prefixes) {
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix)); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
TopSuggestDocs suggest = indexSearcher.suggest(query, num); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertTrue(suggest.totalHits > 0); assertTrue(suggest.totalHits > 0);
float topScore = -1; float topScore = -1;
for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) { for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
@ -498,7 +719,7 @@ public class TestSuggestField extends LuceneTestCase {
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
Document document = lineFileDocs.nextDoc(); Document document = lineFileDocs.nextDoc();
String title = document.getField("title").stringValue(); String title = document.getField("title").stringValue();
int weight = Math.abs(random().nextInt()); int weight = random().nextInt(Integer.MAX_VALUE);
Integer prevWeight = mappings.get(title); Integer prevWeight = mappings.get(title);
if (prevWeight == null || prevWeight < weight) { if (prevWeight == null || prevWeight < weight) {
mappings.put(title, weight); mappings.put(title, weight);
@ -519,7 +740,7 @@ public class TestSuggestField extends LuceneTestCase {
String title = entry.getKey(); String title = entry.getKey();
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title)); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size()); TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size(), false);
assertTrue(suggest.totalHits > 0); assertTrue(suggest.totalHits > 0);
boolean matched = false; boolean matched = false;
for (ScoreDoc scoreDoc : suggest.scoreDocs) { for (ScoreDoc scoreDoc : suggest.scoreDocs) {
@ -577,13 +798,13 @@ public class TestSuggestField extends LuceneTestCase {
try { try {
startingGun.await(); startingGun.await();
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1)); PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1));
TopSuggestDocs suggest = indexSearcher.suggest(query, num); TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries1); assertSuggestions(suggest, entries1);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2)); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2));
suggest = indexSearcher.suggest(query, num); suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries2); assertSuggestions(suggest, entries2);
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3)); query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3));
suggest = indexSearcher.suggest(query, num); suggest = indexSearcher.suggest(query, num, false);
assertSuggestions(suggest, entries3); assertSuggestions(suggest, entries3);
} catch (Throwable e) { } catch (Throwable e) {
errors.add(e); errors.add(e);
@ -607,28 +828,39 @@ public class TestSuggestField extends LuceneTestCase {
final String output; final String output;
final float value; final float value;
final String context; final String context;
final int id;
Entry(String output, float value) { Entry(String output, float value) {
this(output, null, value); this(output, null, value);
} }
Entry(String output, String context, float value) { Entry(String output, String context, float value) {
this(output, context, value, -1);
}
Entry(String output, String context, float value, int id) {
this.output = output; this.output = output;
this.value = value; this.value = value;
this.context = context; this.context = context;
this.id = id;
}
@Override
public String toString() {
return "key=" + output + " score=" + value + " context=" + context + " id=" + id;
} }
} }
static void assertSuggestions(TopDocs actual, Entry... expected) { static void assertSuggestions(TopDocs actual, Entry... expected) {
SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs; SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
assertThat(suggestScoreDocs.length, equalTo(expected.length)); for (int i = 0; i < Math.min(expected.length, suggestScoreDocs.length); i++) {
for (int i = 0; i < suggestScoreDocs.length; i++) {
SuggestScoreDoc lookupDoc = suggestScoreDocs[i]; SuggestScoreDoc lookupDoc = suggestScoreDocs[i];
String msg = "Expected: " + toString(expected[i]) + " Actual: " + toString(lookupDoc); String msg = "Hit " + i + ": expected: " + toString(expected[i]) + " but actual: " + toString(lookupDoc);
assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output)); assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output));
assertThat(msg, lookupDoc.score, equalTo(expected[i].value)); assertThat(msg, lookupDoc.score, equalTo(expected[i].value));
assertThat(msg, lookupDoc.context, equalTo(expected[i].context)); assertThat(msg, lookupDoc.context, equalTo(expected[i].context));
} }
assertThat(suggestScoreDocs.length, equalTo(expected.length));
} }
private static String toString(Entry expected) { private static String toString(Entry expected) {