mirror of https://github.com/apache/lucene.git
LUCENE-7686: add efficient de-duping to the NRT document suggester
This commit is contained in:
parent
29a5ea44a7
commit
4e2cf61ac7
|
@ -126,6 +126,10 @@ New Features
|
|||
* LUCENE-7688: Add OneMergeWrappingMergePolicy class.
|
||||
(Keith Laban, Christine Poerschke)
|
||||
|
||||
* LUCENE-7686: The near-real-time document suggester can now
|
||||
efficiently filter out duplicate suggestions (Uwe Schindler, Mike
|
||||
McCandless)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
|
||||
|
|
|
@ -248,32 +248,38 @@ public final class Util {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public static class FSTPath<T> {
|
||||
/** Holds the last arc appended to this path */
|
||||
public FST.Arc<T> arc;
|
||||
public T cost;
|
||||
/** Holds cost plus any usage-specific output: */
|
||||
public T output;
|
||||
public final IntsRefBuilder input;
|
||||
public final float boost;
|
||||
public final CharSequence context;
|
||||
|
||||
// Custom int payload for consumers; the NRT suggester uses this to record if this path has already enumerated a surface form
|
||||
public int payload;
|
||||
|
||||
/** Sole constructor */
|
||||
public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input) {
|
||||
this(cost, arc, input, 0, null);
|
||||
public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input) {
|
||||
this(output, arc, input, 0, null, -1);
|
||||
}
|
||||
|
||||
public FSTPath(T cost, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context) {
|
||||
public FSTPath(T output, FST.Arc<T> arc, IntsRefBuilder input, float boost, CharSequence context, int payload) {
|
||||
this.arc = new FST.Arc<T>().copyFrom(arc);
|
||||
this.cost = cost;
|
||||
this.output = output;
|
||||
this.input = input;
|
||||
this.boost = boost;
|
||||
this.context = context;
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
public FSTPath<T> newPath(T cost, IntsRefBuilder input) {
|
||||
return new FSTPath<>(cost, this.arc, input, this.boost, this.context);
|
||||
public FSTPath<T> newPath(T output, IntsRefBuilder input) {
|
||||
return new FSTPath<>(output, this.arc, input, this.boost, this.context, this.payload);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "input=" + input.get() + " cost=" + cost + "context=" + context + "boost=" + boost;
|
||||
return "input=" + input.get() + " output=" + output + " context=" + context + " boost=" + boost + " payload=" + payload;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -287,7 +293,7 @@ public final class Util {
|
|||
|
||||
@Override
|
||||
public int compare(FSTPath<T> a, FSTPath<T> b) {
|
||||
int cmp = comparator.compare(a.cost, b.cost);
|
||||
int cmp = comparator.compare(a.output, b.output);
|
||||
if (cmp == 0) {
|
||||
return a.input.get().compareTo(b.input.get());
|
||||
} else {
|
||||
|
@ -339,8 +345,7 @@ public final class Util {
|
|||
|
||||
assert queue != null;
|
||||
|
||||
T cost = fst.outputs.add(path.cost, path.arc.output);
|
||||
//System.out.println(" addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
|
||||
T output = fst.outputs.add(path.output, path.arc.output);
|
||||
|
||||
if (queue.size() == maxQueueDepth) {
|
||||
FSTPath<T> bottom = queue.last();
|
||||
|
@ -373,32 +378,32 @@ public final class Util {
|
|||
newInput.copyInts(path.input.get());
|
||||
newInput.append(path.arc.label);
|
||||
|
||||
queue.add(path.newPath(cost, newInput));
|
||||
|
||||
if (queue.size() == maxQueueDepth+1) {
|
||||
queue.pollLast();
|
||||
FSTPath<T> newPath = path.newPath(output, newInput);
|
||||
if (acceptPartialPath(newPath)) {
|
||||
queue.add(newPath);
|
||||
if (queue.size() == maxQueueDepth+1) {
|
||||
queue.pollLast();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input) throws IOException {
|
||||
addStartPaths(node, startOutput, allowEmptyString, input, 0, null);
|
||||
addStartPaths(node, startOutput, allowEmptyString, input, 0, null, -1);
|
||||
}
|
||||
|
||||
/** Adds all leaving arcs, including 'finished' arc, if
|
||||
* the node is final, from this node into the queue. */
|
||||
public void addStartPaths(FST.Arc<T> node, T startOutput, boolean allowEmptyString, IntsRefBuilder input,
|
||||
float boost, CharSequence context) throws IOException {
|
||||
float boost, CharSequence context, int payload) throws IOException {
|
||||
|
||||
// De-dup NO_OUTPUT since it must be a singleton:
|
||||
if (startOutput.equals(fst.outputs.getNoOutput())) {
|
||||
startOutput = fst.outputs.getNoOutput();
|
||||
}
|
||||
|
||||
FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context);
|
||||
FSTPath<T> path = new FSTPath<>(startOutput, node, input, boost, context, payload);
|
||||
fst.readFirstTargetArc(node, path.arc, bytesReader);
|
||||
|
||||
//System.out.println("add start paths");
|
||||
|
||||
// Bootstrap: find the min starting arc
|
||||
while (true) {
|
||||
if (allowEmptyString || path.arc.label != FST.END_LABEL) {
|
||||
|
@ -415,8 +420,6 @@ public final class Util {
|
|||
|
||||
final List<Result<T>> results = new ArrayList<>();
|
||||
|
||||
//System.out.println("search topN=" + topN);
|
||||
|
||||
final BytesReader fstReader = fst.getBytesReader();
|
||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
|
||||
|
@ -430,13 +433,11 @@ public final class Util {
|
|||
|
||||
// For each top N path:
|
||||
while (results.size() < topN) {
|
||||
//System.out.println("\nfind next path: queue.size=" + queue.size());
|
||||
|
||||
FSTPath<T> path;
|
||||
|
||||
if (queue == null) {
|
||||
// Ran out of paths
|
||||
//System.out.println(" break queue=null");
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -446,15 +447,18 @@ public final class Util {
|
|||
|
||||
if (path == null) {
|
||||
// There were less than topN paths available:
|
||||
//System.out.println(" break no more paths");
|
||||
break;
|
||||
}
|
||||
//System.out.println("pop path=" + path + " arc=" + path.arc.output);
|
||||
|
||||
if (acceptPartialPath(path) == false) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (path.arc.label == FST.END_LABEL) {
|
||||
//System.out.println(" empty string! cost=" + path.cost);
|
||||
// Empty string!
|
||||
path.input.setLength(path.input.length() - 1);
|
||||
results.add(new Result<>(path.input.get(), path.cost));
|
||||
results.add(new Result<>(path.input.get(), path.output));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -463,8 +467,6 @@ public final class Util {
|
|||
queue = null;
|
||||
}
|
||||
|
||||
//System.out.println(" path: " + path);
|
||||
|
||||
// We take path and find its "0 output completion",
|
||||
// ie, just keep traversing the first arc with
|
||||
// NO_OUTPUT that we can find, since this must lead
|
||||
|
@ -474,13 +476,11 @@ public final class Util {
|
|||
// For each input letter:
|
||||
while (true) {
|
||||
|
||||
//System.out.println("\n cycle path: " + path);
|
||||
fst.readFirstTargetArc(path.arc, path.arc, fstReader);
|
||||
|
||||
// For each arc leaving this node:
|
||||
boolean foundZero = false;
|
||||
while(true) {
|
||||
//System.out.println(" arc=" + (char) path.arc.label + " cost=" + path.arc.output);
|
||||
// tricky: instead of comparing output == 0, we must
|
||||
// express it via the comparator compare(output, 0) == 0
|
||||
if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) {
|
||||
|
@ -514,18 +514,19 @@ public final class Util {
|
|||
|
||||
if (path.arc.label == FST.END_LABEL) {
|
||||
// Add final output:
|
||||
//System.out.println(" done!: " + path);
|
||||
path.cost = fst.outputs.add(path.cost, path.arc.output);
|
||||
path.output = fst.outputs.add(path.output, path.arc.output);
|
||||
if (acceptResult(path)) {
|
||||
//System.out.println(" add result: " + path);
|
||||
results.add(new Result<>(path.input.get(), path.cost));
|
||||
results.add(new Result<>(path.input.get(), path.output));
|
||||
} else {
|
||||
rejectCount++;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
path.input.append(path.arc.label);
|
||||
path.cost = fst.outputs.add(path.cost, path.arc.output);
|
||||
path.output = fst.outputs.add(path.output, path.arc.output);
|
||||
if (acceptPartialPath(path) == false) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -533,7 +534,12 @@ public final class Util {
|
|||
}
|
||||
|
||||
protected boolean acceptResult(FSTPath<T> path) {
|
||||
return acceptResult(path.input.get(), path.cost);
|
||||
return acceptResult(path.input.get(), path.output);
|
||||
}
|
||||
|
||||
/** Override this to prevent considering a path before it's complete */
|
||||
protected boolean acceptPartialPath(FSTPath<T> path) {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected boolean acceptResult(IntsRef input, T output) {
|
||||
|
|
|
@ -81,7 +81,7 @@ public final class CompletionAnalyzer extends AnalyzerWrapper {
|
|||
private final int maxGraphExpansions;
|
||||
|
||||
/**
|
||||
* Wraps an analyzer to convert it's output token stream to an automaton
|
||||
* Wraps an analyzer to convert its output token stream to an automaton
|
||||
*
|
||||
* @param analyzer token stream to be converted to an automaton
|
||||
* @param preserveSep Preserve separation between tokens when converting to an automaton
|
||||
|
|
|
@ -34,7 +34,7 @@ import static org.apache.lucene.search.suggest.document.CompletionAnalyzer.SEP_L
|
|||
* filtered by {@link BitsProducer}. This should be used to query against any {@link SuggestField}s
|
||||
* or {@link ContextSuggestField}s of documents.
|
||||
* <p>
|
||||
* Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int)} to execute any query
|
||||
* Use {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)} to execute any query
|
||||
* that provides a concrete implementation of this query. Example below shows using this query
|
||||
* to retrieve the top 5 documents.
|
||||
*
|
||||
|
|
|
@ -32,12 +32,11 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseDocID;
|
||||
import static org.apache.lucene.search.suggest.document.NRTSuggester.PayLoadProcessor.parseSurfaceForm;
|
||||
|
||||
/**
|
||||
|
@ -142,21 +141,74 @@ public final class NRTSuggester implements Accountable {
|
|||
// maximum number of suggestions that can be collected.
|
||||
final int topN = collector.getCountToCollect() * prefixPaths.size();
|
||||
final int queueSize = getMaxTopNSearcherQueueSize(topN, scorer.reader.numDocs(), liveDocsRatio, scorer.filtered);
|
||||
|
||||
final CharsRefBuilder spare = new CharsRefBuilder();
|
||||
|
||||
Comparator<Pair<Long, BytesRef>> comparator = getComparator();
|
||||
Util.TopNSearcher<Pair<Long, BytesRef>> searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst, topN, queueSize, comparator,
|
||||
new ScoringPathComparator(scorer)) {
|
||||
|
||||
private final CharsRefBuilder spare = new CharsRefBuilder();
|
||||
private final ByteArrayDataInput scratchInput = new ByteArrayDataInput();
|
||||
|
||||
@Override
|
||||
protected boolean acceptPartialPath(Util.FSTPath<Pair<Long,BytesRef>> path) {
|
||||
if (collector.doSkipDuplicates()) {
|
||||
// We are removing dups
|
||||
if (path.payload == -1) {
|
||||
// This path didn't yet see the complete surface form; let's see if it just did with the arc output we just added:
|
||||
BytesRef arcOutput = path.arc.output.output2;
|
||||
BytesRef output = path.output.output2;
|
||||
for(int i=0;i<arcOutput.length;i++) {
|
||||
if (arcOutput.bytes[arcOutput.offset + i] == payloadSep) {
|
||||
// OK this arc that the path was just extended by contains the payloadSep, so we now have a full surface form in this path
|
||||
path.payload = output.length - arcOutput.length + i;
|
||||
assert output.bytes[output.offset + path.payload] == payloadSep;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (path.payload != -1) {
|
||||
BytesRef output = path.output.output2;
|
||||
spare.copyUTF8Bytes(output.bytes, output.offset, path.payload);
|
||||
if (collector.seenSurfaceForms.contains(spare.chars(), 0, spare.length())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean acceptResult(Util.FSTPath<Pair<Long, BytesRef>> path) {
|
||||
int payloadSepIndex = parseSurfaceForm(path.cost.output2, payloadSep, spare);
|
||||
int docID = parseDocID(path.cost.output2, payloadSepIndex);
|
||||
BytesRef output = path.output.output2;
|
||||
int payloadSepIndex;
|
||||
if (path.payload != -1) {
|
||||
payloadSepIndex = path.payload;
|
||||
spare.copyUTF8Bytes(output.bytes, output.offset, payloadSepIndex);
|
||||
} else {
|
||||
assert collector.doSkipDuplicates() == false;
|
||||
payloadSepIndex = parseSurfaceForm(output, payloadSep, spare);
|
||||
}
|
||||
|
||||
scratchInput.reset(output.bytes, output.offset + payloadSepIndex + 1, output.length - payloadSepIndex - 1);
|
||||
int docID = scratchInput.readVInt();
|
||||
|
||||
if (!scorer.accept(docID, acceptDocs)) {
|
||||
return false;
|
||||
}
|
||||
if (collector.doSkipDuplicates()) {
|
||||
// now record that we've seen this surface form:
|
||||
char[] key = new char[spare.length()];
|
||||
System.arraycopy(spare.chars(), 0, key, 0, spare.length());
|
||||
if (collector.seenSurfaceForms.contains(key)) {
|
||||
// we already collected a higher scoring document with this key, in this segment:
|
||||
return false;
|
||||
}
|
||||
collector.seenSurfaceForms.add(key);
|
||||
}
|
||||
try {
|
||||
float score = scorer.score(decode(path.cost.output1), path.boost);
|
||||
float score = scorer.score(decode(path.output.output1), path.boost);
|
||||
collector.collect(docID, spare.toCharsRef(), path.context, score);
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
|
@ -167,8 +219,20 @@ public final class NRTSuggester implements Accountable {
|
|||
|
||||
for (FSTUtil.Path<Pair<Long, BytesRef>> path : prefixPaths) {
|
||||
scorer.weight.setNextMatch(path.input.get());
|
||||
BytesRef output = path.output.output2;
|
||||
int payload = -1;
|
||||
if (collector.doSkipDuplicates()) {
|
||||
for(int j=0;j<output.length;j++) {
|
||||
if (output.bytes[output.offset+j] == payloadSep) {
|
||||
// Important to cache this, else we have a possibly O(N^2) cost where N is the length of suggestions
|
||||
payload = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
searcher.addStartPaths(path.fstNode, path.output, false, path.input, scorer.weight.boost(),
|
||||
scorer.weight.context());
|
||||
scorer.weight.context(), payload);
|
||||
}
|
||||
// hits are also returned by search()
|
||||
// we do not use it, instead collect at acceptResult
|
||||
|
@ -191,8 +255,8 @@ public final class NRTSuggester implements Accountable {
|
|||
|
||||
@Override
|
||||
public int compare(Util.FSTPath<Pair<Long, BytesRef>> first, Util.FSTPath<Pair<Long, BytesRef>> second) {
|
||||
int cmp = Float.compare(scorer.score(decode(second.cost.output1), second.boost),
|
||||
scorer.score(decode(first.cost.output1), first.boost));
|
||||
int cmp = Float.compare(scorer.score(decode(second.output.output1), second.boost),
|
||||
scorer.score(decode(first.output.output1), first.boost));
|
||||
return (cmp != 0) ? cmp : first.input.get().compareTo(second.input.get());
|
||||
}
|
||||
}
|
||||
|
@ -285,13 +349,6 @@ public final class NRTSuggester implements Accountable {
|
|||
return surfaceFormLen;
|
||||
}
|
||||
|
||||
static int parseDocID(final BytesRef output, int payloadSepIndex) {
|
||||
assert payloadSepIndex != -1 : "payload sep index can not be -1";
|
||||
ByteArrayDataInput input = new ByteArrayDataInput(output.bytes, payloadSepIndex + output.offset + 1,
|
||||
output.length - (payloadSepIndex + output.offset));
|
||||
return input.readVInt();
|
||||
}
|
||||
|
||||
static BytesRef make(final BytesRef surface, int docID, int payloadSep) throws IOException {
|
||||
int len = surface.length + MAX_DOC_ID_LEN_WITH_SEP;
|
||||
byte[] buffer = new byte[len];
|
||||
|
|
|
@ -47,7 +47,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
* document.add(new SuggestField(name, "suggestion", 4));
|
||||
* </pre>
|
||||
* To perform document suggestions based on the this field, use
|
||||
* {@link SuggestIndexSearcher#suggest(CompletionQuery, int)}
|
||||
* {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)}
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
|
|
@ -38,6 +38,9 @@ import org.apache.lucene.search.Weight;
|
|||
*/
|
||||
public class SuggestIndexSearcher extends IndexSearcher {
|
||||
|
||||
// NOTE: we do not accept an ExecutorService here, because at least the dedup
|
||||
// logic in TopSuggestDocsCollector/NRTSuggester would not be thread safe (and maybe other things)
|
||||
|
||||
/**
|
||||
* Creates a searcher with document suggest capabilities
|
||||
* for <code>reader</code>.
|
||||
|
@ -50,8 +53,8 @@ public class SuggestIndexSearcher extends IndexSearcher {
|
|||
* Returns top <code>n</code> completion hits for
|
||||
* <code>query</code>
|
||||
*/
|
||||
public TopSuggestDocs suggest(CompletionQuery query, int n) throws IOException {
|
||||
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n);
|
||||
public TopSuggestDocs suggest(CompletionQuery query, int n, boolean skipDuplicates) throws IOException {
|
||||
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(n, skipDuplicates);
|
||||
suggest(query, collector);
|
||||
return collector.get();
|
||||
}
|
||||
|
|
|
@ -66,6 +66,25 @@ public class TopSuggestDocs extends TopDocs {
|
|||
public int compareTo(SuggestScoreDoc o) {
|
||||
return Lookup.CHARSEQUENCE_COMPARATOR.compare(key, o.key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof SuggestScoreDoc == false) {
|
||||
return false;
|
||||
} else {
|
||||
return key.equals(((SuggestScoreDoc) other).key);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return key.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "key=" + key + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,7 +17,12 @@
|
|||
package org.apache.lucene.search.suggest.document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.SimpleCollector;
|
||||
|
@ -47,9 +52,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
|
|||
private final SuggestScoreDocPriorityQueue priorityQueue;
|
||||
private final int num;
|
||||
|
||||
/**
|
||||
* Document base offset for the current Leaf
|
||||
*/
|
||||
/** Only set if we are deduplicating hits: holds all per-segment hits until the end, when we dedup them */
|
||||
private final List<SuggestScoreDoc> pendingResults;
|
||||
|
||||
/** Only set if we are deduplicating hits: holds all surface forms seen so far in the current segment */
|
||||
final CharArraySet seenSurfaceForms;
|
||||
|
||||
/** Document base offset for the current Leaf */
|
||||
protected int docBase;
|
||||
|
||||
/**
|
||||
|
@ -58,12 +67,24 @@ public class TopSuggestDocsCollector extends SimpleCollector {
|
|||
* Collects at most <code>num</code> completions
|
||||
* with corresponding document and weight
|
||||
*/
|
||||
public TopSuggestDocsCollector(int num) {
|
||||
public TopSuggestDocsCollector(int num, boolean skipDuplicates) {
|
||||
if (num <= 0) {
|
||||
throw new IllegalArgumentException("'num' must be > 0");
|
||||
}
|
||||
this.num = num;
|
||||
this.priorityQueue = new SuggestScoreDocPriorityQueue(num);
|
||||
if (skipDuplicates) {
|
||||
seenSurfaceForms = new CharArraySet(num, false);
|
||||
pendingResults = new ArrayList<>();
|
||||
} else {
|
||||
seenSurfaceForms = null;
|
||||
pendingResults = null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if duplicates are filtered out */
|
||||
protected boolean doSkipDuplicates() {
|
||||
return seenSurfaceForms != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -76,6 +97,13 @@ public class TopSuggestDocsCollector extends SimpleCollector {
|
|||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
if (seenSurfaceForms != null) {
|
||||
seenSurfaceForms.clear();
|
||||
// NOTE: this also clears the priorityQueue:
|
||||
for (SuggestScoreDoc hit : priorityQueue.getResults()) {
|
||||
pendingResults.add(hit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -101,7 +129,52 @@ public class TopSuggestDocsCollector extends SimpleCollector {
|
|||
* Returns at most <code>num</code> Top scoring {@link org.apache.lucene.search.suggest.document.TopSuggestDocs}s
|
||||
*/
|
||||
public TopSuggestDocs get() throws IOException {
|
||||
SuggestScoreDoc[] suggestScoreDocs = priorityQueue.getResults();
|
||||
|
||||
SuggestScoreDoc[] suggestScoreDocs;
|
||||
|
||||
if (seenSurfaceForms != null) {
|
||||
// NOTE: this also clears the priorityQueue:
|
||||
for (SuggestScoreDoc hit : priorityQueue.getResults()) {
|
||||
pendingResults.add(hit);
|
||||
}
|
||||
|
||||
// Deduplicate all hits: we already dedup'd efficiently within each segment by
|
||||
// truncating the FST top paths search, but across segments there may still be dups:
|
||||
seenSurfaceForms.clear();
|
||||
|
||||
// TODO: we could use a priority queue here to make cost O(N * log(num)) instead of O(N * log(N)), where N = O(num *
|
||||
// numSegments), but typically numSegments is smallish and num is smallish so this won't matter much in practice:
|
||||
|
||||
Collections.sort(pendingResults,
|
||||
new Comparator<SuggestScoreDoc>() {
|
||||
@Override
|
||||
public int compare(SuggestScoreDoc a, SuggestScoreDoc b) {
|
||||
// sort by higher score
|
||||
int cmp = Float.compare(b.score, a.score);
|
||||
if (cmp == 0) {
|
||||
// tie break by lower docID:
|
||||
cmp = Integer.compare(a.doc, b.doc);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
|
||||
List<SuggestScoreDoc> hits = new ArrayList<>();
|
||||
|
||||
for (SuggestScoreDoc hit : pendingResults) {
|
||||
if (seenSurfaceForms.contains(hit.key) == false) {
|
||||
seenSurfaceForms.add(hit.key);
|
||||
hits.add(hit);
|
||||
if (hits.size() == num) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
suggestScoreDocs = hits.toArray(new SuggestScoreDoc[0]);
|
||||
} else {
|
||||
suggestScoreDocs = priorityQueue.getResults();
|
||||
}
|
||||
|
||||
if (suggestScoreDocs.length > 0) {
|
||||
return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
|
||||
} else {
|
||||
|
|
|
@ -89,7 +89,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
query.addContext("type2", 2);
|
||||
query.addContext("type3", 3);
|
||||
query.addContext("type4", 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion4", "type4", 5 * 4),
|
||||
new Entry("suggestion3", "type3", 6 * 3),
|
||||
|
@ -124,7 +124,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab")));
|
||||
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
|
||||
suggestIndexSearcher.suggest(query, 4);
|
||||
suggestIndexSearcher.suggest(query, 4, false);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("SuggestField"));
|
||||
|
||||
|
@ -155,7 +155,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
|
||||
query.addContext("type", 1, false);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4),
|
||||
new Entry("suggestion2", "type2", 3),
|
||||
|
@ -185,7 +185,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
|
||||
query.addContext("type", 1);
|
||||
query.addContext("typetype", 2);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "typetype", 4 * 2),
|
||||
new Entry("suggestion2", "type", 3 * 1)
|
||||
|
@ -215,7 +215,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion_no_ctx", null, 4),
|
||||
new Entry("suggestion", "type4", 1));
|
||||
|
@ -249,7 +249,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
|
||||
query.addContext("type4", 10);
|
||||
query.addAllContexts();
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion4", "type4", 1 * 10),
|
||||
new Entry("suggestion1", null, 4),
|
||||
|
@ -284,7 +284,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
query.addContext("type2", 2);
|
||||
query.addContext("type3", 3);
|
||||
query.addContext("type4", 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion", "type1", 4 * 10),
|
||||
new Entry("suggestion", "type3", 4 * 3),
|
||||
|
@ -321,7 +321,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
query.addContext("type1", 7);
|
||||
query.addContext("type2", 6);
|
||||
query.addAllContexts();
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4 * 7),
|
||||
new Entry("suggestion2", "type2", 3 * 6),
|
||||
|
@ -357,7 +357,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
|
||||
query.addContext("type3", 3);
|
||||
query.addContext("type4", 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion3", "type3", 2 * 3),
|
||||
new Entry("suggestion4", "type4", 1 * 4)
|
||||
|
@ -389,7 +389,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4),
|
||||
new Entry("suggestion2", "type2", 3),
|
||||
|
@ -426,7 +426,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
query.addContext("type2", 2);
|
||||
query.addContext("type3", 3);
|
||||
query.addContext("type4", 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type3", 8 * 3),
|
||||
new Entry("suggestion4", "type4", 5 * 4),
|
||||
|
@ -460,7 +460,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg")));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4),
|
||||
new Entry("suggestion2", "type2", 3),
|
||||
|
@ -520,7 +520,7 @@ public class TestContextQuery extends LuceneTestCase {
|
|||
for (int i = 0; i < contexts.size(); i++) {
|
||||
query.addContext(contexts.get(i), i + 1);
|
||||
}
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggest, Arrays.copyOfRange(expectedResults, 0, 4));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -172,7 +172,7 @@ public class TestContextSuggestField extends LuceneTestCase {
|
|||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
|
||||
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 10, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", 4),
|
||||
new Entry("suggestion2", 3),
|
||||
|
@ -180,7 +180,7 @@ public class TestContextSuggestField extends LuceneTestCase {
|
|||
new Entry("suggestion4", 1));
|
||||
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("context_suggest_field", "sugg"));
|
||||
suggest = suggestIndexSearcher.suggest(query, 10);
|
||||
suggest = suggestIndexSearcher.suggest(query, 10, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4),
|
||||
new Entry("suggestion2", "type2", 3),
|
||||
|
@ -212,14 +212,14 @@ public class TestContextSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
ContextQuery query = new ContextQuery(new PrefixCompletionQuery(completionAnalyzer, new Term("suggest_field", "sugg")));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4),
|
||||
new Entry("suggestion2", "type2", 3),
|
||||
new Entry("suggestion3", "type3", 2),
|
||||
new Entry("suggestion4", "type4", 1));
|
||||
query.addContext("type1");
|
||||
suggest = suggestIndexSearcher.suggest(query, 4);
|
||||
suggest = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion1", "type1", 4));
|
||||
reader.close();
|
||||
|
|
|
@ -66,7 +66,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugg"));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suaggestion", 4 * 2),
|
||||
new Entry("suggestion", 2 * 3),
|
||||
|
@ -101,7 +101,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new ContextQuery(new FuzzyCompletionQuery(analyzer, new Term("suggest_field", "sugge")));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("suggestion", "type4", 4),
|
||||
new Entry("suggdestion", "type4", 4),
|
||||
|
@ -140,7 +140,7 @@ public class TestFuzzyCompletionQuery extends LuceneTestCase {
|
|||
ContextQuery contextQuery = new ContextQuery(fuzzyQuery);
|
||||
contextQuery.addContext("type1", 6);
|
||||
contextQuery.addContext("type3", 2);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("sduggestion", "type1", 1 * (1 + 6)),
|
||||
new Entry("sugdgestion", "type3", 1 * (3 + 2))
|
||||
|
|
|
@ -135,7 +135,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
|
||||
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3);
|
||||
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
|
||||
assertSuggestions(lookupDocs, new Entry("abcdd", 5), new Entry("abd", 4), new Entry("abc", 3));
|
||||
|
||||
reader.close();
|
||||
|
@ -165,7 +165,7 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
|
||||
// if at most half of the top scoring documents have been filtered out
|
||||
// the search should be admissible for a single segment
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
|
||||
assertTrue(suggest.totalHits >= 1);
|
||||
assertThat(suggest.scoreLookupDocs()[0].key.toString(), equalTo("abc_" + topScore));
|
||||
assertThat(suggest.scoreLookupDocs()[0].score, equalTo((float) topScore));
|
||||
|
@ -174,14 +174,14 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
|
||||
// if more than half of the top scoring documents have been filtered out
|
||||
// search is not admissible, so # of suggestions requested is num instead of 1
|
||||
suggest = indexSearcher.suggest(query, num);
|
||||
suggest = indexSearcher.suggest(query, num, false);
|
||||
assertSuggestions(suggest, new Entry("abc_0", 0));
|
||||
|
||||
filter = new NumericRangeBitsProducer("filter_int_fld", num - 1, num - 1);
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
|
||||
// if only lower scoring documents are filtered out
|
||||
// search is admissible
|
||||
suggest = indexSearcher.suggest(query, 1);
|
||||
suggest = indexSearcher.suggest(query, 1, false);
|
||||
assertSuggestions(suggest, new Entry("abc_" + (num - 1), num - 1));
|
||||
|
||||
reader.close();
|
||||
|
@ -216,13 +216,13 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
|
||||
// suggest without filter
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 3);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 3, false);
|
||||
assertSuggestions(suggest, new Entry("apple", 5), new Entry("applle", 4), new Entry("apples", 3));
|
||||
|
||||
// suggest with filter
|
||||
BitsProducer filter = new NumericRangeBitsProducer("filter_int_fld", 5, 12);
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"), filter);
|
||||
suggest = indexSearcher.suggest(query, 3);
|
||||
suggest = indexSearcher.suggest(query, 3, false);
|
||||
assertSuggestions(suggest, new Entry("applle", 4), new Entry("apples", 3));
|
||||
|
||||
reader.close();
|
||||
|
@ -243,10 +243,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "fo"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // all 4
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // all 4
|
||||
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep_or_pos_inc", "foob"));
|
||||
suggest = indexSearcher.suggest(query, 4); // not the fo
|
||||
suggest = indexSearcher.suggest(query, 4, false); // not the fo
|
||||
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
|
||||
reader.close();
|
||||
iw.close();
|
||||
|
@ -266,10 +266,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "fo"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 4); //matches all 4
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); //matches all 4
|
||||
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_pos_inc", "foob"));
|
||||
suggest = indexSearcher.suggest(query, 4); // only foobar
|
||||
suggest = indexSearcher.suggest(query, 4, false); // only foobar
|
||||
assertSuggestions(suggest, new Entry("foobar", 7));
|
||||
reader.close();
|
||||
iw.close();
|
||||
|
@ -289,10 +289,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "fo"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 4); // matches all 4
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 4, false); // matches all 4
|
||||
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("the fo", 9), new Entry("foo bar", 8), new Entry("foobar", 7));
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_no_p_sep", "foob"));
|
||||
suggest = indexSearcher.suggest(query, 4); // except the fo
|
||||
suggest = indexSearcher.suggest(query, 4, false); // except the fo
|
||||
assertSuggestions(suggest, new Entry("the foo bar", 10), new Entry("foo bar", 8), new Entry("foobar", 7));
|
||||
reader.close();
|
||||
iw.close();
|
||||
|
@ -329,10 +329,10 @@ public class TestPrefixCompletionQuery extends LuceneTestCase {
|
|||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "app"));
|
||||
assertEquals(0, indexSearcher.suggest(query, 3).totalHits);
|
||||
assertEquals(0, indexSearcher.suggest(query, 3, false).totalHits);
|
||||
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field2", "app"));
|
||||
assertSuggestions(indexSearcher.suggest(query, 3), new Entry("apples", 3));
|
||||
assertSuggestions(indexSearcher.suggest(query, 3, false), new Entry("apples", 3));
|
||||
|
||||
reader.close();
|
||||
iw.close();
|
||||
|
|
|
@ -67,7 +67,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
RegexCompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|w|s]s?ugg"));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggest, new Entry("wsuggestion", 4), new Entry("ssuggestion", 3),
|
||||
new Entry("asuggestion", 2), new Entry("suggestion", 1));
|
||||
|
||||
|
@ -98,7 +98,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
CompletionQuery query = new RegexCompletionQuery(new Term("suggest_field", "[a|s][d|u|s][u|d|g]"));
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(query, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("sduggestion", "type1", 5),
|
||||
new Entry("sudggestion", "type2", 4),
|
||||
|
@ -137,7 +137,7 @@ public class TestRegexCompletionQuery extends LuceneTestCase {
|
|||
contextQuery.addContext("type1", 6);
|
||||
contextQuery.addContext("type3", 7);
|
||||
contextQuery.addAllContexts();
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5);
|
||||
TopSuggestDocs suggest = suggestIndexSearcher.suggest(contextQuery, 5, false);
|
||||
assertSuggestions(suggest,
|
||||
new Entry("sduggestion", "type1", 5 * 6),
|
||||
new Entry("sugdgestion", "type3", 3 * 7),
|
||||
|
|
|
@ -20,7 +20,10 @@ import java.io.ByteArrayOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -33,9 +36,9 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene70.Lucene70Codec;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -122,7 +125,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "ab"));
|
||||
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3);
|
||||
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, 3, false);
|
||||
assertThat(lookupDocs.totalHits, equalTo(0));
|
||||
reader.close();
|
||||
iw.close();
|
||||
|
@ -157,7 +160,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
int[] weights = new int[num];
|
||||
for(int i = 0; i < num; i++) {
|
||||
Document document = new Document();
|
||||
weights[i] = Math.abs(random().nextInt());
|
||||
weights[i] = random().nextInt(Integer.MAX_VALUE);
|
||||
document.add(new SuggestField("suggest_field", "abc", weights[i]));
|
||||
iw.addDocument(document);
|
||||
|
||||
|
@ -175,13 +178,231 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc"));
|
||||
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num);
|
||||
TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false);
|
||||
assertSuggestions(lookupDocs, expectedEntries);
|
||||
|
||||
reader.close();
|
||||
iw.close();
|
||||
}
|
||||
|
||||
public void testDeduplication() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
||||
final int num = TestUtil.nextInt(random(), 2, 20);
|
||||
int[] weights = new int[num];
|
||||
int bestABCWeight = Integer.MIN_VALUE;
|
||||
int bestABDWeight = Integer.MIN_VALUE;
|
||||
for(int i = 0; i < num; i++) {
|
||||
Document document = new Document();
|
||||
weights[i] = random().nextInt(Integer.MAX_VALUE);
|
||||
String suggestValue;
|
||||
boolean doABC;
|
||||
if (i == 0) {
|
||||
doABC = true;
|
||||
} else if (i == 1) {
|
||||
doABC = false;
|
||||
} else {
|
||||
doABC = random().nextBoolean();
|
||||
}
|
||||
if (doABC) {
|
||||
suggestValue = "abc";
|
||||
bestABCWeight = Math.max(bestABCWeight, weights[i]);
|
||||
} else {
|
||||
suggestValue = "abd";
|
||||
bestABDWeight = Math.max(bestABDWeight, weights[i]);
|
||||
}
|
||||
document.add(new SuggestField("suggest_field", suggestValue, weights[i]));
|
||||
iw.addDocument(document);
|
||||
|
||||
if (usually()) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
|
||||
DirectoryReader reader = iw.getReader();
|
||||
Entry[] expectedEntries = new Entry[2];
|
||||
if (bestABDWeight > bestABCWeight) {
|
||||
expectedEntries[0] = new Entry("abd", bestABDWeight);
|
||||
expectedEntries[1] = new Entry("abc", bestABCWeight);
|
||||
} else {
|
||||
expectedEntries[0] = new Entry("abc", bestABCWeight);
|
||||
expectedEntries[1] = new Entry("abd", bestABDWeight);
|
||||
}
|
||||
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
|
||||
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
|
||||
suggestIndexSearcher.suggest(query, collector);
|
||||
TopSuggestDocs lookupDocs = collector.get();
|
||||
assertSuggestions(lookupDocs, expectedEntries);
|
||||
|
||||
reader.close();
|
||||
iw.close();
|
||||
}
|
||||
|
||||
public void testExtremeDeduplication() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
|
||||
final int num = atLeast(5000);
|
||||
int bestWeight = Integer.MIN_VALUE;
|
||||
for(int i = 0; i < num; i++) {
|
||||
Document document = new Document();
|
||||
int weight = TestUtil.nextInt(random(), 10, 100);
|
||||
bestWeight = Math.max(weight, bestWeight);
|
||||
document.add(new SuggestField("suggest_field", "abc", weight));
|
||||
iw.addDocument(document);
|
||||
if (rarely()) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
Document document = new Document();
|
||||
document.add(new SuggestField("suggest_field", "abd", 7));
|
||||
iw.addDocument(document);
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
iw.forceMerge(1);
|
||||
}
|
||||
|
||||
DirectoryReader reader = iw.getReader();
|
||||
Entry[] expectedEntries = new Entry[2];
|
||||
expectedEntries[0] = new Entry("abc", bestWeight);
|
||||
expectedEntries[1] = new Entry("abd", 7);
|
||||
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "a"));
|
||||
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(2, true);
|
||||
suggestIndexSearcher.suggest(query, collector);
|
||||
TopSuggestDocs lookupDocs = collector.get();
|
||||
assertSuggestions(lookupDocs, expectedEntries);
|
||||
|
||||
reader.close();
|
||||
iw.close();
|
||||
}
|
||||
|
||||
private static String randomSimpleString(int numDigits, int maxLen) {
|
||||
final int len = TestUtil.nextInt(random(), 1, maxLen);
|
||||
final char[] chars = new char[len];
|
||||
for(int j=0;j<len;j++) {
|
||||
chars[j] = (char) ('a' + random().nextInt(numDigits));
|
||||
}
|
||||
return new String(chars);
|
||||
}
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
int numDigits = TestUtil.nextInt(random(), 1, 6);
|
||||
Set<String> keys = new HashSet<>();
|
||||
int keyCount = TestUtil.nextInt(random(), 1, 20);
|
||||
if (numDigits == 1) {
|
||||
keyCount = Math.min(9, keyCount);
|
||||
}
|
||||
while (keys.size() < keyCount) {
|
||||
keys.add(randomSimpleString(numDigits, 10));
|
||||
}
|
||||
List<String> keysList = new ArrayList<>(keys);
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");
|
||||
// we rely on docID order:
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
|
||||
int docCount = TestUtil.nextInt(random(), 1, 200);
|
||||
Entry[] docs = new Entry[docCount];
|
||||
for(int i=0;i<docCount;i++) {
|
||||
int weight = random().nextInt(40);
|
||||
String key = keysList.get(random().nextInt(keyCount));
|
||||
//System.out.println("KEY: " + key);
|
||||
docs[i] = new Entry(key, null, weight, i);
|
||||
Document doc = new Document();
|
||||
doc.add(new SuggestField("suggest_field", key, weight));
|
||||
iw.addDocument(doc);
|
||||
if (usually()) {
|
||||
iw.commit();
|
||||
}
|
||||
}
|
||||
|
||||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
|
||||
|
||||
int iters = atLeast(200);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
String prefix = randomSimpleString(numDigits, 2);
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: prefix=" + prefix);
|
||||
}
|
||||
|
||||
// slow but hopefully correct suggester:
|
||||
List<Entry> expected = new ArrayList<>();
|
||||
for(Entry doc : docs) {
|
||||
if (doc.output.startsWith(prefix)) {
|
||||
expected.add(doc);
|
||||
}
|
||||
}
|
||||
Collections.sort(expected,
|
||||
new Comparator<Entry>() {
|
||||
@Override
|
||||
public int compare(Entry a, Entry b) {
|
||||
// sort by higher score:
|
||||
int cmp = Float.compare(b.value, a.value);
|
||||
if (cmp == 0) {
|
||||
// tie break by smaller docID:
|
||||
cmp = Integer.compare(a.id, b.id);
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
});
|
||||
|
||||
boolean dedup = random().nextBoolean();
|
||||
if (dedup) {
|
||||
List<Entry> deduped = new ArrayList<>();
|
||||
Set<String> seen = new HashSet<>();
|
||||
for(Entry entry : expected) {
|
||||
if (seen.contains(entry.output) == false) {
|
||||
seen.add(entry.output);
|
||||
deduped.add(entry);
|
||||
}
|
||||
}
|
||||
expected = deduped;
|
||||
}
|
||||
|
||||
// TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
|
||||
//int topN = TestUtil.nextInt(random(), 1, docCount+10);
|
||||
int topN = docCount;
|
||||
|
||||
if (VERBOSE) {
|
||||
if (dedup) {
|
||||
System.out.println(" expected (dedup'd) topN=" + topN + ":");
|
||||
} else {
|
||||
System.out.println(" expected topN=" + topN + ":");
|
||||
}
|
||||
for(int i=0;i<expected.size();i++) {
|
||||
if (i >= topN) {
|
||||
System.out.println(" leftover: " + i + ": " + expected.get(i));
|
||||
} else {
|
||||
System.out.println(" " + i + ": " + expected.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
expected = expected.subList(0, Math.min(topN, expected.size()));
|
||||
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
|
||||
TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
|
||||
searcher.suggest(query, collector);
|
||||
TopSuggestDocs actual = collector.get();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" actual:");
|
||||
SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
|
||||
for(int i=0;i<suggestScoreDocs.length;i++) {
|
||||
System.out.println(" " + i + ": " + suggestScoreDocs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
|
||||
}
|
||||
|
||||
reader.close();
|
||||
iw.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNRTDeletedDocFiltering() throws Exception {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
|
@ -214,7 +435,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = DirectoryReader.open(iw);
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, numLive);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
|
||||
assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
|
||||
|
||||
reader.close();
|
||||
|
@ -248,7 +469,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
// no random access required;
|
||||
// calling suggest with filter that does not match any documents should early terminate
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
|
||||
assertThat(suggest.totalHits, equalTo(0));
|
||||
reader.close();
|
||||
iw.close();
|
||||
|
@ -276,7 +497,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = DirectoryReader.open(iw);
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
|
||||
assertThat(suggest.totalHits, equalTo(0));
|
||||
|
||||
reader.close();
|
||||
|
@ -306,7 +527,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = DirectoryReader.open(iw);
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
|
||||
assertSuggestions(suggest, new Entry("abc_1", 1));
|
||||
|
||||
reader.close();
|
||||
|
@ -335,10 +556,10 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
|
||||
SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap"));
|
||||
TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggestDocs1 = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggestDocs1, new Entry("apple", 4), new Entry("aples", 3));
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap"));
|
||||
TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4);
|
||||
TopSuggestDocs suggestDocs2 = suggestIndexSearcher.suggest(query, 4, false);
|
||||
assertSuggestions(suggestDocs2, new Entry("april", 3), new Entry("apartment", 2));
|
||||
|
||||
// check that the doc ids are consistent
|
||||
|
@ -372,7 +593,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 1);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, 1, false);
|
||||
assertSuggestions(suggest, new Entry("abc_" + num, num));
|
||||
|
||||
reader.close();
|
||||
|
@ -402,7 +623,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size());
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, (entries.size() == 0) ? 1 : entries.size(), false);
|
||||
assertSuggestions(suggest, entries.toArray(new Entry[entries.size()]));
|
||||
|
||||
reader.close();
|
||||
|
@ -430,7 +651,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
DirectoryReader reader = iw.getReader();
|
||||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
|
||||
assertEquals(num, suggest.totalHits);
|
||||
for (SuggestScoreDoc suggestScoreDoc : suggest.scoreLookupDocs()) {
|
||||
String key = suggestScoreDoc.key.toString();
|
||||
|
@ -456,7 +677,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
for (int i = 0; i < num; i++) {
|
||||
Document document = new Document();
|
||||
String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" +String.valueOf(i);
|
||||
int weight = Math.abs(random().nextInt());
|
||||
int weight = random().nextInt(Integer.MAX_VALUE);
|
||||
document.add(new SuggestField("suggest_field", suggest, weight));
|
||||
mappings.put(suggest, weight);
|
||||
iw.addDocument(document);
|
||||
|
@ -470,7 +691,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
|
||||
for (String prefix : prefixes) {
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
|
||||
assertTrue(suggest.totalHits > 0);
|
||||
float topScore = -1;
|
||||
for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
|
||||
|
@ -498,7 +719,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
for (int i = 0; i < num; i++) {
|
||||
Document document = lineFileDocs.nextDoc();
|
||||
String title = document.getField("title").stringValue();
|
||||
int weight = Math.abs(random().nextInt());
|
||||
int weight = random().nextInt(Integer.MAX_VALUE);
|
||||
Integer prevWeight = mappings.get(title);
|
||||
if (prevWeight == null || prevWeight < weight) {
|
||||
mappings.put(title, weight);
|
||||
|
@ -519,7 +740,7 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
String title = entry.getKey();
|
||||
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", title));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size());
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, mappings.size(), false);
|
||||
assertTrue(suggest.totalHits > 0);
|
||||
boolean matched = false;
|
||||
for (ScoreDoc scoreDoc : suggest.scoreDocs) {
|
||||
|
@ -577,13 +798,13 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
try {
|
||||
startingGun.await();
|
||||
PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_1", prefix1));
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num);
|
||||
TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
|
||||
assertSuggestions(suggest, entries1);
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_2", prefix2));
|
||||
suggest = indexSearcher.suggest(query, num);
|
||||
suggest = indexSearcher.suggest(query, num, false);
|
||||
assertSuggestions(suggest, entries2);
|
||||
query = new PrefixCompletionQuery(analyzer, new Term("suggest_field_3", prefix3));
|
||||
suggest = indexSearcher.suggest(query, num);
|
||||
suggest = indexSearcher.suggest(query, num, false);
|
||||
assertSuggestions(suggest, entries3);
|
||||
} catch (Throwable e) {
|
||||
errors.add(e);
|
||||
|
@ -607,28 +828,39 @@ public class TestSuggestField extends LuceneTestCase {
|
|||
final String output;
|
||||
final float value;
|
||||
final String context;
|
||||
final int id;
|
||||
|
||||
Entry(String output, float value) {
|
||||
this(output, null, value);
|
||||
}
|
||||
|
||||
Entry(String output, String context, float value) {
|
||||
this(output, context, value, -1);
|
||||
}
|
||||
|
||||
Entry(String output, String context, float value, int id) {
|
||||
this.output = output;
|
||||
this.value = value;
|
||||
this.context = context;
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "key=" + output + " score=" + value + " context=" + context + " id=" + id;
|
||||
}
|
||||
}
|
||||
|
||||
static void assertSuggestions(TopDocs actual, Entry... expected) {
|
||||
SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
|
||||
assertThat(suggestScoreDocs.length, equalTo(expected.length));
|
||||
for (int i = 0; i < suggestScoreDocs.length; i++) {
|
||||
for (int i = 0; i < Math.min(expected.length, suggestScoreDocs.length); i++) {
|
||||
SuggestScoreDoc lookupDoc = suggestScoreDocs[i];
|
||||
String msg = "Expected: " + toString(expected[i]) + " Actual: " + toString(lookupDoc);
|
||||
String msg = "Hit " + i + ": expected: " + toString(expected[i]) + " but actual: " + toString(lookupDoc);
|
||||
assertThat(msg, lookupDoc.key.toString(), equalTo(expected[i].output));
|
||||
assertThat(msg, lookupDoc.score, equalTo(expected[i].value));
|
||||
assertThat(msg, lookupDoc.context, equalTo(expected[i].context));
|
||||
}
|
||||
assertThat(suggestScoreDocs.length, equalTo(expected.length));
|
||||
}
|
||||
|
||||
private static String toString(Entry expected) {
|
||||
|
|
Loading…
Reference in New Issue