Expose duplicate removal in the completion suggester (#26496)

This change exposes the duplicate removal option added in Lucene for the completion suggester
through a new option called `skip_duplicates` (defaults to `false`).
This commit also adapts the custom suggest collector to handle deduplication when multiple contexts match the input.
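For illustration, here is a minimal sketch of how the new option can be used through the Java API touched by this change. The index name ("music"), suggest field ("suggest"), prefix, suggestion name, and the wrapper class are illustrative placeholders borrowed from the docs and tests below, not part of the commit:

[source,java]
--------------------------------------------------
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;

public final class SkipDuplicatesExample {

    /**
     * Runs a completion suggestion on the "suggest" field of the "music" index,
     * dropping options that share the same surface form.
     * skipDuplicates(true) corresponds to the new `skip_duplicates` request option.
     */
    static SearchResponse suggestWithoutDuplicates(Client client) {
        CompletionSuggestionBuilder suggestion = SuggestBuilders.completionSuggestion("suggest")
            .prefix("nor")
            .skipDuplicates(true)
            .size(5);
        return client.prepareSearch("music")
            .suggest(new SuggestBuilder().addSuggestion("song-suggest", suggestion))
            .get();
    }
}
--------------------------------------------------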

Closes #23364
Jim Ferenczi 2017-09-07 17:11:01 +02:00 committed by GitHub
parent abe83c4fac
commit d68d8c9cef
14 changed files with 394 additions and 96 deletions


@ -18,17 +18,16 @@
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
import org.apache.lucene.util.CharsRefBuilder;
-import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
@ -53,12 +52,14 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
if (suggestionContext.getFieldType() != null) {
final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
-CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
CompletionSuggestion completionSuggestion =
    new CompletionSuggestion(name, suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
spare.copyUTF8Bytes(suggestionContext.getText());
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
new Text(spare.toString()), 0, spare.length());
completionSuggestion.addTerm(completionSuggestEntry);
-TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
TopSuggestDocsCollector collector =
    new TopDocumentsCollector(suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
suggest(searcher, suggestionContext.toQuery(), collector);
int numResult = 0;
for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
@ -97,8 +98,21 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
}
}
-// TODO: this should be refactored and moved to lucene
-// see https://issues.apache.org/jira/browse/LUCENE-6880
/**
* TODO: this should be refactored and moved to lucene, see https://issues.apache.org/jira/browse/LUCENE-6880
*
* Custom collector that returns top documents from the completion suggester.
* When suggestions are augmented with context values, this collector groups suggestions coming from the same document
* but matching different contexts together. Each document is counted as 1 entry and the provided size is the expected number
* of documents that should be returned (not the number of suggestions).
* This collector is also able to filter out duplicate suggestions coming from different documents.
* When different contexts match the same suggestion form, only the best one (sorted by weight) is kept.
* In order to keep this feature fast, the de-duplication of suggestions with different contexts is done
* only on the top N*num_contexts (where N is the number of documents to return) suggestions per segment.
* This means that skip_duplicates will visit at most N*num_contexts suggestions per segment to find unique suggestions
* that match the input. If more than N*num_contexts suggestions are duplicated with different contexts, this collector
* will not be able to return more than one suggestion even when N is greater than 1.
**/
private static final class TopDocumentsCollector extends TopSuggestDocsCollector {
/**
@ -150,93 +164,53 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
}
}
-private static final class SuggestDocPriorityQueue extends PriorityQueue<SuggestDoc> {
-SuggestDocPriorityQueue(int maxSize) {
-super(maxSize);
-}
-@Override
-protected boolean lessThan(SuggestDoc a, SuggestDoc b) {
-if (a.score == b.score) {
-int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
-if (cmp == 0) {
-// prefer smaller doc id, in case of a tie
-return a.doc > b.doc;
-} else {
-return cmp > 0;
-}
-}
-return a.score < b.score;
-}
-public SuggestDoc[] getResults() {
-int size = size();
-SuggestDoc[] res = new SuggestDoc[size];
-for (int i = size - 1; i >= 0; i--) {
-res[i] = pop();
-}
-return res;
-}
-}
-private final int num;
-private final SuggestDocPriorityQueue pq;
-private final Map<Integer, SuggestDoc> scoreDocMap;
-// TODO: expose dup removal
-TopDocumentsCollector(int num) {
-super(1, false); // TODO hack, we don't use the underlying pq, so we allocate a size of 1
-this.num = num;
-this.scoreDocMap = new LinkedHashMap<>(num);
-this.pq = new SuggestDocPriorityQueue(num);
-}
-@Override
-public int getCountToCollect() {
-// This is only needed because we initialize
-// the base class with 1 instead of the actual num
-return num;
-}
-@Override
-protected void doSetNextReader(LeafReaderContext context) throws IOException {
-super.doSetNextReader(context);
-updateResults();
-}
-private void updateResults() {
-for (SuggestDoc suggestDoc : scoreDocMap.values()) {
-if (pq.insertWithOverflow(suggestDoc) == suggestDoc) {
-break;
-}
-}
-scoreDocMap.clear();
-}
private final Map<Integer, SuggestDoc> docsMap;
TopDocumentsCollector(int num, boolean skipDuplicates) {
super(Math.max(1, num), skipDuplicates);
this.docsMap = new LinkedHashMap<>(num);
}
@Override
public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
-if (scoreDocMap.containsKey(docID)) {
-SuggestDoc suggestDoc = scoreDocMap.get(docID);
-suggestDoc.add(key, context, score);
-} else if (scoreDocMap.size() <= num) {
-scoreDocMap.put(docID, new SuggestDoc(docBase + docID, key, context, score));
-} else {
-throw new CollectionTerminatedException();
-}
int globalDoc = docID + docBase;
if (docsMap.containsKey(globalDoc)) {
docsMap.get(globalDoc).add(key, context, score);
} else {
docsMap.put(globalDoc, new SuggestDoc(globalDoc, key, context, score));
super.collect(docID, key, context, score);
}
}
@Override
public TopSuggestDocs get() throws IOException {
-updateResults(); // to empty the last set of collected suggest docs
-TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = pq.getResults();
-if (suggestScoreDocs.length > 0) {
-return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
-} else {
-return TopSuggestDocs.EMPTY;
-}
TopSuggestDocs entries = super.get();
if (entries.scoreDocs.length == 0) {
return TopSuggestDocs.EMPTY;
}
// The parent class returns suggestions, not documents, and dedup only the surface form (without contexts).
// The following code groups suggestions matching different contexts by document id and dedup the surface form + contexts
// if needed (skip_duplicates).
int size = entries.scoreDocs.length;
final List<TopSuggestDocs.SuggestScoreDoc> suggestDocs = new ArrayList(size);
final CharArraySet seenSurfaceForms = doSkipDuplicates() ? new CharArraySet(size, false) : null;
for (TopSuggestDocs.SuggestScoreDoc suggestEntry : entries.scoreLookupDocs()) {
final SuggestDoc suggestDoc;
if (docsMap != null) {
suggestDoc = docsMap.get(suggestEntry.doc);
} else {
suggestDoc = new SuggestDoc(suggestEntry.doc, suggestEntry.key, suggestEntry.context, suggestEntry.score);
}
if (doSkipDuplicates()) {
if (seenSurfaceForms.contains(suggestDoc.key)) {
continue;
}
seenSurfaceForms.add(suggestDoc.key);
}
suggestDocs.add(suggestDoc);
}
return new TopSuggestDocs((int) entries.totalHits,
suggestDocs.toArray(new TopSuggestDocs.SuggestScoreDoc[0]), entries.getMaxScore());
}
}
}


@ -18,8 +18,10 @@
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.suggest.Lookup;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@ -68,11 +70,38 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
public static final int TYPE = 4;
private boolean skipDuplicates;
public CompletionSuggestion() {
}
-public CompletionSuggestion(String name, int size) {
/**
* Constructor
* @param name The name for the suggestions
* @param size The number of suggestions to return
* @param skipDuplicates Whether duplicate suggestions should be filtered out
*/
public CompletionSuggestion(String name, int size, boolean skipDuplicates) {
super(name, size);
this.skipDuplicates = skipDuplicates;
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
// TODO should be backported to 6.1.0
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
skipDuplicates = in.readBoolean();
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
// TODO should be backported to 6.1.0
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(skipDuplicates);
}
}
/** /**
@ -95,7 +124,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
}
public static CompletionSuggestion fromXContent(XContentParser parser, String name) throws IOException {
-CompletionSuggestion suggestion = new CompletionSuggestion(name, -1);
CompletionSuggestion suggestion = new CompletionSuggestion(name, -1, false);
parseEntries(parser, suggestion, CompletionSuggestion.Entry::fromXContent);
return suggestion;
}
@ -146,9 +175,19 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
// the global top <code>size</code> entries are collected from the shard results
// using a priority queue
OptionPriorityQueue priorityQueue = new OptionPriorityQueue(leader.getSize(), COMPARATOR);
// Dedup duplicate suggestions (based on the surface form) if skip duplicates is activated
final CharArraySet seenSurfaceForms = leader.skipDuplicates ? new CharArraySet(leader.getSize(), false) : null;
for (Suggest.Suggestion<Entry> suggestion : toReduce) {
assert suggestion.getName().equals(name) : "name should be identical across all suggestions";
for (Entry.Option option : ((CompletionSuggestion) suggestion).getOptions()) {
if (leader.skipDuplicates) {
assert ((CompletionSuggestion) suggestion).skipDuplicates;
String text = option.getText().string();
if (seenSurfaceForms.contains(text)) {
continue;
}
seenSurfaceForms.add(text);
}
if (option == priorityQueue.insertWithOverflow(option)) {
// if the current option has overflown from pq,
// we can assume all of the successive options
@ -157,7 +196,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
}
}
}
-final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize());
final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize(), leader.skipDuplicates);
final Entry entry = new Entry(leaderEntry.getText(), leaderEntry.getOffset(), leaderEntry.getLength());
Collections.addAll(entry.getOptions(), priorityQueue.get());
suggestion.addTerm(entry);


@ -19,6 +19,7 @@
package org.elasticsearch.search.suggest.completion;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
@ -57,6 +58,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
private static final XContentType CONTEXT_BYTES_XCONTENT_TYPE = XContentType.JSON;
static final String SUGGESTION_NAME = "completion";
static final ParseField CONTEXTS_FIELD = new ParseField("contexts", "context");
static final ParseField SKIP_DUPLICATES_FIELD = new ParseField("skip_duplicates");
/**
* {
@ -94,11 +96,13 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
v.contextBytes = builder.bytes();
p.skipChildren();
}, CONTEXTS_FIELD, ObjectParser.ValueType.OBJECT); // context is deprecated
PARSER.declareBoolean(CompletionSuggestionBuilder::skipDuplicates, SKIP_DUPLICATES_FIELD);
}
protected FuzzyOptions fuzzyOptions;
protected RegexOptions regexOptions;
protected BytesReference contextBytes = null;
protected boolean skipDuplicates = false;
public CompletionSuggestionBuilder(String field) {
super(field);
@ -113,6 +117,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
fuzzyOptions = in.fuzzyOptions;
regexOptions = in.regexOptions;
contextBytes = in.contextBytes;
skipDuplicates = in.skipDuplicates;
}
/**
@ -123,6 +128,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
fuzzyOptions = in.readOptionalWriteable(FuzzyOptions::new);
regexOptions = in.readOptionalWriteable(RegexOptions::new);
contextBytes = in.readOptionalBytesReference();
// TODO should be backported to 6.1.0
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
skipDuplicates = in.readBoolean();
}
}
@Override
@ -130,6 +139,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
out.writeOptionalWriteable(fuzzyOptions);
out.writeOptionalWriteable(regexOptions);
out.writeOptionalBytesReference(contextBytes);
// TODO should be backported to 6.1.0
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(skipDuplicates);
}
}
/**
@ -210,6 +223,21 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
return this;
}
/**
* Returns whether duplicate suggestions should be filtered out.
*/
public boolean skipDuplicates() {
return skipDuplicates;
}
/**
* Should duplicates be filtered or not. Defaults to <tt>false</tt>.
*/
public CompletionSuggestionBuilder skipDuplicates(boolean skipDuplicates) {
this.skipDuplicates = skipDuplicates;
return this;
}
private static class InnerBuilder extends CompletionSuggestionBuilder {
private String field;
@ -231,6 +259,9 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
if (regexOptions != null) {
regexOptions.toXContent(builder, params);
}
if (skipDuplicates) {
builder.field(SKIP_DUPLICATES_FIELD.getPreferredName(), skipDuplicates);
}
if (contextBytes != null) {
builder.rawField(CONTEXTS_FIELD.getPreferredName(), contextBytes);
}
@ -255,6 +286,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
// copy over common settings to each suggestion builder
final MapperService mapperService = context.getMapperService();
populateCommonFields(mapperService, suggestionContext);
suggestionContext.setSkipDuplicates(skipDuplicates);
suggestionContext.setFuzzyOptions(fuzzyOptions);
suggestionContext.setRegexOptions(regexOptions);
MappedFieldType mappedFieldType = mapperService.fullName(suggestionContext.getField());
@ -302,13 +334,14 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
@Override
protected boolean doEquals(CompletionSuggestionBuilder other) {
-return Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
return skipDuplicates == other.skipDuplicates &&
Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
Objects.equals(regexOptions, other.regexOptions) &&
Objects.equals(contextBytes, other.contextBytes);
}
@Override
protected int doHashCode() {
-return Objects.hash(fuzzyOptions, regexOptions, contextBytes);
return Objects.hash(fuzzyOptions, regexOptions, contextBytes, skipDuplicates);
}
}


@ -40,6 +40,7 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
private CompletionFieldMapper.CompletionFieldType fieldType;
private FuzzyOptions fuzzyOptions;
private RegexOptions regexOptions;
private boolean skipDuplicates;
private Map<String, List<ContextMapping.InternalQueryContext>> queryContexts = Collections.emptyMap();
CompletionFieldMapper.CompletionFieldType getFieldType() {
@ -62,6 +63,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
this.queryContexts = queryContexts;
}
void setSkipDuplicates(boolean skipDuplicates) {
this.skipDuplicates = skipDuplicates;
}
public FuzzyOptions getFuzzyOptions() {
return fuzzyOptions;
}
@ -74,6 +79,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
return queryContexts;
}
public boolean isSkipDuplicates() {
return skipDuplicates;
}
CompletionQuery toQuery() {
CompletionFieldMapper.CompletionFieldType fieldType = getFieldType();
final CompletionQuery query;


@ -72,7 +72,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
public void testSort() throws Exception {
List<CompletionSuggestion> suggestions = new ArrayList<>();
for (int i = 0; i < randomIntBetween(1, 5); i++) {
-suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20)));
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false));
}
int nShards = randomIntBetween(1, 20);
int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
@ -139,7 +139,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
for (int i = 0; i < randomIntBetween(1, 5); i++) {
int size = randomIntBetween(1, 20);
maxSuggestSize += size;
-suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size));
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size, false));
}
int nShards = randomIntBetween(1, 20);
int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
@ -202,7 +202,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
List<CompletionSuggestion> shardSuggestion = new ArrayList<>();
for (CompletionSuggestion completionSuggestion : suggestions) {
CompletionSuggestion suggestion = new CompletionSuggestion(
-completionSuggestion.getName(), completionSuggestion.getSize());
completionSuggestion.getName(), completionSuggestion.getSize(), false);
final CompletionSuggestion.Entry completionEntry = new CompletionSuggestion.Entry(new Text(""), 0, 5);
suggestion.addTerm(completionEntry);
int optionSize = randomIntBetween(1, suggestion.getSize());


@ -858,6 +858,38 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
}
}
public void testSkipDuplicates() throws Exception {
final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
createIndexAndMapping(mapping);
int numDocs = randomIntBetween(10, 100);
int numUnique = randomIntBetween(1, numDocs);
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
for (int i = 1; i <= numDocs; i++) {
int id = i % numUnique;
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
.setSource(jsonBuilder()
.startObject()
.startObject(FIELD)
.field("input", "suggestion" + id)
.field("weight", id)
.endObject()
.endObject()
));
}
String[] expected = new String[numUnique];
int sugg = numUnique - 1;
for (int i = 0; i < numUnique; i++) {
expected[i] = "suggestion" + sugg--;
}
indexRandom(true, indexRequestBuilders);
CompletionSuggestionBuilder completionSuggestionBuilder =
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
SearchResponse searchResponse = client().prepareSearch(INDEX)
.suggest(new SuggestBuilder().addSuggestion("suggestions", completionSuggestionBuilder)).execute().actionGet();
assertSuggestions(searchResponse, true, "suggestions", expected);
}
public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)).execute().actionGet();
assertSuggestions(searchResponse, suggestionName, suggestions);
@ -1108,6 +1140,28 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
}
}
public void testMultiDocSuggestions() throws Exception {
final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
createIndexAndMapping(mapping);
int numDocs = 10;
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
for (int i = 1; i <= numDocs; i++) {
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
.setSource(jsonBuilder()
.startObject()
.startObject(FIELD)
.array("input", "suggestion" + i, "suggestions" + i, "suggester" + i)
.field("weight", i)
.endObject()
.endObject()
));
}
indexRandom(true, indexRequestBuilders);
CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion(FIELD).prefix("sugg");
assertSuggestions("foo", prefix, "suggester10", "suggester9", "suggester8", "suggester7", "suggester6");
}
public static boolean isReservedChar(char c) {
switch (c) {
case '\u001F':


@ -639,6 +639,50 @@ public class ContextCompletionSuggestSearchIT extends ESIntegTestCase {
assertEquals("Hotel Amsterdam in Berlin", searchResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string());
}
public void testSkipDuplicatesWithContexts() throws Exception {
LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>();
map.put("type", ContextBuilder.category("type").field("type").build());
map.put("cat", ContextBuilder.category("cat").field("cat").build());
final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map);
createIndexAndMapping(mapping);
int numDocs = randomIntBetween(10, 100);
int numUnique = randomIntBetween(1, numDocs);
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
int id = i % numUnique;
XContentBuilder source = jsonBuilder()
.startObject()
.startObject(FIELD)
.field("input", "suggestion" + id)
.field("weight", id)
.endObject()
.field("cat", "cat" + id % 2)
.field("type", "type" + id)
.endObject();
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
.setSource(source));
}
String[] expected = new String[numUnique];
for (int i = 0; i < numUnique; i++) {
expected[i] = "suggestion" + (numUnique-1-i);
}
indexRandom(true, indexRequestBuilders);
CompletionSuggestionBuilder completionSuggestionBuilder =
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
assertSuggestions("suggestions", completionSuggestionBuilder, expected);
Map<String, List<? extends ToXContent>> contextMap = new HashMap<>();
contextMap.put("cat", Arrays.asList(CategoryQueryContext.builder().setCategory("cat0").build()));
completionSuggestionBuilder =
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").contexts(contextMap).skipDuplicates(true).size(numUnique);
String[] expectedModulo = Arrays.stream(expected)
.filter((s) -> Integer.parseInt(s.substring("suggestion".length())) % 2 == 0)
.toArray(String[]::new);
assertSuggestions("suggestions", completionSuggestionBuilder, expectedModulo);
}
public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(
new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)


@ -139,7 +139,7 @@ public class SuggestTests extends ESTestCase {
public void testFilter() throws Exception {
List<Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>> suggestions;
-CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2);
CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2, false);
PhraseSuggestion phraseSuggestion = new PhraseSuggestion(randomAlphaOfLength(10), 2);
TermSuggestion termSuggestion = new TermSuggestion(randomAlphaOfLength(10), 2, SortBy.SCORE);
suggestions = Arrays.asList(completionSuggestion, phraseSuggestion, termSuggestion);
@ -160,7 +160,7 @@ public class SuggestTests extends ESTestCase {
suggestions = new ArrayList<>();
int n = randomIntBetween(2, 5);
for (int i = 0; i < n; i++) {
-suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5)));
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5), false));
}
Collections.shuffle(suggestions, random());
Suggest suggest = new Suggest(suggestions);


@ -79,7 +79,7 @@ public class SuggestionTests extends ESTestCase {
suggestion = new PhraseSuggestion(name, size);
entrySupplier = () -> SuggestionEntryTests.createTestItem(PhraseSuggestion.Entry.class);
} else if (type == CompletionSuggestion.class) {
-suggestion = new CompletionSuggestion(name, size);
suggestion = new CompletionSuggestion(name, size, randomBoolean());
entrySupplier = () -> SuggestionEntryTests.createTestItem(CompletionSuggestion.Entry.class);
} else {
throw new UnsupportedOperationException("type not supported [" + type + "]");
@ -249,7 +249,7 @@ public class SuggestionTests extends ESTestCase {
CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(1, new Text("someText"), 1.3f, contexts);
CompletionSuggestion.Entry entry = new CompletionSuggestion.Entry(new Text("entryText"), 42, 313);
entry.addOption(option);
-CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5);
CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5, randomBoolean());
suggestion.addTerm(entry);
BytesReference xContent = toXContent(suggestion, XContentType.JSON, params, randomBoolean());
assertEquals(
@ -265,4 +265,4 @@ public class SuggestionTests extends ESTestCase {
+ "}]}", xContent.utf8ToString());
}
}
}


@ -114,6 +114,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
contextMap.put(geoQueryContextName, contexts);
}
testBuilder.contexts(contextMap);
testBuilder.skipDuplicates(randomBoolean());
return testBuilder;
}
@ -128,7 +129,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
@Override
protected void mutateSpecificParameters(CompletionSuggestionBuilder builder) throws IOException {
-switch (randomIntBetween(0, 4)) {
switch (randomIntBetween(0, 5)) {
case 0:
int nCatContext = randomIntBetween(1, 5);
List<CategoryQueryContext> contexts = new ArrayList<>(nCatContext);
@ -154,6 +155,9 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
case 4:
builder.regex(randomAlphaOfLength(10), RegexOptionsTests.randomRegexOptions());
break;
case 5:
builder.skipDuplicates(!builder.skipDuplicates);
break;
default:
throw new IllegalStateException("should not through");
}
@ -182,5 +186,6 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
assertEquals(parsedContextBytes.get(contextName), queryContexts.get(contextName));
}
assertEquals(builder.regexOptions, completionSuggestionCtx.getRegexOptions());
assertEquals(builder.skipDuplicates, completionSuggestionCtx.isSkipDuplicates());
}
}


@ -24,6 +24,7 @@ import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -38,7 +39,7 @@ public class CompletionSuggestionTests extends ESTestCase {
String name = randomAlphaOfLength(10);
int size = randomIntBetween(3, 5);
for (int i = 0; i < nShards; i++) {
-CompletionSuggestion suggestion = new CompletionSuggestion(name, size);
CompletionSuggestion suggestion = new CompletionSuggestion(name, size, false);
suggestion.addTerm(new CompletionSuggestion.Entry(new Text(""), 0, 0));
shardSuggestions.add(suggestion);
}


@ -277,6 +277,7 @@ The basic completion suggester query supports the following parameters:
`field`:: The name of the field on which to run the query (required).
`size`:: The number of suggestions to return (defaults to `5`).
`skip_duplicates`:: Whether duplicate suggestions should be filtered out (defaults to `false`).
NOTE: The completion suggester considers all documents in the index.
See <<suggester-context>> for an explanation of how to query a subset of
@ -291,6 +292,33 @@ index completions into a single shard index. In case of high heap usage due to
shard size, it is still recommended to break index into multiple shards instead
of optimizing for completion performance.
[[skip_duplicates]]
==== Skip duplicate suggestions
Queries can return duplicate suggestions coming from different documents.
It is possible to modify this behavior by setting `skip_duplicates` to true.
When set, this option filters out documents with duplicate suggestions from the result.
[source,js]
--------------------------------------------------
POST music/_search?pretty
{
"suggest": {
"song-suggest" : {
"prefix" : "nor",
"completion" : {
"field" : "suggest",
"skip_duplicates": true
}
}
}
}
--------------------------------------------------
// CONSOLE
WARNING: When set to true, this option can slow down search because more suggestions
need to be visited to find the top N.
[[fuzzy]]
==== Fuzzy queries


@ -291,3 +291,42 @@ setup:
- match: { suggest.result.0.options.1._type: "test" }
- match: { suggest.result.0.options.1._source.title: "title_bar" }
- match: { suggest.result.0.options.1._source.count: 4 }
---
"Skip duplicates should work":
- skip:
version: " - 6.99.99"
reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
- do:
index:
index: test
type: test
id: 1
body:
suggest_1: "bar"
- do:
index:
index: test
type: test
id: 2
body:
suggest_1: "bar"
- do:
indices.refresh: {}
- do:
search:
body:
suggest:
result:
text: "b"
completion:
field: suggest_1
skip_duplicates: true
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "bar" }


@ -276,4 +276,76 @@ setup:
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "Marriot in Berlin" }
---
"Skip duplicates with contexts should work":
- skip:
version: " - 6.99.99"
reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
- do:
index:
index: test
type: test
id: 1
body:
suggest_context:
input: "foo"
contexts:
color: "red"
- do:
index:
index: test
type: test
id: 1
body:
suggest_context:
input: "foo"
contexts:
color: "red"
- do:
index:
index: test
type: test
id: 2
body:
suggest_context:
input: "foo"
contexts:
color: "blue"
- do:
indices.refresh: {}
- do:
search:
body:
suggest:
result:
text: "foo"
completion:
field: suggest_context
skip_duplicates: true
contexts:
color: "red"
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "foo" }
- do:
search:
body:
suggest:
result:
text: "foo"
completion:
skip_duplicates: true
field: suggest_context
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "foo" }