Expose duplicate removal in the completion suggester (#26496)

This change exposes the duplicate-removal option added in Lucene for the completion suggester
through a new option called `skip_duplicates` (defaults to `false`).
It also adapts the custom suggest collector to deduplicate suggestions when multiple contexts match the input.

Closes #23364
Jim Ferenczi 2017-09-07 17:11:01 +02:00 committed by GitHub
parent abe83c4fac
commit d68d8c9cef
14 changed files with 394 additions and 96 deletions
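For orientation, here is a minimal sketch of how a client could request suggestions with the new option through the Java API touched by this change. The index name `music`, field name `suggest`, class name and suggestion key `song-suggest` are illustrative assumptions; `skipDuplicates(boolean)` is the builder method introduced by this commit.

// Sketch only, not part of this commit: issuing a completion suggestion with skip_duplicates
// via the transport client API. Index "music", field "suggest" and key "song-suggest" are hypothetical.
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.search.suggest.SuggestBuilder;
import org.elasticsearch.search.suggest.SuggestBuilders;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;

public class SkipDuplicatesUsageSketch {
    public static SearchResponse suggest(Client client) {
        CompletionSuggestionBuilder suggestion = SuggestBuilders.completionSuggestion("suggest")
            .prefix("nor")
            .skipDuplicates(true) // new option, defaults to false
            .size(5);
        return client.prepareSearch("music")
            .suggest(new SuggestBuilder().addSuggestion("song-suggest", suggestion))
            .get();
    }
}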

View File

@ -18,17 +18,16 @@
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
@ -53,12 +52,14 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
if (suggestionContext.getFieldType() != null) {
final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
- CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
+ CompletionSuggestion completionSuggestion =
+     new CompletionSuggestion(name, suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
spare.copyUTF8Bytes(suggestionContext.getText());
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
new Text(spare.toString()), 0, spare.length());
completionSuggestion.addTerm(completionSuggestEntry);
- TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
+ TopSuggestDocsCollector collector =
+     new TopDocumentsCollector(suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
suggest(searcher, suggestionContext.toQuery(), collector);
int numResult = 0;
for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
@ -97,8 +98,21 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
}
}
- // TODO: this should be refactored and moved to lucene
- // see https://issues.apache.org/jira/browse/LUCENE-6880
+ /**
+  * TODO: this should be refactored and moved to lucene, see https://issues.apache.org/jira/browse/LUCENE-6880
+  *
+  * Custom collector that returns the top documents from the completion suggester.
+  * When suggestions are augmented with context values, this collector groups suggestions coming from the same document
+  * but matching different contexts together. Each document is counted as one entry and the provided size is the expected
+  * number of documents that should be returned (not the number of suggestions).
+  * This collector is also able to filter out duplicate suggestions coming from different documents.
+  * When different contexts match the same suggestion form, only the best one (sorted by weight) is kept.
+  * In order to keep this feature fast, the de-duplication of suggestions with different contexts is done
+  * only on the top N*num_contexts (where N is the number of documents to return) suggestions per segment.
+  * This means that skip_duplicates will visit at most N*num_contexts suggestions per segment to find unique suggestions
+  * that match the input. If more than N*num_contexts suggestions are duplicates with different contexts, this collector
+  * will not be able to return more than one suggestion even when N is greater than 1.
+  */
private static final class TopDocumentsCollector extends TopSuggestDocsCollector {
/**
@ -150,93 +164,53 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
}
}
- private static final class SuggestDocPriorityQueue extends PriorityQueue<SuggestDoc> {
- SuggestDocPriorityQueue(int maxSize) {
- super(maxSize);
- }
- @Override
- protected boolean lessThan(SuggestDoc a, SuggestDoc b) {
- if (a.score == b.score) {
- int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
- if (cmp == 0) {
- // prefer smaller doc id, in case of a tie
- return a.doc > b.doc;
- } else {
- return cmp > 0;
- }
- }
- return a.score < b.score;
- }
- public SuggestDoc[] getResults() {
- int size = size();
- SuggestDoc[] res = new SuggestDoc[size];
- for (int i = size - 1; i >= 0; i--) {
- res[i] = pop();
- }
- return res;
- }
- }
- private final int num;
- private final SuggestDocPriorityQueue pq;
- private final Map<Integer, SuggestDoc> scoreDocMap;
- // TODO: expose dup removal
- TopDocumentsCollector(int num) {
- super(1, false); // TODO hack, we don't use the underlying pq, so we allocate a size of 1
- this.num = num;
- this.scoreDocMap = new LinkedHashMap<>(num);
- this.pq = new SuggestDocPriorityQueue(num);
- }
- @Override
- public int getCountToCollect() {
- // This is only needed because we initialize
- // the base class with 1 instead of the actual num
- return num;
- }
- @Override
- protected void doSetNextReader(LeafReaderContext context) throws IOException {
- super.doSetNextReader(context);
- updateResults();
- }
- private void updateResults() {
- for (SuggestDoc suggestDoc : scoreDocMap.values()) {
- if (pq.insertWithOverflow(suggestDoc) == suggestDoc) {
- break;
- }
- }
- scoreDocMap.clear();
- }
+ private final Map<Integer, SuggestDoc> docsMap;
+ TopDocumentsCollector(int num, boolean skipDuplicates) {
+ super(Math.max(1, num), skipDuplicates);
+ this.docsMap = new LinkedHashMap<>(num);
+ }
@Override
public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
- if (scoreDocMap.containsKey(docID)) {
- SuggestDoc suggestDoc = scoreDocMap.get(docID);
- suggestDoc.add(key, context, score);
- } else if (scoreDocMap.size() <= num) {
- scoreDocMap.put(docID, new SuggestDoc(docBase + docID, key, context, score));
- } else {
- throw new CollectionTerminatedException();
+ int globalDoc = docID + docBase;
+ if (docsMap.containsKey(globalDoc)) {
+ docsMap.get(globalDoc).add(key, context, score);
+ } else {
+ docsMap.put(globalDoc, new SuggestDoc(globalDoc, key, context, score));
+ super.collect(docID, key, context, score);
}
}
@Override
public TopSuggestDocs get() throws IOException {
- updateResults(); // to empty the last set of collected suggest docs
- TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = pq.getResults();
- if (suggestScoreDocs.length > 0) {
- return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
- } else {
- return TopSuggestDocs.EMPTY;
- }
+ TopSuggestDocs entries = super.get();
+ if (entries.scoreDocs.length == 0) {
+ return TopSuggestDocs.EMPTY;
+ }
+ // The parent class returns suggestions, not documents, and dedups only the surface form (without contexts).
+ // The following code groups suggestions matching different contexts by document id and dedups the surface form + contexts
+ // if needed (skip_duplicates).
+ int size = entries.scoreDocs.length;
+ final List<TopSuggestDocs.SuggestScoreDoc> suggestDocs = new ArrayList(size);
+ final CharArraySet seenSurfaceForms = doSkipDuplicates() ? new CharArraySet(size, false) : null;
+ for (TopSuggestDocs.SuggestScoreDoc suggestEntry : entries.scoreLookupDocs()) {
+ final SuggestDoc suggestDoc;
+ if (docsMap != null) {
+ suggestDoc = docsMap.get(suggestEntry.doc);
+ } else {
+ suggestDoc = new SuggestDoc(suggestEntry.doc, suggestEntry.key, suggestEntry.context, suggestEntry.score);
+ }
+ if (doSkipDuplicates()) {
+ if (seenSurfaceForms.contains(suggestDoc.key)) {
+ continue;
+ }
+ seenSurfaceForms.add(suggestDoc.key);
+ }
+ suggestDocs.add(suggestDoc);
+ }
+ return new TopSuggestDocs((int) entries.totalHits,
+ suggestDocs.toArray(new TopSuggestDocs.SuggestScoreDoc[0]), entries.getMaxScore());
}
}
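As an aside, the behavior described in the Javadoc above can be summarized with a small, self-contained sketch in plain Java (illustrative types only, not the Elasticsearch or Lucene classes): hits for the same document are merged into one entry, and with skip_duplicates a surface form already accepted from a higher-scoring document is dropped.

// Illustrative sketch only: models the collector's grouping/dedup idea with JDK types.
// "Hit" and "topDocs" are hypothetical names; input is assumed to arrive sorted by descending score,
// which is how the suggester hands suggestions to the collector.
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

final class CollectorSketch {
    static final class Hit {
        final int doc;
        final String surfaceForm;
        final String context;
        final float score;

        Hit(int doc, String surfaceForm, String context, float score) {
            this.doc = doc;
            this.surfaceForm = surfaceForm;
            this.context = context;
            this.score = score;
        }
    }

    static List<List<Hit>> topDocs(List<Hit> hitsByDescendingScore, int numDocs, boolean skipDuplicates) {
        Map<Integer, List<Hit>> byDoc = new LinkedHashMap<>(); // one entry per document
        Set<String> seenSurfaceForms = new HashSet<>();
        for (Hit hit : hitsByDescendingScore) {
            List<Hit> existing = byDoc.get(hit.doc);
            if (existing != null) {
                existing.add(hit); // same document matched through another context
                continue;
            }
            if (byDoc.size() == numDocs) {
                break; // enough documents collected
            }
            if (skipDuplicates && seenSurfaceForms.add(hit.surfaceForm) == false) {
                continue; // surface form already returned for a higher-scoring document
            }
            List<Hit> entry = new ArrayList<>();
            entry.add(hit);
            byDoc.put(hit.doc, entry);
        }
        return new ArrayList<>(byDoc.values());
    }
}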

View File

@ -18,8 +18,10 @@
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.suggest.Lookup;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@ -68,11 +70,38 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
public static final int TYPE = 4;
private boolean skipDuplicates;
public CompletionSuggestion() {
}
- public CompletionSuggestion(String name, int size) {
+ /**
+  * Constructor
+  * @param name The name for the suggestions
+  * @param size The number of suggestions to return
+  * @param skipDuplicates Whether duplicate suggestions should be filtered out
+  */
+ public CompletionSuggestion(String name, int size, boolean skipDuplicates) {
super(name, size);
+ this.skipDuplicates = skipDuplicates;
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
// TODO should be backported to 6.1.0
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
skipDuplicates = in.readBoolean();
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
// TODO should be backported to 6.1.0
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(skipDuplicates);
}
}
/**
@ -95,7 +124,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
}
public static CompletionSuggestion fromXContent(XContentParser parser, String name) throws IOException {
- CompletionSuggestion suggestion = new CompletionSuggestion(name, -1);
+ CompletionSuggestion suggestion = new CompletionSuggestion(name, -1, false);
parseEntries(parser, suggestion, CompletionSuggestion.Entry::fromXContent);
return suggestion;
}
@ -146,9 +175,19 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
// the global top <code>size</code> entries are collected from the shard results
// using a priority queue
OptionPriorityQueue priorityQueue = new OptionPriorityQueue(leader.getSize(), COMPARATOR);
// Dedup duplicate suggestions (based on the surface form) if skip duplicates is activated
final CharArraySet seenSurfaceForms = leader.skipDuplicates ? new CharArraySet(leader.getSize(), false) : null;
for (Suggest.Suggestion<Entry> suggestion : toReduce) {
assert suggestion.getName().equals(name) : "name should be identical across all suggestions";
for (Entry.Option option : ((CompletionSuggestion) suggestion).getOptions()) {
if (leader.skipDuplicates) {
assert ((CompletionSuggestion) suggestion).skipDuplicates;
String text = option.getText().string();
if (seenSurfaceForms.contains(text)) {
continue;
}
seenSurfaceForms.add(text);
}
if (option == priorityQueue.insertWithOverflow(option)) {
// if the current option has overflown from pq,
// we can assume all of the successive options
@ -157,7 +196,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
}
}
}
- final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize());
+ final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize(), leader.skipDuplicates);
final Entry entry = new Entry(leaderEntry.getText(), leaderEntry.getOffset(), leaderEntry.getLength());
Collections.addAll(entry.getOptions(), priorityQueue.get());
suggestion.addTerm(entry);
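A rough, plain-Java sketch of the reduce step shown above, using an illustrative `Option` class rather than the Elasticsearch types: per-shard results are merged best-score-first into the global top `size`, and when skip_duplicates is set a surface form that has already been accepted suppresses the same form coming from any other shard or document.

// Illustrative sketch of the cross-shard merge with optional duplicate filtering.
// The real implementation feeds pre-sorted shard options into a priority queue; a flatten-and-sort
// is used here only to keep the example short.
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

final class ShardReduceSketch {
    static final class Option {
        final String text;
        final float score;

        Option(String text, float score) {
            this.text = text;
            this.score = score;
        }
    }

    static List<Option> reduce(List<List<Option>> perShardOptions, int size, boolean skipDuplicates) {
        List<Option> all = new ArrayList<>();
        perShardOptions.forEach(all::addAll);
        all.sort((a, b) -> Float.compare(b.score, a.score)); // highest score first
        Set<String> seenSurfaceForms = new HashSet<>();
        List<Option> top = new ArrayList<>(size);
        for (Option option : all) {
            if (skipDuplicates && seenSurfaceForms.add(option.text) == false) {
                continue; // duplicate surface form from another shard or document
            }
            top.add(option);
            if (top.size() == size) {
                break;
            }
        }
        return top;
    }
}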

View File

@ -19,6 +19,7 @@
package org.elasticsearch.search.suggest.completion;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
@ -57,6 +58,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
private static final XContentType CONTEXT_BYTES_XCONTENT_TYPE = XContentType.JSON;
static final String SUGGESTION_NAME = "completion";
static final ParseField CONTEXTS_FIELD = new ParseField("contexts", "context");
static final ParseField SKIP_DUPLICATES_FIELD = new ParseField("skip_duplicates");
/**
* {
@ -94,11 +96,13 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
v.contextBytes = builder.bytes();
p.skipChildren();
}, CONTEXTS_FIELD, ObjectParser.ValueType.OBJECT); // context is deprecated
PARSER.declareBoolean(CompletionSuggestionBuilder::skipDuplicates, SKIP_DUPLICATES_FIELD);
}
protected FuzzyOptions fuzzyOptions;
protected RegexOptions regexOptions;
protected BytesReference contextBytes = null;
protected boolean skipDuplicates = false;
public CompletionSuggestionBuilder(String field) {
super(field);
@ -113,6 +117,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
fuzzyOptions = in.fuzzyOptions;
regexOptions = in.regexOptions;
contextBytes = in.contextBytes;
skipDuplicates = in.skipDuplicates;
}
/**
@ -123,6 +128,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
fuzzyOptions = in.readOptionalWriteable(FuzzyOptions::new);
regexOptions = in.readOptionalWriteable(RegexOptions::new);
contextBytes = in.readOptionalBytesReference();
// TODO should be backported to 6.1.0
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
skipDuplicates = in.readBoolean();
}
}
@Override
@ -130,6 +139,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
out.writeOptionalWriteable(fuzzyOptions);
out.writeOptionalWriteable(regexOptions);
out.writeOptionalBytesReference(contextBytes);
// TODO should be backported to 6.1.0
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeBoolean(skipDuplicates);
}
}
/**
@ -210,6 +223,21 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
return this;
}
/**
* Returns whether duplicate suggestions should be filtered out.
*/
public boolean skipDuplicates() {
return skipDuplicates;
}
/**
* Should duplicates be filtered or not. Defaults to <tt>false</tt>.
*/
public CompletionSuggestionBuilder skipDuplicates(boolean skipDuplicates) {
this.skipDuplicates = skipDuplicates;
return this;
}
private static class InnerBuilder extends CompletionSuggestionBuilder {
private String field;
@ -231,6 +259,9 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
if (regexOptions != null) {
regexOptions.toXContent(builder, params);
}
if (skipDuplicates) {
builder.field(SKIP_DUPLICATES_FIELD.getPreferredName(), skipDuplicates);
}
if (contextBytes != null) {
builder.rawField(CONTEXTS_FIELD.getPreferredName(), contextBytes);
}
@ -255,6 +286,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
// copy over common settings to each suggestion builder
final MapperService mapperService = context.getMapperService();
populateCommonFields(mapperService, suggestionContext);
suggestionContext.setSkipDuplicates(skipDuplicates);
suggestionContext.setFuzzyOptions(fuzzyOptions);
suggestionContext.setRegexOptions(regexOptions);
MappedFieldType mappedFieldType = mapperService.fullName(suggestionContext.getField());
@ -302,13 +334,14 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
@Override
protected boolean doEquals(CompletionSuggestionBuilder other) {
- return Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
+ return skipDuplicates == other.skipDuplicates &&
+     Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
Objects.equals(regexOptions, other.regexOptions) &&
Objects.equals(contextBytes, other.contextBytes);
}
@Override
protected int doHashCode() {
- return Objects.hash(fuzzyOptions, regexOptions, contextBytes);
+ return Objects.hash(fuzzyOptions, regexOptions, contextBytes, skipDuplicates);
}
}

View File

@ -40,6 +40,7 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
private CompletionFieldMapper.CompletionFieldType fieldType;
private FuzzyOptions fuzzyOptions;
private RegexOptions regexOptions;
private boolean skipDuplicates;
private Map<String, List<ContextMapping.InternalQueryContext>> queryContexts = Collections.emptyMap();
CompletionFieldMapper.CompletionFieldType getFieldType() {
@ -62,6 +63,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
this.queryContexts = queryContexts;
}
void setSkipDuplicates(boolean skipDuplicates) {
this.skipDuplicates = skipDuplicates;
}
public FuzzyOptions getFuzzyOptions() {
return fuzzyOptions;
}
@ -74,6 +79,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
return queryContexts;
}
public boolean isSkipDuplicates() {
return skipDuplicates;
}
CompletionQuery toQuery() {
CompletionFieldMapper.CompletionFieldType fieldType = getFieldType();
final CompletionQuery query;

View File

@ -72,7 +72,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
public void testSort() throws Exception {
List<CompletionSuggestion> suggestions = new ArrayList<>();
for (int i = 0; i < randomIntBetween(1, 5); i++) {
- suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20)));
+ suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false));
}
int nShards = randomIntBetween(1, 20);
int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
@ -139,7 +139,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
for (int i = 0; i < randomIntBetween(1, 5); i++) {
int size = randomIntBetween(1, 20);
maxSuggestSize += size;
- suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size));
+ suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size, false));
}
int nShards = randomIntBetween(1, 20);
int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
@ -202,7 +202,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
List<CompletionSuggestion> shardSuggestion = new ArrayList<>();
for (CompletionSuggestion completionSuggestion : suggestions) {
CompletionSuggestion suggestion = new CompletionSuggestion(
- completionSuggestion.getName(), completionSuggestion.getSize());
+ completionSuggestion.getName(), completionSuggestion.getSize(), false);
final CompletionSuggestion.Entry completionEntry = new CompletionSuggestion.Entry(new Text(""), 0, 5);
suggestion.addTerm(completionEntry);
int optionSize = randomIntBetween(1, suggestion.getSize());

View File

@ -858,6 +858,38 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
}
}
public void testSkipDuplicates() throws Exception {
final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
createIndexAndMapping(mapping);
int numDocs = randomIntBetween(10, 100);
int numUnique = randomIntBetween(1, numDocs);
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
for (int i = 1; i <= numDocs; i++) {
int id = i % numUnique;
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
.setSource(jsonBuilder()
.startObject()
.startObject(FIELD)
.field("input", "suggestion" + id)
.field("weight", id)
.endObject()
.endObject()
));
}
String[] expected = new String[numUnique];
int sugg = numUnique - 1;
for (int i = 0; i < numUnique; i++) {
expected[i] = "suggestion" + sugg--;
}
indexRandom(true, indexRequestBuilders);
CompletionSuggestionBuilder completionSuggestionBuilder =
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
SearchResponse searchResponse = client().prepareSearch(INDEX)
.suggest(new SuggestBuilder().addSuggestion("suggestions", completionSuggestionBuilder)).execute().actionGet();
assertSuggestions(searchResponse, true, "suggestions", expected);
}
public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)).execute().actionGet();
assertSuggestions(searchResponse, suggestionName, suggestions);
@ -1108,6 +1140,28 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
}
}
public void testMultiDocSuggestions() throws Exception {
final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
createIndexAndMapping(mapping);
int numDocs = 10;
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
for (int i = 1; i <= numDocs; i++) {
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
.setSource(jsonBuilder()
.startObject()
.startObject(FIELD)
.array("input", "suggestion" + i, "suggestions" + i, "suggester" + i)
.field("weight", i)
.endObject()
.endObject()
));
}
indexRandom(true, indexRequestBuilders);
CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion(FIELD).prefix("sugg");
assertSuggestions("foo", prefix, "suggester10", "suggester9", "suggester8", "suggester7", "suggester6");
}
public static boolean isReservedChar(char c) {
switch (c) {
case '\u001F':

View File

@ -639,6 +639,50 @@ public class ContextCompletionSuggestSearchIT extends ESIntegTestCase {
assertEquals("Hotel Amsterdam in Berlin", searchResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string());
}
public void testSkipDuplicatesWithContexts() throws Exception {
LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>();
map.put("type", ContextBuilder.category("type").field("type").build());
map.put("cat", ContextBuilder.category("cat").field("cat").build());
final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map);
createIndexAndMapping(mapping);
int numDocs = randomIntBetween(10, 100);
int numUnique = randomIntBetween(1, numDocs);
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
for (int i = 0; i < numDocs; i++) {
int id = i % numUnique;
XContentBuilder source = jsonBuilder()
.startObject()
.startObject(FIELD)
.field("input", "suggestion" + id)
.field("weight", id)
.endObject()
.field("cat", "cat" + id % 2)
.field("type", "type" + id)
.endObject();
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
.setSource(source));
}
String[] expected = new String[numUnique];
for (int i = 0; i < numUnique; i++) {
expected[i] = "suggestion" + (numUnique-1-i);
}
indexRandom(true, indexRequestBuilders);
CompletionSuggestionBuilder completionSuggestionBuilder =
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
assertSuggestions("suggestions", completionSuggestionBuilder, expected);
Map<String, List<? extends ToXContent>> contextMap = new HashMap<>();
contextMap.put("cat", Arrays.asList(CategoryQueryContext.builder().setCategory("cat0").build()));
completionSuggestionBuilder =
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").contexts(contextMap).skipDuplicates(true).size(numUnique);
String[] expectedModulo = Arrays.stream(expected)
.filter((s) -> Integer.parseInt(s.substring("suggestion".length())) % 2 == 0)
.toArray(String[]::new);
assertSuggestions("suggestions", completionSuggestionBuilder, expectedModulo);
}
public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(
new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)

View File

@ -139,7 +139,7 @@ public class SuggestTests extends ESTestCase {
public void testFilter() throws Exception {
List<Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>> suggestions;
- CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2);
+ CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2, false);
PhraseSuggestion phraseSuggestion = new PhraseSuggestion(randomAlphaOfLength(10), 2);
TermSuggestion termSuggestion = new TermSuggestion(randomAlphaOfLength(10), 2, SortBy.SCORE);
suggestions = Arrays.asList(completionSuggestion, phraseSuggestion, termSuggestion);
@ -160,7 +160,7 @@ public class SuggestTests extends ESTestCase {
suggestions = new ArrayList<>();
int n = randomIntBetween(2, 5);
for (int i = 0; i < n; i++) {
- suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5)));
+ suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5), false));
}
Collections.shuffle(suggestions, random());
Suggest suggest = new Suggest(suggestions);

View File

@ -79,7 +79,7 @@ public class SuggestionTests extends ESTestCase {
suggestion = new PhraseSuggestion(name, size);
entrySupplier = () -> SuggestionEntryTests.createTestItem(PhraseSuggestion.Entry.class);
} else if (type == CompletionSuggestion.class) {
- suggestion = new CompletionSuggestion(name, size);
+ suggestion = new CompletionSuggestion(name, size, randomBoolean());
entrySupplier = () -> SuggestionEntryTests.createTestItem(CompletionSuggestion.Entry.class);
} else {
throw new UnsupportedOperationException("type not supported [" + type + "]");
@ -249,7 +249,7 @@ public class SuggestionTests extends ESTestCase {
CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(1, new Text("someText"), 1.3f, contexts);
CompletionSuggestion.Entry entry = new CompletionSuggestion.Entry(new Text("entryText"), 42, 313);
entry.addOption(option);
CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5);
CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5, randomBoolean());
suggestion.addTerm(entry);
BytesReference xContent = toXContent(suggestion, XContentType.JSON, params, randomBoolean());
assertEquals(
@ -265,4 +265,4 @@ public class SuggestionTests extends ESTestCase {
+ "}]}", xContent.utf8ToString());
}
}
}

View File

@ -114,6 +114,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
contextMap.put(geoQueryContextName, contexts);
}
testBuilder.contexts(contextMap);
testBuilder.skipDuplicates(randomBoolean());
return testBuilder;
}
@ -128,7 +129,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
@Override
protected void mutateSpecificParameters(CompletionSuggestionBuilder builder) throws IOException {
- switch (randomIntBetween(0, 4)) {
+ switch (randomIntBetween(0, 5)) {
case 0:
int nCatContext = randomIntBetween(1, 5);
List<CategoryQueryContext> contexts = new ArrayList<>(nCatContext);
@ -154,6 +155,9 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
case 4:
builder.regex(randomAlphaOfLength(10), RegexOptionsTests.randomRegexOptions());
break;
case 5:
builder.skipDuplicates(!builder.skipDuplicates);
break;
default:
throw new IllegalStateException("should not be reached");
}
@ -182,5 +186,6 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
assertEquals(parsedContextBytes.get(contextName), queryContexts.get(contextName));
}
assertEquals(builder.regexOptions, completionSuggestionCtx.getRegexOptions());
assertEquals(builder.skipDuplicates, completionSuggestionCtx.isSkipDuplicates());
}
}

View File

@ -24,6 +24,7 @@ import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.test.ESTestCase;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -38,7 +39,7 @@ public class CompletionSuggestionTests extends ESTestCase {
String name = randomAlphaOfLength(10);
int size = randomIntBetween(3, 5);
for (int i = 0; i < nShards; i++) {
- CompletionSuggestion suggestion = new CompletionSuggestion(name, size);
+ CompletionSuggestion suggestion = new CompletionSuggestion(name, size, false);
suggestion.addTerm(new CompletionSuggestion.Entry(new Text(""), 0, 0));
shardSuggestions.add(suggestion);
}

View File

@ -277,6 +277,7 @@ The basic completion suggester query supports the following parameters:
`field`:: The name of the field on which to run the query (required).
`size`:: The number of suggestions to return (defaults to `5`).
`skip_duplicates`:: Whether duplicate suggestions should be filtered out (defaults to `false`).
NOTE: The completion suggester considers all documents in the index.
See <<suggester-context>> for an explanation of how to query a subset of
@ -291,6 +292,33 @@ index completions into a single shard index. In case of high heap usage due to
shard size, it is still recommended to break index into multiple shards instead
of optimizing for completion performance.
[[skip_duplicates]]
==== Skip duplicate suggestions
Queries can return duplicate suggestions coming from different documents.
You can change this behavior by setting `skip_duplicates` to `true`.
When enabled, this option filters out documents with duplicate suggestions from the result.
[source,js]
--------------------------------------------------
POST music/_search?pretty
{
"suggest": {
"song-suggest" : {
"prefix" : "nor",
"completion" : {
"field" : "suggest",
"skip_duplicates": true
}
}
}
}
--------------------------------------------------
// CONSOLE
WARNING: When set to `true`, this option can slow down search because more suggestions
need to be visited to find the top N.
[[fuzzy]]
==== Fuzzy queries

View File

@ -291,3 +291,42 @@ setup:
- match: { suggest.result.0.options.1._type: "test" }
- match: { suggest.result.0.options.1._source.title: "title_bar" }
- match: { suggest.result.0.options.1._source.count: 4 }
---
"Skip duplicates should work":
- skip:
version: " - 6.99.99"
reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
- do:
index:
index: test
type: test
id: 1
body:
suggest_1: "bar"
- do:
index:
index: test
type: test
id: 2
body:
suggest_1: "bar"
- do:
indices.refresh: {}
- do:
search:
body:
suggest:
result:
text: "b"
completion:
field: suggest_1
skip_duplicates: true
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "bar" }

View File

@ -276,4 +276,76 @@ setup:
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "Marriot in Berlin" }
- match: { suggest.result.0.options.0.text: "Marriot in Berlin" }
---
"Skip duplicates with contexts should work":
- skip:
version: " - 6.99.99"
reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
- do:
index:
index: test
type: test
id: 1
body:
suggest_context:
input: "foo"
contexts:
color: "red"
- do:
index:
index: test
type: test
id: 1
body:
suggest_context:
input: "foo"
contexts:
color: "red"
- do:
index:
index: test
type: test
id: 2
body:
suggest_context:
input: "foo"
contexts:
color: "blue"
- do:
indices.refresh: {}
- do:
search:
body:
suggest:
result:
text: "foo"
completion:
field: suggest_context
skip_duplicates: true
contexts:
color: "red"
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "foo" }
- do:
search:
body:
suggest:
result:
text: "foo"
completion:
skip_duplicates: true
field: suggest_context
- length: { suggest.result: 1 }
- length: { suggest.result.0.options: 1 }
- match: { suggest.result.0.options.0.text: "foo" }