mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-27 10:28:28 +00:00
Expose duplicate removal in the completion suggester (#26496)
This change exposes the duplicate removal option added in Lucene for the completion suggester with a new option called `skip_duplicates` (defaults to false). This commit also adapts the custom suggest collector to handle deduplication when multiple contexts match the input. Closes #23364
This commit is contained in:
parent
abe83c4fac
commit
d68d8c9cef
@ -18,17 +18,16 @@
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BulkScorer;
|
||||
import org.apache.lucene.search.CollectionTerminatedException;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.document.CompletionQuery;
|
||||
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
|
||||
import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.elasticsearch.index.mapper.CompletionFieldMapper;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
@ -53,12 +52,14 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
|
||||
final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
|
||||
if (suggestionContext.getFieldType() != null) {
|
||||
final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
|
||||
CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
|
||||
CompletionSuggestion completionSuggestion =
|
||||
new CompletionSuggestion(name, suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
|
||||
spare.copyUTF8Bytes(suggestionContext.getText());
|
||||
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
|
||||
new Text(spare.toString()), 0, spare.length());
|
||||
completionSuggestion.addTerm(completionSuggestEntry);
|
||||
TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
|
||||
TopSuggestDocsCollector collector =
|
||||
new TopDocumentsCollector(suggestionContext.getSize(), suggestionContext.isSkipDuplicates());
|
||||
suggest(searcher, suggestionContext.toQuery(), collector);
|
||||
int numResult = 0;
|
||||
for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
|
||||
@ -97,8 +98,21 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: this should be refactored and moved to lucene
|
||||
// see https://issues.apache.org/jira/browse/LUCENE-6880
|
||||
/**
|
||||
* TODO: this should be refactored and moved to Lucene, see https://issues.apache.org/jira/browse/LUCENE-6880
|
||||
*
|
||||
* Custom collector that returns top documents from the completion suggester.
|
||||
* When suggestions are augmented with contexts values this collector groups suggestions coming from the same document
|
||||
* but matching different contexts together. Each document is counted as 1 entry and the provided size is the expected number
|
||||
* of documents that should be returned (not the number of suggestions).
|
||||
* This collector is also able to filter duplicate suggestions coming from different documents.
|
||||
* When different contexts match the same suggestion form, only the best one (sorted by weight) is kept.
|
||||
* In order to keep this feature fast, the de-duplication of suggestions with different contexts is done
|
||||
* only on the top N*num_contexts (where N is the number of documents to return) suggestions per segment.
|
||||
* This means that skip_duplicates will visit at most N*num_contexts suggestions per segment to find unique suggestions
|
||||
* that match the input. If more than N*num_contexts suggestions are duplicated with different contexts this collector
|
||||
* will not be able to return more than one suggestion even when N is greater than 1.
|
||||
**/
|
||||
private static final class TopDocumentsCollector extends TopSuggestDocsCollector {
|
||||
|
||||
/**
|
||||
@ -150,93 +164,53 @@ public class CompletionSuggester extends Suggester<CompletionSuggestionContext>
|
||||
}
|
||||
}
|
||||
|
||||
private static final class SuggestDocPriorityQueue extends PriorityQueue<SuggestDoc> {
|
||||
private final Map<Integer, SuggestDoc> docsMap;
|
||||
|
||||
SuggestDocPriorityQueue(int maxSize) {
|
||||
super(maxSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean lessThan(SuggestDoc a, SuggestDoc b) {
|
||||
if (a.score == b.score) {
|
||||
int cmp = Lookup.CHARSEQUENCE_COMPARATOR.compare(a.key, b.key);
|
||||
if (cmp == 0) {
|
||||
// prefer smaller doc id, in case of a tie
|
||||
return a.doc > b.doc;
|
||||
} else {
|
||||
return cmp > 0;
|
||||
}
|
||||
}
|
||||
return a.score < b.score;
|
||||
}
|
||||
|
||||
public SuggestDoc[] getResults() {
|
||||
int size = size();
|
||||
SuggestDoc[] res = new SuggestDoc[size];
|
||||
for (int i = size - 1; i >= 0; i--) {
|
||||
res[i] = pop();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
private final int num;
|
||||
private final SuggestDocPriorityQueue pq;
|
||||
private final Map<Integer, SuggestDoc> scoreDocMap;
|
||||
|
||||
// TODO: expose dup removal
|
||||
|
||||
TopDocumentsCollector(int num) {
|
||||
super(1, false); // TODO hack, we don't use the underlying pq, so we allocate a size of 1
|
||||
this.num = num;
|
||||
this.scoreDocMap = new LinkedHashMap<>(num);
|
||||
this.pq = new SuggestDocPriorityQueue(num);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCountToCollect() {
|
||||
// This is only needed because we initialize
|
||||
// the base class with 1 instead of the actual num
|
||||
return num;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
super.doSetNextReader(context);
|
||||
updateResults();
|
||||
}
|
||||
|
||||
private void updateResults() {
|
||||
for (SuggestDoc suggestDoc : scoreDocMap.values()) {
|
||||
if (pq.insertWithOverflow(suggestDoc) == suggestDoc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
scoreDocMap.clear();
|
||||
TopDocumentsCollector(int num, boolean skipDuplicates) {
|
||||
super(Math.max(1, num), skipDuplicates);
|
||||
this.docsMap = new LinkedHashMap<>(num);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int docID, CharSequence key, CharSequence context, float score) throws IOException {
|
||||
if (scoreDocMap.containsKey(docID)) {
|
||||
SuggestDoc suggestDoc = scoreDocMap.get(docID);
|
||||
suggestDoc.add(key, context, score);
|
||||
} else if (scoreDocMap.size() <= num) {
|
||||
scoreDocMap.put(docID, new SuggestDoc(docBase + docID, key, context, score));
|
||||
int globalDoc = docID + docBase;
|
||||
if (docsMap.containsKey(globalDoc)) {
|
||||
docsMap.get(globalDoc).add(key, context, score);
|
||||
} else {
|
||||
throw new CollectionTerminatedException();
|
||||
docsMap.put(globalDoc, new SuggestDoc(globalDoc, key, context, score));
|
||||
super.collect(docID, key, context, score);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopSuggestDocs get() throws IOException {
|
||||
updateResults(); // to empty the last set of collected suggest docs
|
||||
TopSuggestDocs.SuggestScoreDoc[] suggestScoreDocs = pq.getResults();
|
||||
if (suggestScoreDocs.length > 0) {
|
||||
return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
|
||||
} else {
|
||||
TopSuggestDocs entries = super.get();
|
||||
if (entries.scoreDocs.length == 0) {
|
||||
return TopSuggestDocs.EMPTY;
|
||||
}
|
||||
// The parent class returns suggestions, not documents, and dedup only the surface form (without contexts).
|
||||
// The following code groups suggestions matching different contexts by document id and dedup the surface form + contexts
|
||||
// if needed (skip_duplicates).
|
||||
int size = entries.scoreDocs.length;
|
||||
final List<TopSuggestDocs.SuggestScoreDoc> suggestDocs = new ArrayList(size);
|
||||
final CharArraySet seenSurfaceForms = doSkipDuplicates() ? new CharArraySet(size, false) : null;
|
||||
for (TopSuggestDocs.SuggestScoreDoc suggestEntry : entries.scoreLookupDocs()) {
|
||||
final SuggestDoc suggestDoc;
|
||||
if (docsMap != null) {
|
||||
suggestDoc = docsMap.get(suggestEntry.doc);
|
||||
} else {
|
||||
suggestDoc = new SuggestDoc(suggestEntry.doc, suggestEntry.key, suggestEntry.context, suggestEntry.score);
|
||||
}
|
||||
if (doSkipDuplicates()) {
|
||||
if (seenSurfaceForms.contains(suggestDoc.key)) {
|
||||
continue;
|
||||
}
|
||||
seenSurfaceForms.add(suggestDoc.key);
|
||||
}
|
||||
suggestDocs.add(suggestDoc);
|
||||
}
|
||||
return new TopSuggestDocs((int) entries.totalHits,
|
||||
suggestDocs.toArray(new TopSuggestDocs.SuggestScoreDoc[0]), entries.getMaxScore());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -18,8 +18,10 @@
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
@ -68,11 +70,38 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
|
||||
|
||||
public static final int TYPE = 4;
|
||||
|
||||
private boolean skipDuplicates;
|
||||
|
||||
public CompletionSuggestion() {
|
||||
}
|
||||
|
||||
public CompletionSuggestion(String name, int size) {
|
||||
/**
|
||||
* Creates a completion suggestion.
|
||||
* @param name The name for the suggestions
|
||||
* @param size The number of suggestions to return
|
||||
* @param skipDuplicates Whether duplicate suggestions should be filtered out
|
||||
*/
|
||||
public CompletionSuggestion(String name, int size, boolean skipDuplicates) {
|
||||
super(name, size);
|
||||
this.skipDuplicates = skipDuplicates;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
super.readFrom(in);
|
||||
// TODO should be backported to 6.1.0
|
||||
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
skipDuplicates = in.readBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
super.writeTo(out);
|
||||
// TODO should be backported to 6.1.0
|
||||
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
out.writeBoolean(skipDuplicates);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -95,7 +124,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
|
||||
}
|
||||
|
||||
public static CompletionSuggestion fromXContent(XContentParser parser, String name) throws IOException {
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion(name, -1);
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion(name, -1, false);
|
||||
parseEntries(parser, suggestion, CompletionSuggestion.Entry::fromXContent);
|
||||
return suggestion;
|
||||
}
|
||||
@ -146,9 +175,19 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
|
||||
// the global top <code>size</code> entries are collected from the shard results
|
||||
// using a priority queue
|
||||
OptionPriorityQueue priorityQueue = new OptionPriorityQueue(leader.getSize(), COMPARATOR);
|
||||
// Deduplicate suggestions (based on the surface form) if skip_duplicates is enabled
|
||||
final CharArraySet seenSurfaceForms = leader.skipDuplicates ? new CharArraySet(leader.getSize(), false) : null;
|
||||
for (Suggest.Suggestion<Entry> suggestion : toReduce) {
|
||||
assert suggestion.getName().equals(name) : "name should be identical across all suggestions";
|
||||
for (Entry.Option option : ((CompletionSuggestion) suggestion).getOptions()) {
|
||||
if (leader.skipDuplicates) {
|
||||
assert ((CompletionSuggestion) suggestion).skipDuplicates;
|
||||
String text = option.getText().string();
|
||||
if (seenSurfaceForms.contains(text)) {
|
||||
continue;
|
||||
}
|
||||
seenSurfaceForms.add(text);
|
||||
}
|
||||
if (option == priorityQueue.insertWithOverflow(option)) {
|
||||
// if the current option has overflown from pq,
|
||||
// we can assume all of the successive options
|
||||
@ -157,7 +196,7 @@ public final class CompletionSuggestion extends Suggest.Suggestion<CompletionSug
|
||||
}
|
||||
}
|
||||
}
|
||||
final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize());
|
||||
final CompletionSuggestion suggestion = new CompletionSuggestion(leader.getName(), leader.getSize(), leader.skipDuplicates);
|
||||
final Entry entry = new Entry(leaderEntry.getText(), leaderEntry.getOffset(), leaderEntry.getLength());
|
||||
Collections.addAll(entry.getOptions(), priorityQueue.get());
|
||||
suggestion.addTerm(entry);
|
||||
|
@ -19,6 +19,7 @@
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
@ -57,6 +58,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
private static final XContentType CONTEXT_BYTES_XCONTENT_TYPE = XContentType.JSON;
|
||||
static final String SUGGESTION_NAME = "completion";
|
||||
static final ParseField CONTEXTS_FIELD = new ParseField("contexts", "context");
|
||||
static final ParseField SKIP_DUPLICATES_FIELD = new ParseField("skip_duplicates");
|
||||
|
||||
/**
|
||||
* {
|
||||
@ -94,11 +96,13 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
v.contextBytes = builder.bytes();
|
||||
p.skipChildren();
|
||||
}, CONTEXTS_FIELD, ObjectParser.ValueType.OBJECT); // context is deprecated
|
||||
PARSER.declareBoolean(CompletionSuggestionBuilder::skipDuplicates, SKIP_DUPLICATES_FIELD);
|
||||
}
|
||||
|
||||
protected FuzzyOptions fuzzyOptions;
|
||||
protected RegexOptions regexOptions;
|
||||
protected BytesReference contextBytes = null;
|
||||
protected boolean skipDuplicates = false;
|
||||
|
||||
public CompletionSuggestionBuilder(String field) {
|
||||
super(field);
|
||||
@ -113,6 +117,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
fuzzyOptions = in.fuzzyOptions;
|
||||
regexOptions = in.regexOptions;
|
||||
contextBytes = in.contextBytes;
|
||||
skipDuplicates = in.skipDuplicates;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -123,6 +128,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
fuzzyOptions = in.readOptionalWriteable(FuzzyOptions::new);
|
||||
regexOptions = in.readOptionalWriteable(RegexOptions::new);
|
||||
contextBytes = in.readOptionalBytesReference();
|
||||
// TODO should be backported to 6.1.0
|
||||
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
skipDuplicates = in.readBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -130,6 +139,10 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
out.writeOptionalWriteable(fuzzyOptions);
|
||||
out.writeOptionalWriteable(regexOptions);
|
||||
out.writeOptionalBytesReference(contextBytes);
|
||||
// TODO should be backported to 6.1.0
|
||||
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
|
||||
out.writeBoolean(skipDuplicates);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -210,6 +223,21 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether duplicate suggestions should be filtered out.
|
||||
*/
|
||||
public boolean skipDuplicates() {
|
||||
return skipDuplicates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets whether duplicate suggestions should be filtered out. Defaults to {@code false}.
|
||||
*/
|
||||
public CompletionSuggestionBuilder skipDuplicates(boolean skipDuplicates) {
|
||||
this.skipDuplicates = skipDuplicates;
|
||||
return this;
|
||||
}
|
||||
|
||||
private static class InnerBuilder extends CompletionSuggestionBuilder {
|
||||
private String field;
|
||||
|
||||
@ -231,6 +259,9 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
if (regexOptions != null) {
|
||||
regexOptions.toXContent(builder, params);
|
||||
}
|
||||
if (skipDuplicates) {
|
||||
builder.field(SKIP_DUPLICATES_FIELD.getPreferredName(), skipDuplicates);
|
||||
}
|
||||
if (contextBytes != null) {
|
||||
builder.rawField(CONTEXTS_FIELD.getPreferredName(), contextBytes);
|
||||
}
|
||||
@ -255,6 +286,7 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
// copy over common settings to each suggestion builder
|
||||
final MapperService mapperService = context.getMapperService();
|
||||
populateCommonFields(mapperService, suggestionContext);
|
||||
suggestionContext.setSkipDuplicates(skipDuplicates);
|
||||
suggestionContext.setFuzzyOptions(fuzzyOptions);
|
||||
suggestionContext.setRegexOptions(regexOptions);
|
||||
MappedFieldType mappedFieldType = mapperService.fullName(suggestionContext.getField());
|
||||
@ -302,13 +334,14 @@ public class CompletionSuggestionBuilder extends SuggestionBuilder<CompletionSug
|
||||
|
||||
@Override
|
||||
protected boolean doEquals(CompletionSuggestionBuilder other) {
|
||||
return Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
|
||||
return skipDuplicates == other.skipDuplicates &&
|
||||
Objects.equals(fuzzyOptions, other.fuzzyOptions) &&
|
||||
Objects.equals(regexOptions, other.regexOptions) &&
|
||||
Objects.equals(contextBytes, other.contextBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int doHashCode() {
|
||||
return Objects.hash(fuzzyOptions, regexOptions, contextBytes);
|
||||
return Objects.hash(fuzzyOptions, regexOptions, contextBytes, skipDuplicates);
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,7 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
|
||||
private CompletionFieldMapper.CompletionFieldType fieldType;
|
||||
private FuzzyOptions fuzzyOptions;
|
||||
private RegexOptions regexOptions;
|
||||
private boolean skipDuplicates;
|
||||
private Map<String, List<ContextMapping.InternalQueryContext>> queryContexts = Collections.emptyMap();
|
||||
|
||||
CompletionFieldMapper.CompletionFieldType getFieldType() {
|
||||
@ -62,6 +63,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
|
||||
this.queryContexts = queryContexts;
|
||||
}
|
||||
|
||||
void setSkipDuplicates(boolean skipDuplicates) {
|
||||
this.skipDuplicates = skipDuplicates;
|
||||
}
|
||||
|
||||
public FuzzyOptions getFuzzyOptions() {
|
||||
return fuzzyOptions;
|
||||
}
|
||||
@ -74,6 +79,10 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest
|
||||
return queryContexts;
|
||||
}
|
||||
|
||||
public boolean isSkipDuplicates() {
|
||||
return skipDuplicates;
|
||||
}
|
||||
|
||||
CompletionQuery toQuery() {
|
||||
CompletionFieldMapper.CompletionFieldType fieldType = getFieldType();
|
||||
final CompletionQuery query;
|
||||
|
@ -72,7 +72,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
|
||||
public void testSort() throws Exception {
|
||||
List<CompletionSuggestion> suggestions = new ArrayList<>();
|
||||
for (int i = 0; i < randomIntBetween(1, 5); i++) {
|
||||
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20)));
|
||||
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false));
|
||||
}
|
||||
int nShards = randomIntBetween(1, 20);
|
||||
int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
|
||||
@ -139,7 +139,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
|
||||
for (int i = 0; i < randomIntBetween(1, 5); i++) {
|
||||
int size = randomIntBetween(1, 20);
|
||||
maxSuggestSize += size;
|
||||
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size));
|
||||
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), size, false));
|
||||
}
|
||||
int nShards = randomIntBetween(1, 20);
|
||||
int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2);
|
||||
@ -202,7 +202,7 @@ public class SearchPhaseControllerTests extends ESTestCase {
|
||||
List<CompletionSuggestion> shardSuggestion = new ArrayList<>();
|
||||
for (CompletionSuggestion completionSuggestion : suggestions) {
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion(
|
||||
completionSuggestion.getName(), completionSuggestion.getSize());
|
||||
completionSuggestion.getName(), completionSuggestion.getSize(), false);
|
||||
final CompletionSuggestion.Entry completionEntry = new CompletionSuggestion.Entry(new Text(""), 0, 5);
|
||||
suggestion.addTerm(completionEntry);
|
||||
int optionSize = randomIntBetween(1, suggestion.getSize());
|
||||
|
@ -858,6 +858,38 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
public void testSkipDuplicates() throws Exception {
|
||||
final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
|
||||
createIndexAndMapping(mapping);
|
||||
int numDocs = randomIntBetween(10, 100);
|
||||
int numUnique = randomIntBetween(1, numDocs);
|
||||
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
|
||||
for (int i = 1; i <= numDocs; i++) {
|
||||
int id = i % numUnique;
|
||||
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
|
||||
.setSource(jsonBuilder()
|
||||
.startObject()
|
||||
.startObject(FIELD)
|
||||
.field("input", "suggestion" + id)
|
||||
.field("weight", id)
|
||||
.endObject()
|
||||
.endObject()
|
||||
));
|
||||
}
|
||||
String[] expected = new String[numUnique];
|
||||
int sugg = numUnique - 1;
|
||||
for (int i = 0; i < numUnique; i++) {
|
||||
expected[i] = "suggestion" + sugg--;
|
||||
}
|
||||
indexRandom(true, indexRequestBuilders);
|
||||
CompletionSuggestionBuilder completionSuggestionBuilder =
|
||||
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
|
||||
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX)
|
||||
.suggest(new SuggestBuilder().addSuggestion("suggestions", completionSuggestionBuilder)).execute().actionGet();
|
||||
assertSuggestions(searchResponse, true, "suggestions", expected);
|
||||
}
|
||||
|
||||
public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)).execute().actionGet();
|
||||
assertSuggestions(searchResponse, suggestionName, suggestions);
|
||||
@ -1108,6 +1140,28 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultiDocSuggestions() throws Exception {
|
||||
final CompletionMappingBuilder mapping = new CompletionMappingBuilder();
|
||||
createIndexAndMapping(mapping);
|
||||
int numDocs = 10;
|
||||
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
|
||||
for (int i = 1; i <= numDocs; i++) {
|
||||
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
|
||||
.setSource(jsonBuilder()
|
||||
.startObject()
|
||||
.startObject(FIELD)
|
||||
.array("input", "suggestion" + i, "suggestions" + i, "suggester" + i)
|
||||
.field("weight", i)
|
||||
.endObject()
|
||||
.endObject()
|
||||
));
|
||||
}
|
||||
indexRandom(true, indexRequestBuilders);
|
||||
CompletionSuggestionBuilder prefix = SuggestBuilders.completionSuggestion(FIELD).prefix("sugg");
|
||||
assertSuggestions("foo", prefix, "suggester10", "suggester9", "suggester8", "suggester7", "suggester6");
|
||||
}
|
||||
|
||||
|
||||
public static boolean isReservedChar(char c) {
|
||||
switch (c) {
|
||||
case '\u001F':
|
||||
|
@ -639,6 +639,50 @@ public class ContextCompletionSuggestSearchIT extends ESIntegTestCase {
|
||||
assertEquals("Hotel Amsterdam in Berlin", searchResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string());
|
||||
}
|
||||
|
||||
public void testSkipDuplicatesWithContexts() throws Exception {
|
||||
LinkedHashMap<String, ContextMapping> map = new LinkedHashMap<>();
|
||||
map.put("type", ContextBuilder.category("type").field("type").build());
|
||||
map.put("cat", ContextBuilder.category("cat").field("cat").build());
|
||||
final CompletionMappingBuilder mapping = new CompletionMappingBuilder().context(map);
|
||||
createIndexAndMapping(mapping);
|
||||
int numDocs = randomIntBetween(10, 100);
|
||||
int numUnique = randomIntBetween(1, numDocs);
|
||||
List<IndexRequestBuilder> indexRequestBuilders = new ArrayList<>();
|
||||
for (int i = 0; i < numDocs; i++) {
|
||||
int id = i % numUnique;
|
||||
XContentBuilder source = jsonBuilder()
|
||||
.startObject()
|
||||
.startObject(FIELD)
|
||||
.field("input", "suggestion" + id)
|
||||
.field("weight", id)
|
||||
.endObject()
|
||||
.field("cat", "cat" + id % 2)
|
||||
.field("type", "type" + id)
|
||||
.endObject();
|
||||
indexRequestBuilders.add(client().prepareIndex(INDEX, TYPE, "" + i)
|
||||
.setSource(source));
|
||||
}
|
||||
String[] expected = new String[numUnique];
|
||||
for (int i = 0; i < numUnique; i++) {
|
||||
expected[i] = "suggestion" + (numUnique-1-i);
|
||||
}
|
||||
indexRandom(true, indexRequestBuilders);
|
||||
CompletionSuggestionBuilder completionSuggestionBuilder =
|
||||
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").skipDuplicates(true).size(numUnique);
|
||||
|
||||
assertSuggestions("suggestions", completionSuggestionBuilder, expected);
|
||||
|
||||
Map<String, List<? extends ToXContent>> contextMap = new HashMap<>();
|
||||
contextMap.put("cat", Arrays.asList(CategoryQueryContext.builder().setCategory("cat0").build()));
|
||||
completionSuggestionBuilder =
|
||||
SuggestBuilders.completionSuggestion(FIELD).prefix("sugg").contexts(contextMap).skipDuplicates(true).size(numUnique);
|
||||
|
||||
String[] expectedModulo = Arrays.stream(expected)
|
||||
.filter((s) -> Integer.parseInt(s.substring("suggestion".length())) % 2 == 0)
|
||||
.toArray(String[]::new);
|
||||
assertSuggestions("suggestions", completionSuggestionBuilder, expectedModulo);
|
||||
}
|
||||
|
||||
public void assertSuggestions(String suggestionName, SuggestionBuilder suggestBuilder, String... suggestions) {
|
||||
SearchResponse searchResponse = client().prepareSearch(INDEX).suggest(
|
||||
new SuggestBuilder().addSuggestion(suggestionName, suggestBuilder)
|
||||
|
@ -139,7 +139,7 @@ public class SuggestTests extends ESTestCase {
|
||||
|
||||
public void testFilter() throws Exception {
|
||||
List<Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>> suggestions;
|
||||
CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2);
|
||||
CompletionSuggestion completionSuggestion = new CompletionSuggestion(randomAlphaOfLength(10), 2, false);
|
||||
PhraseSuggestion phraseSuggestion = new PhraseSuggestion(randomAlphaOfLength(10), 2);
|
||||
TermSuggestion termSuggestion = new TermSuggestion(randomAlphaOfLength(10), 2, SortBy.SCORE);
|
||||
suggestions = Arrays.asList(completionSuggestion, phraseSuggestion, termSuggestion);
|
||||
@ -160,7 +160,7 @@ public class SuggestTests extends ESTestCase {
|
||||
suggestions = new ArrayList<>();
|
||||
int n = randomIntBetween(2, 5);
|
||||
for (int i = 0; i < n; i++) {
|
||||
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5)));
|
||||
suggestions.add(new CompletionSuggestion(randomAlphaOfLength(10), randomIntBetween(3, 5), false));
|
||||
}
|
||||
Collections.shuffle(suggestions, random());
|
||||
Suggest suggest = new Suggest(suggestions);
|
||||
|
@ -79,7 +79,7 @@ public class SuggestionTests extends ESTestCase {
|
||||
suggestion = new PhraseSuggestion(name, size);
|
||||
entrySupplier = () -> SuggestionEntryTests.createTestItem(PhraseSuggestion.Entry.class);
|
||||
} else if (type == CompletionSuggestion.class) {
|
||||
suggestion = new CompletionSuggestion(name, size);
|
||||
suggestion = new CompletionSuggestion(name, size, randomBoolean());
|
||||
entrySupplier = () -> SuggestionEntryTests.createTestItem(CompletionSuggestion.Entry.class);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("type not supported [" + type + "]");
|
||||
@ -249,7 +249,7 @@ public class SuggestionTests extends ESTestCase {
|
||||
CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(1, new Text("someText"), 1.3f, contexts);
|
||||
CompletionSuggestion.Entry entry = new CompletionSuggestion.Entry(new Text("entryText"), 42, 313);
|
||||
entry.addOption(option);
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5);
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion("suggestionName", 5, randomBoolean());
|
||||
suggestion.addTerm(entry);
|
||||
BytesReference xContent = toXContent(suggestion, XContentType.JSON, params, randomBoolean());
|
||||
assertEquals(
|
||||
@ -265,4 +265,4 @@ public class SuggestionTests extends ESTestCase {
|
||||
+ "}]}", xContent.utf8ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -114,6 +114,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
|
||||
contextMap.put(geoQueryContextName, contexts);
|
||||
}
|
||||
testBuilder.contexts(contextMap);
|
||||
testBuilder.skipDuplicates(randomBoolean());
|
||||
return testBuilder;
|
||||
}
|
||||
|
||||
@ -128,7 +129,7 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
|
||||
|
||||
@Override
|
||||
protected void mutateSpecificParameters(CompletionSuggestionBuilder builder) throws IOException {
|
||||
switch (randomIntBetween(0, 4)) {
|
||||
switch (randomIntBetween(0, 5)) {
|
||||
case 0:
|
||||
int nCatContext = randomIntBetween(1, 5);
|
||||
List<CategoryQueryContext> contexts = new ArrayList<>(nCatContext);
|
||||
@ -154,6 +155,9 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
|
||||
case 4:
|
||||
builder.regex(randomAlphaOfLength(10), RegexOptionsTests.randomRegexOptions());
|
||||
break;
|
||||
case 5:
|
||||
builder.skipDuplicates(!builder.skipDuplicates);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("should not through");
|
||||
}
|
||||
@ -182,5 +186,6 @@ public class CompletionSuggesterBuilderTests extends AbstractSuggestionBuilderTe
|
||||
assertEquals(parsedContextBytes.get(contextName), queryContexts.get(contextName));
|
||||
}
|
||||
assertEquals(builder.regexOptions, completionSuggestionCtx.getRegexOptions());
|
||||
assertEquals(builder.skipDuplicates, completionSuggestionCtx.isSkipDuplicates());
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
@ -38,7 +39,7 @@ public class CompletionSuggestionTests extends ESTestCase {
|
||||
String name = randomAlphaOfLength(10);
|
||||
int size = randomIntBetween(3, 5);
|
||||
for (int i = 0; i < nShards; i++) {
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion(name, size);
|
||||
CompletionSuggestion suggestion = new CompletionSuggestion(name, size, false);
|
||||
suggestion.addTerm(new CompletionSuggestion.Entry(new Text(""), 0, 0));
|
||||
shardSuggestions.add(suggestion);
|
||||
}
|
||||
|
@ -277,6 +277,7 @@ The basic completion suggester query supports the following parameters:
|
||||
|
||||
`field`:: The name of the field on which to run the query (required).
|
||||
`size`:: The number of suggestions to return (defaults to `5`).
|
||||
`skip_duplicates`:: Whether duplicate suggestions should be filtered out (defaults to `false`).
|
||||
|
||||
NOTE: The completion suggester considers all documents in the index.
|
||||
See <<suggester-context>> for an explanation of how to query a subset of
|
||||
@ -291,6 +292,33 @@ index completions into a single shard index. In case of high heap usage due to
|
||||
shard size, it is still recommended to break index into multiple shards instead
|
||||
of optimizing for completion performance.
|
||||
|
||||
[[skip_duplicates]]
|
||||
==== Skip duplicate suggestions
|
||||
|
||||
Queries can return duplicate suggestions coming from different documents.
|
||||
It is possible to modify this behavior by setting `skip_duplicates` to `true`.
|
||||
When set, this option filters out documents with duplicate suggestions from the result.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
POST music/_search?pretty
|
||||
{
|
||||
"suggest": {
|
||||
"song-suggest" : {
|
||||
"prefix" : "nor",
|
||||
"completion" : {
|
||||
"field" : "suggest",
|
||||
"skip_duplicates": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
WARNING: When set to `true`, this option can slow down search because more suggestions
|
||||
need to be visited to find the top N.
|
||||
|
||||
[[fuzzy]]
|
||||
==== Fuzzy queries
|
||||
|
||||
|
@ -291,3 +291,42 @@ setup:
|
||||
- match: { suggest.result.0.options.1._type: "test" }
|
||||
- match: { suggest.result.0.options.1._source.title: "title_bar" }
|
||||
- match: { suggest.result.0.options.1._source.count: 4 }
|
||||
|
||||
---
|
||||
"Skip duplicates should work":
|
||||
- skip:
|
||||
version: " - 6.99.99"
|
||||
reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
body:
|
||||
suggest_1: "bar"
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 2
|
||||
body:
|
||||
suggest_1: "bar"
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
search:
|
||||
body:
|
||||
suggest:
|
||||
result:
|
||||
text: "b"
|
||||
completion:
|
||||
field: suggest_1
|
||||
skip_duplicates: true
|
||||
|
||||
- length: { suggest.result: 1 }
|
||||
- length: { suggest.result.0.options: 1 }
|
||||
- match: { suggest.result.0.options.0.text: "bar" }
|
||||
|
@ -276,4 +276,76 @@ setup:
|
||||
|
||||
- length: { suggest.result: 1 }
|
||||
- length: { suggest.result.0.options: 1 }
|
||||
- match: { suggest.result.0.options.0.text: "Marriot in Berlin" }
|
||||
- match: { suggest.result.0.options.0.text: "Marriot in Berlin" }
|
||||
|
||||
---
|
||||
"Skip duplicates with contexts should work":
|
||||
- skip:
|
||||
version: " - 6.99.99"
|
||||
reason: skip_duplicates was added in 7.0 (TODO should be backported to 6.1)
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
body:
|
||||
suggest_context:
|
||||
input: "foo"
|
||||
contexts:
|
||||
color: "red"
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 1
|
||||
body:
|
||||
suggest_context:
|
||||
input: "foo"
|
||||
contexts:
|
||||
color: "red"
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test
|
||||
type: test
|
||||
id: 2
|
||||
body:
|
||||
suggest_context:
|
||||
input: "foo"
|
||||
contexts:
|
||||
color: "blue"
|
||||
|
||||
- do:
|
||||
indices.refresh: {}
|
||||
|
||||
- do:
|
||||
search:
|
||||
body:
|
||||
suggest:
|
||||
result:
|
||||
text: "foo"
|
||||
completion:
|
||||
field: suggest_context
|
||||
skip_duplicates: true
|
||||
contexts:
|
||||
color: "red"
|
||||
|
||||
- length: { suggest.result: 1 }
|
||||
- length: { suggest.result.0.options: 1 }
|
||||
- match: { suggest.result.0.options.0.text: "foo" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
body:
|
||||
suggest:
|
||||
result:
|
||||
text: "foo"
|
||||
completion:
|
||||
skip_duplicates: true
|
||||
field: suggest_context
|
||||
|
||||
- length: { suggest.result: 1 }
|
||||
- length: { suggest.result.0.options: 1 }
|
||||
- match: { suggest.result.0.options.0.text: "foo" }
|
||||
|
Loading…
x
Reference in New Issue
Block a user